fernme 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fernme/__init__.py +2 -0
- fernme/api/__init__.py +0 -0
- fernme/api/mcp_server.py +80 -0
- fernme/api/rest.py +125 -0
- fernme/audit.py +28 -0
- fernme/auth.py +64 -0
- fernme/capture/__init__.py +69 -0
- fernme/capture/agent_byproduct.py +43 -0
- fernme/capture/base.py +51 -0
- fernme/capture/config.py +81 -0
- fernme/capture/install.py +93 -0
- fernme/capture/local_tagger.py +92 -0
- fernme/capture/pipeline.py +44 -0
- fernme/capture/signal_hooks.py +120 -0
- fernme/categories.py +48 -0
- fernme/confidence.py +34 -0
- fernme/config.py +56 -0
- fernme/core/__init__.py +1 -0
- fernme/core/graph.py +95 -0
- fernme/curation.py +166 -0
- fernme/dp.py +64 -0
- fernme/eval/__init__.py +1 -0
- fernme/eval/ablation.py +76 -0
- fernme/eval/baselines.py +48 -0
- fernme/eval/context.py +60 -0
- fernme/eval/cost_variance.py +28 -0
- fernme/eval/drift.py +83 -0
- fernme/eval/experiment.py +111 -0
- fernme/eval/pareto.py +148 -0
- fernme/eval/pilot.py +120 -0
- fernme/eval/plot.py +25 -0
- fernme/eval/quality.py +65 -0
- fernme/eval/simulator.py +67 -0
- fernme/glossary.py +84 -0
- fernme/prior/__init__.py +1 -0
- fernme/prior/population.py +63 -0
- fernme/retrieve/__init__.py +2 -0
- fernme/retrieve/activation.py +70 -0
- fernme/retrieve/card.py +53 -0
- fernme/safety.py +44 -0
- fernme/service.py +529 -0
- fernme/store/__init__.py +1 -0
- fernme/store/json_store.py +56 -0
- fernme/store/postgres_store.py +186 -0
- fernme/store/sqlite_store.py +257 -0
- fernme/style.py +63 -0
- fernme/supernode.py +86 -0
- fernme/tagging.py +46 -0
- fernme/triggers.py +49 -0
- fernme/tuning.py +44 -0
- fernme/vocabulary.py +60 -0
- fernme/write/__init__.py +2 -0
- fernme/write/hebbian.py +69 -0
- fernme/write/mapping.py +41 -0
- fernme-0.3.0.dist-info/METADATA +347 -0
- fernme-0.3.0.dist-info/RECORD +60 -0
- fernme-0.3.0.dist-info/WHEEL +5 -0
- fernme-0.3.0.dist-info/licenses/LICENSE +158 -0
- fernme-0.3.0.dist-info/licenses/NOTICE +6 -0
- fernme-0.3.0.dist-info/top_level.txt +1 -0
fernme/__init__.py
ADDED
fernme/api/__init__.py
ADDED
|
File without changes
|
fernme/api/mcp_server.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""MCP server exposing FERN as agent tools, so any MCP-capable agent (incl.
|
|
2
|
+
Claude) can give a user persistent, glass-box memory. Run: python -m fernme.api.mcp_server
|
|
3
|
+
Requires: pip install mcp"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
import os
|
|
6
|
+
from ..service import FernService
|
|
7
|
+
|
|
8
|
+
svc = FernService() # default: $FERNME_DB or ~/.fernme/fernme.db
|
|
9
|
+
|
|
10
|
+
# `mcp` is an optional dependency. Any failure while importing it leaves
# FastMCP as None so the module still imports and main() can print an
# install hint instead of crashing at import time.
try:
    from mcp.server.fastmcp import FastMCP
except Exception:  # pragma: no cover
    FastMCP = None
|
|
14
|
+
|
|
15
|
+
if FastMCP is not None:
    # The MCP SDK is available: register every FernService operation as a tool.
    # Tool docstrings are kept as-is because they are what the agent reads.
    mcp = FastMCP("fernme")

    @mcp.tool()
    def remember(site: str, user: str, type: str = "note", tags: list[str] = [],
                 text: str = "", source: str = "stated", glosses: dict = {},
                 ts: float = 0.0) -> dict:
        """Record an interaction/preference for a user on a site (consent required).

        tags: namespaced 'ns:value' tokens, e.g. 'pref:concise', 'topic:python',
        '!likes:dairy' (leading '!' = a dislike). Prefer SPECIFIC tags.
        text: the sentence this came from. Stored as free context (no token
        cost) so a bare tag isn't ambiguous later.
        source: 'stated' (the user said it) or 'inferred' (you guessed it).
        Inferred never silently overrides stated; conflicts return a
        'questions' list to ask the user.
        glosses: optional {tag: one-line meaning}. Emit these as a byproduct of
        your reply (a few tokens, no separate call). Missing ones fall
        back to a deterministic namespace template (0 tokens).
        Returns stored attrs, plus 'questions'/'superseded' when curation is on."""
        # Copy the containers: the signature defaults are shared objects, and the
        # payload is handed to the service, which must not be able to mutate them.
        payload = {"tags": list(tags), "source": source}
        if text:
            payload["text"] = text
        if glosses:
            payload["glosses"] = dict(glosses)
        return svc.observe(site, user, type, payload, ts)

    @mcp.tool()
    def recall_glossary(site: str, user: str) -> dict:
        """What each remembered tag MEANS: {tag: {gloss, context}}. Context is the
        sentence it came from; gloss is the supplied or templated one-liner."""
        return svc.glossary(site, user)

    @mcp.tool()
    def grant_consent(site: str, user: str, granted: bool = True) -> dict:
        """Grant or withdraw a user's consent to be remembered on a site."""
        return svc.consent(site, user, granted)

    @mcp.tool()
    def recall_card(site: str, user: str, context: list[str] = [], now: float = 0.0) -> dict:
        """Get the token-minimal memory card for a user (what to inject into the prompt)."""
        # Pass a private copy so the shared default list can never be mutated.
        return svc.card(site, user, list(context), now)

    @mcp.tool()
    def recall_events(site: str, user: str, contains: str = "", limit: int = 20) -> list:
        """Open the Cabinet: search a user's raw interaction history for specifics."""
        # An empty filter string is forwarded as None.
        return svc.recall(site, user, contains=contains or None, limit=limit)

    @mcp.tool()
    def edit_memory(site: str, user: str, attr: str, weight: float) -> dict:
        """Glass-box override of a single preference (locked, never decays)."""
        return svc.edit(site, user, attr, weight)

    @mcp.tool()
    def forget_me(site: str, user: str) -> dict:
        """Delete everything stored about a user on a site (right to be forgotten)."""
        return svc.delete(site, user)

    def main():
        # Start the MCP server.
        mcp.run()
else:
    def main():
        # The optional dependency is missing: exit with an actionable message.
        raise SystemExit("Install the 'mcp' package: pip install mcp")
|
|
78
|
+
|
|
79
|
+
if __name__ == "__main__":
|
|
80
|
+
main()
|
fernme/api/rest.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""FastAPI REST interface for FERN v1. Run:
|
|
2
|
+
uvicorn fernme.api.rest:app --port 8077
|
|
3
|
+
Every endpoint is consent-gated by the service layer."""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
import os
|
|
6
|
+
from typing import List, Optional, Any, Dict
|
|
7
|
+
from fastapi import FastAPI, HTTPException
|
|
8
|
+
from fastapi.responses import FileResponse, JSONResponse
|
|
9
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
from ..service import FernService, ConsentError
|
|
12
|
+
|
|
13
|
+
svc = FernService() # default: $FERNME_DB or ~/.fernme/fernme.db
|
|
14
|
+
app = FastAPI(title="FERN Memory API", version="1.0")
|
|
15
|
+
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
|
16
|
+
_UI = os.path.join(os.path.dirname(__file__), "..", "web", "glassbox.html")
|
|
17
|
+
_GRAPH_UI = os.path.join(os.path.dirname(__file__), "..", "web", "graph.html")
|
|
18
|
+
_API_KEY = os.environ.get("FERNME_API_KEY") # if set, all data routes require X-API-Key
|
|
19
|
+
_OPEN = {"/health", "/ui", "/docs", "/redoc", "/openapi.json", "/docs/oauth2-redirect"}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@app.middleware("http")
async def _auth(request, call_next):
    # Optional API-key gate. It is active only when FERNME_API_KEY was set at
    # import time; paths listed in _OPEN are always reachable.
    import hmac  # local import: needed only for the constant-time comparison

    if _API_KEY and request.url.path not in _OPEN and request.method != "OPTIONS":
        # OPTIONS is let through so the CORS middleware can answer browser
        # preflight requests, which never carry custom headers such as X-API-Key.
        supplied = request.headers.get("x-api-key") or ""
        # Compare as bytes in constant time, so response timing does not leak how
        # much of the key matched and non-ASCII header values cannot raise.
        if not hmac.compare_digest(supplied.encode("utf-8"), _API_KEY.encode("utf-8")):
            return JSONResponse({"detail": "invalid or missing X-API-Key"}, status_code=401)
    return await call_next(request)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@app.get("/ui")
def ui():
    # Serve the glass-box HTML page from the path stored in _UI. If the file is
    # not present in this installation, answer with a clear 404 rather than
    # letting FileResponse fail while the response is being sent.
    if not os.path.isfile(_UI):
        raise HTTPException(status_code=404, detail="glassbox UI is not installed")
    return FileResponse(_UI)
|
|
33
|
+
|
|
34
|
+
class GraphIn(BaseModel):
    # Request body for POST /graph-data.
    site: str
    # Optional; None is forwarded to svc.graph unchanged when omitted.
    user: Optional[str] = None
|
|
36
|
+
|
|
37
|
+
class WhyIn(BaseModel):
    # Request body for POST /why: which attribute of which user to explain.
    site: str
    user: str
    attr: str
|
|
39
|
+
|
|
40
|
+
class MemoryGraphIn(BaseModel):
    # Request body for POST /memory-graph; the value is passed to svc.memory_graph.
    person: str
|
|
42
|
+
|
|
43
|
+
@app.get("/graph")
def graph_ui():
    # Serve the graph HTML page from the path stored in _GRAPH_UI. If the file
    # is not present in this installation, answer with a clear 404 rather than
    # letting FileResponse fail while the response is being sent.
    if not os.path.isfile(_GRAPH_UI):
        raise HTTPException(status_code=404, detail="graph UI is not installed")
    return FileResponse(_GRAPH_UI)
|
|
46
|
+
|
|
47
|
+
@app.post("/graph-data")
def graph_data(b: GraphIn):
    # Graph payload for a site (and optionally one user); a ConsentError raised
    # by the service is turned into HTTP 403 by _guard.
    site, user = b.site, b.user
    return _guard(svc.graph, site, user)
|
|
50
|
+
|
|
51
|
+
@app.post("/memory-graph")
def memory_graph(b: MemoryGraphIn):
    # Graph for one person id; a ConsentError becomes HTTP 403 via _guard.
    person = b.person
    return _guard(svc.memory_graph, person)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ConsentIn(BaseModel):
    # Request body for POST /consent.
    site: str
    user: str
    granted: bool = True  # False withdraws consent
    ts: float = 0.0
|
|
58
|
+
|
|
59
|
+
class ObserveIn(BaseModel):
    # Request body for POST /observe: one event to record for a user.
    site: str
    user: str
    type: str = "purchase"
    payload: Dict[str, Any] = {}
    ts: float = 0.0
|
|
61
|
+
|
|
62
|
+
class NumericIn(BaseModel):
    # Request body for POST /numeric: a single key/value passed to svc.set_numeric.
    site: str
    user: str
    key: str
    value: Any
|
|
64
|
+
|
|
65
|
+
class CardIn(BaseModel):
    # Request body shared by POST /card and POST /defaults
    # (/defaults does not read `context`).
    site: str
    user: str
    context: List[str] = []
    now: float = 0.0
|
|
67
|
+
|
|
68
|
+
class EditIn(BaseModel):
    # Request body for POST /edit: set one attribute's weight for a user.
    site: str
    user: str
    attr: str
    weight: float
|
|
70
|
+
|
|
71
|
+
class RecallIn(BaseModel):
    # Request body for POST /recall; `type` and `contains` are optional filters.
    site: str
    user: str
    type: Optional[str] = None
    contains: Optional[str] = None
    limit: int = 20
|
|
73
|
+
|
|
74
|
+
class UserRef(BaseModel):
    # Minimal (site, user) reference used by /export, /delete and /prior_refresh.
    site: str
    user: str
|
|
76
|
+
|
|
77
|
+
class TriggersIn(BaseModel):
    # Request body for POST /triggers.
    site: str
    user: str
    now: float = 0.0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _guard(fn, *a, **k):
    """Call ``fn(*a, **k)`` and translate a ConsentError into HTTP 403.

    Returns whatever ``fn`` returns. Any other exception propagates unchanged.
    The HTTPException is chained to the original error so the cause is kept
    in tracebacks and logs.
    """
    try:
        return fn(*a, **k)
    except ConsentError as e:
        raise HTTPException(status_code=403, detail=str(e)) from e
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@app.get("/health")
def health():
    # Static liveness payload; /health is in _OPEN, so it needs no API key.
    status = {"ok": True, "service": "fern", "version": "1.0"}
    return status
|
|
90
|
+
|
|
91
|
+
@app.post("/consent")
def consent(b: ConsentIn):
    # Called directly (not through _guard): this is the route that records consent.
    result = svc.consent(b.site, b.user, b.granted, b.ts)
    return result
|
|
93
|
+
|
|
94
|
+
@app.post("/observe")
def observe(b: ObserveIn):
    # Record one event; a ConsentError becomes HTTP 403 via _guard.
    return _guard(
        svc.observe,
        b.site,
        b.user,
        b.type,
        b.payload,
        b.ts,
    )
|
|
96
|
+
|
|
97
|
+
@app.post("/numeric")
def numeric(b: NumericIn):
    # Store a single key/value for the user; ConsentError -> HTTP 403 via _guard.
    return _guard(
        svc.set_numeric,
        b.site,
        b.user,
        b.key,
        b.value,
    )
|
|
99
|
+
|
|
100
|
+
@app.post("/card")
def card(b: CardIn):
    # Memory card for the user in the given context; ConsentError -> HTTP 403.
    return _guard(
        svc.card,
        b.site,
        b.user,
        b.context,
        b.now,
    )
|
|
102
|
+
|
|
103
|
+
@app.post("/defaults")
def defaults(b: CardIn):
    # Reuses the CardIn body but forwards only site, user and now.
    return _guard(
        svc.defaults,
        b.site,
        b.user,
        b.now,
    )
|
|
105
|
+
|
|
106
|
+
@app.post("/recall")
def recall(b: RecallIn):
    # Search stored events with optional type/contains filters; ConsentError -> 403.
    return _guard(
        svc.recall,
        b.site,
        b.user,
        b.type,
        b.contains,
        b.limit,
    )
|
|
108
|
+
|
|
109
|
+
@app.post("/edit")
def edit(b: EditIn):
    # Set one attribute's weight for the user; ConsentError -> HTTP 403 via _guard.
    return _guard(
        svc.edit,
        b.site,
        b.user,
        b.attr,
        b.weight,
    )
|
|
111
|
+
|
|
112
|
+
@app.post("/why")
def why(b: WhyIn):
    # Explanation for one attribute of a user; ConsentError -> HTTP 403 via _guard.
    return _guard(
        svc.why,
        b.site,
        b.user,
        b.attr,
    )
|
|
114
|
+
|
|
115
|
+
@app.post("/export")
def export(b: UserRef):
    # Export what is stored for the user; ConsentError -> HTTP 403 via _guard.
    site, user = b.site, b.user
    return _guard(svc.export, site, user)
|
|
117
|
+
|
|
118
|
+
@app.post("/delete")
def delete(b: UserRef):
    # Called directly (not through _guard), so a ConsentError is not mapped to 403 here.
    result = svc.delete(b.site, b.user)
    return result
|
|
120
|
+
|
|
121
|
+
@app.post("/triggers")
def triggers(b: TriggersIn):
    # Triggers for the user at time `now`; ConsentError -> HTTP 403 via _guard.
    return _guard(
        svc.triggers,
        b.site,
        b.user,
        b.now,
    )
|
|
123
|
+
|
|
124
|
+
@app.post("/prior_refresh")
def prior_refresh(b: UserRef):
    # Site-level operation: the body must still carry `user` (UserRef requires
    # it), but only `site` is forwarded to the service.
    site = b.site
    return svc.prior_refresh(site)
|
fernme/audit.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Verifiable data ownership (#4) — a tamper-evident, user-keyed audit log.
|
|
2
|
+
|
|
3
|
+
Every action is recorded as a link in an HMAC hash chain: each entry's hash binds
|
|
4
|
+
the previous hash + the action, keyed by the user's secret. Anyone holding the key
|
|
5
|
+
can replay the chain and detect if a single entry was altered, inserted, or
|
|
6
|
+
removed. (Production upgrade: per-user asymmetric keys so the USER signs and the
|
|
7
|
+
server can't forge — same chain, stronger ownership.)"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
import hmac, hashlib, json
|
|
10
|
+
|
|
11
|
+
GENESIS = "GENESIS"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def entry_hash(key: bytes, prev_hash: str, seq: int, ts: float,
               action: str, detail: dict) -> str:
    """Return the hex HMAC-SHA256 that links one audit entry to its predecessor.

    The signed message is the '|'-joined previous hash, sequence number,
    timestamp, action and the JSON of ``detail`` with sorted keys, so the
    same logical entry always produces the same digest.
    """
    canonical_detail = json.dumps(detail, sort_keys=True)
    parts = (prev_hash, seq, ts, action, canonical_detail)
    message = "|".join(f"{part}" for part in parts)
    mac = hmac.new(key, message.encode(), hashlib.sha256)
    return mac.hexdigest()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def verify(entries, key: bytes):
    """Replay the chain. Returns (ok, broken_seq).

    Each entry's hash is recomputed from the previous entry's stored hash
    (GENESIS for the first one) and compared in constant time. The first
    mismatch yields ``(False, <that entry's seq>)``; a clean replay yields
    ``(True, None)``.
    """
    expected_prev = GENESIS
    for record in entries:
        recomputed = entry_hash(
            key,
            expected_prev,
            record["seq"],
            record["ts"],
            record["action"],
            record["detail"],
        )
        if hmac.compare_digest(recomputed, record["hash"]):
            # The link holds; the stored hash becomes the next entry's predecessor.
            expected_prev = record["hash"]
            continue
        return False, record["seq"]
    return True, None
|
fernme/auth.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Sign-in -> supernode linking. The supernode is built by the USER signing in
|
|
2
|
+
with their FERN account (the consent moment), never by behind-the-back matching.
|
|
3
|
+
|
|
4
|
+
This module is provider-agnostic: it verifies a signed identity token and maps it
|
|
5
|
+
to a stable person_id, then links the current site's local user into that person's
|
|
6
|
+
supernode. A MockProvider is included so the flow is testable end-to-end without a
|
|
7
|
+
real IdP; a real Google/GitHub OIDC verifier drops in behind the same interface."""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
import hmac, hashlib, json, base64, time
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _b64(d: bytes) -> str:
    """Encode *d* as URL-safe base64 text with the trailing '=' padding removed."""
    encoded = base64.urlsafe_b64encode(d)
    return encoded.decode().rstrip("=")
|
|
14
|
+
def _unb64(s: str) -> bytes:
    """Decode URL-safe base64 text whose '=' padding was stripped."""
    missing_padding = -len(s) % 4
    return base64.urlsafe_b64decode(s + "=" * missing_padding)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MockProvider:
    """Stand-in identity provider: issues and verifies HMAC-signed tokens. Mirrors
    the contract of a real OIDC id_token (issuer, subject, expiry, signature).

    A token is ``<payload>.<sig>``: the payload is unpadded URL-safe base64 of
    the JSON claims, and the signature is the HMAC-SHA256 of that payload text.
    """

    def __init__(self, secret: bytes = b"mock-idp-secret", issuer: str = "mock-idp"):
        self.secret = secret
        self.issuer = issuer

    def _sign(self, payload: str) -> str:
        """Return the unpadded URL-safe base64 HMAC-SHA256 of *payload*."""
        return _b64(hmac.new(self.secret, payload.encode(), hashlib.sha256).digest())

    def issue(self, subject: str, email: str = "", ttl: int = 3600) -> str:
        """Create a signed token for *subject* that expires *ttl* seconds from now."""
        body = {"iss": self.issuer, "sub": subject, "email": email,
                "exp": int(time.time()) + ttl}
        payload = _b64(json.dumps(body, sort_keys=True).encode())
        return f"{payload}.{self._sign(payload)}"

    def verify(self, token: str) -> dict:
        """Return the claims of *token*.

        Raises AuthError if the token is malformed, its signature does not
        match, it has expired, or it names a different issuer.
        """
        try:
            payload, sig = token.split(".")
        except (ValueError, AttributeError):
            # Wrong number of '.' separators, or not a string at all.
            raise AuthError("malformed token")
        expect = self._sign(payload)
        try:
            signature_ok = hmac.compare_digest(sig, expect)
        except TypeError:
            # compare_digest rejects non-ASCII text; such a signature can never
            # be one of ours, so report it as a bad signature.
            signature_ok = False
        if not signature_ok:
            raise AuthError("bad signature")  # tamper / forgery
        # The payload is decoded only after the signature has been checked.
        body = json.loads(_unb64(payload))
        if body.get("exp", 0) < time.time():
            raise AuthError("token expired")
        if body.get("iss") != self.issuer:
            raise AuthError("untrusted issuer")
        return body
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class AuthError(RuntimeError):
    """Raised when an identity token is malformed, forged, expired or from an
    untrusted issuer."""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def person_id_for(claims: dict) -> str:
    """Derive a stable, opaque person id from verified claims.

    The id is ``person:`` followed by the first 16 hex characters of the
    SHA-256 of ``<iss>:<sub>``. The email claim is never part of it, so an
    identity cannot be linked by guessing an address.
    """
    issuer, subject = claims["iss"], claims["sub"]
    digest = hashlib.sha256(f"{issuer}:{subject}".encode()).hexdigest()
    return f"person:{digest[:16]}"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def sign_in_and_link(service, provider, token: str, site: str, local_user: str) -> dict:
    """Run the full sign-in handshake for one site.

    The token is verified by ``provider`` (which raises AuthError on tamper or
    expiry), the person id is derived from the claims, and this site's local
    user is linked to that person. Returns the person id together with the
    identities the store now lists for it.
    """
    verified_claims = provider.verify(token)
    person = person_id_for(verified_claims)
    service.link_identity(person, site, local_user)
    linked = service.store.list_identities(person)
    return {"person": person, "linked": linked}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""fernme.capture — the pluggable *perception* layer above the 0-LLM engine.
|
|
2
|
+
|
|
3
|
+
Pick how memory gets written without touching the engine. Three adapters, each
|
|
4
|
+
with an honest token-cost label:
|
|
5
|
+
|
|
6
|
+
signal structured events -> rules 0 tokens
|
|
7
|
+
local text -> local rules or local model 0 API tokens (your CPU/GPU)
|
|
8
|
+
agent host agent emits a tag line ~20-40 tokens, no extra call
|
|
9
|
+
|
|
10
|
+
Typical use:
|
|
11
|
+
|
|
12
|
+
from fernme.service import FernService
|
|
13
|
+
from fernme.store.sqlite_store import SQLiteStore
|
|
14
|
+
from fernme.capture import load_pipeline
|
|
15
|
+
|
|
16
|
+
svc = FernService(store=SQLiteStore("memory.db"))
|
|
17
|
+
svc.store.set_consent("demo.com", "elena", True)
|
|
18
|
+
pipe = load_pipeline(svc, "demo.com", "elena", "fern.toml")
|
|
19
|
+
pipe.ingest({"kind": "chat", "text": "keep it concise",
|
|
20
|
+
"tags": ["pref:concise"]})
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
from typing import Dict, List
|
|
24
|
+
|
|
25
|
+
from .base import BaseAdapter
|
|
26
|
+
from .signal_hooks import SignalAdapter
|
|
27
|
+
from .local_tagger import LocalTaggerAdapter
|
|
28
|
+
from .agent_byproduct import AgentByproductAdapter
|
|
29
|
+
from .config import load_config, write_config, default_config, VALID
|
|
30
|
+
from .pipeline import CapturePipeline
|
|
31
|
+
|
|
32
|
+
REGISTRY = {
|
|
33
|
+
"signal": SignalAdapter,
|
|
34
|
+
"local": LocalTaggerAdapter,
|
|
35
|
+
"agent": AgentByproductAdapter,
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def build_adapters(cfg: Dict) -> List[BaseAdapter]:
    """Build one adapter per name in ``cfg["active"]``, in that order.

    Names missing from REGISTRY are skipped. The per-adapter options are read
    from ``cfg[<name>]``, falling back to the built-in defaults.
    """
    adapters: List[BaseAdapter] = []
    for name in cfg.get("active", []):
        adapter_cls = REGISTRY.get(name)
        if adapter_cls is None:
            continue
        options = dict(cfg.get(name, {}))
        if name == "local":
            adapter = LocalTaggerAdapter(
                mode=options.get("mode", "rules"),
                model=options.get("model", "hermes3"),
                endpoint=options.get("endpoint", "http://localhost:11434"),
            )
        elif name == "agent":
            adapter = AgentByproductAdapter(marker=options.get("marker", "FERN_TAGS:"))
        else:
            # Remaining adapters take no constructor options.
            adapter = adapter_cls()
        adapters.append(adapter)
    return adapters
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def load_pipeline(svc, site: str, user: str, path: str = "fern.toml") -> CapturePipeline:
    """Read the capture config at *path* and return a pipeline for (site, user)."""
    adapters = build_adapters(load_config(path))
    return CapturePipeline(svc, site, user, adapters)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
__all__ = [
|
|
65
|
+
"BaseAdapter", "SignalAdapter", "LocalTaggerAdapter", "AgentByproductAdapter",
|
|
66
|
+
"CapturePipeline", "REGISTRY", "VALID",
|
|
67
|
+
"build_adapters", "load_pipeline", "load_config", "write_config",
|
|
68
|
+
"default_config",
|
|
69
|
+
]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""agent — near-zero capture by piggybacking on the host agent's own reply.
|
|
2
|
+
|
|
3
|
+
The realistic FERNme path when you are *already* talking to an LLM agent
|
|
4
|
+
(Claude Cowork, Codex, ...). The agent appends a tiny tag line as a byproduct of
|
|
5
|
+
the answer it is generating anyway — there is NO separate model call. The only
|
|
6
|
+
cost is the handful of extra output tokens of the tag line itself (~20-40).
|
|
7
|
+
|
|
8
|
+
This adapter does not call any model. It just parses the marker line the agent
|
|
9
|
+
emitted out of the event text and hands the tags to the 0-LLM write path.
|
|
10
|
+
|
|
11
|
+
Convention (either works):
|
|
12
|
+
FERN_TAGS: pref:concise topic:python goal:launch
|
|
13
|
+
<!--FERN pref:concise topic:python-->
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
import re
|
|
17
|
+
from typing import Dict, List
|
|
18
|
+
|
|
19
|
+
from .base import BaseAdapter
|
|
20
|
+
from ..safety import sanitize_tags
|
|
21
|
+
|
|
22
|
+
_LINE = re.compile(r"FERN_TAGS:\s*(.+)", re.IGNORECASE)
|
|
23
|
+
_HTML = re.compile(r"<!--\s*FERN\s+(.+?)-->", re.IGNORECASE | re.DOTALL)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AgentByproductAdapter(BaseAdapter):
    """Parse tags that the host agent appended to its own reply.

    Tags come from ``event["tags"]`` plus the first marker line found in
    ``event["text"]`` (``<marker> tag tag ...``) or, failing that, an HTML
    comment of the form ``<!--FERN tag tag-->``. No model is called.
    """

    name = "agent"
    cost_label = "~20-40 tokens/write — emitted inside a reply already happening (no separate call)"
    cost_tokens = 30
    reads_text = True
    needs = "a host agent that appends a 'FERN_TAGS:' line"

    def __init__(self, marker: str = "FERN_TAGS:"):
        self.marker = marker

    def _marker_pattern(self):
        """Return the regex for this instance's marker line.

        Built from ``self.marker`` so a marker configured in fern.toml is
        honoured. An empty or missing marker falls back to the default.
        """
        marker = getattr(self, "marker", None) or "FERN_TAGS:"
        return re.compile(re.escape(marker) + r"\s*(.+)", re.IGNORECASE)

    def extract(self, event: Dict) -> List[str]:
        """Return the sanitized tags proposed by *event*."""
        # Explicit tags passed by the agent come first; a None value counts as none.
        tags: List[str] = list(event.get("tags") or [])
        text = event.get("text") or ""
        m = self._marker_pattern().search(text) or _HTML.search(text)
        if m:
            # Split on whitespace/commas and keep only 'ns:value'-shaped tokens.
            tags += [t for t in re.split(r"[\s,]+", m.group(1).strip()) if ":" in t]
        return sanitize_tags(tags)
|
fernme/capture/base.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Capture adapters — the *perception* layer that sits above the 0-LLM engine.
|
|
2
|
+
|
|
3
|
+
The FERNme write (`service.observe`) is pure graph arithmetic: 0 LLM tokens,
|
|
4
|
+
always. What differs per deployment is only *how tags are produced* from
|
|
5
|
+
experience. Each adapter is one way to turn an event into tags, and carries an
|
|
6
|
+
honest `cost_label` / `cost_tokens` so an installer can tell the user exactly
|
|
7
|
+
what it will cost — no hidden LLM calls.
|
|
8
|
+
|
|
9
|
+
signal structured events (command/file/git/app/calendar) -> rules. 0 tokens.
|
|
10
|
+
local text -> local keyword rules, or a small local model (Ollama). 0 API
|
|
11
|
+
tokens; uses your own CPU/GPU.
|
|
12
|
+
agent the host agent (Claude Cowork, Codex, ...) emits a tiny tag line as a
|
|
13
|
+
byproduct of the reply it is already writing. ~20-40 output tokens,
|
|
14
|
+
no *separate* LLM call.
|
|
15
|
+
|
|
16
|
+
Adapters only PROPOSE tags. They are sanitized and written through the normal
|
|
17
|
+
no-LLM path, so the engine stays the single source of truth.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
from typing import Dict, List
|
|
21
|
+
|
|
22
|
+
# An "event" is a plain dict. Conventional keys:
|
|
23
|
+
# kind : str -- "chat" | "command" | "file" | "git" | "app" | "calendar"
|
|
24
|
+
# text : str -- free text (for chat / local tagger)
|
|
25
|
+
# ... : adapter-specific fields (cmd, path, repo, msg, name, title, ...)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class BaseAdapter:
    """Base class for capture adapters: one way to turn an event into proposed tags.

    A subclass overrides the class attributes below and implements `extract`.
    `cost_tokens` is the rough per-write LLM-token cost the adapter *causes*;
    0 means it spends no model tokens at all.
    """

    name: str = "base"
    cost_label: str = ""
    cost_tokens: int = 0  # rough billed tokens caused per write
    reads_text: bool = False  # whether it consumes event["text"]
    needs: str = "nothing"  # human note: what must be present to use it

    # Attributes reported by info(), in output order.
    _INFO_FIELDS = ("name", "cost_label", "cost_tokens", "needs")

    def extract(self, event: Dict) -> List[str]:
        """Return the tags proposed for *event*; subclasses must override."""
        raise NotImplementedError

    def info(self) -> Dict:
        """Return the adapter's descriptive fields as a dict (for the installer table)."""
        return {field: getattr(self, field) for field in self._INFO_FIELDS}
|
fernme/capture/config.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Read/write `fern.toml` — which capture adapters are active and their options.
|
|
2
|
+
|
|
3
|
+
Dependency-free: a tiny targeted TOML reader/writer for our small, known schema
|
|
4
|
+
(so it works on any Python with no `tomli`/`tomllib` requirement). Shape:
|
|
5
|
+
|
|
6
|
+
[capture]
|
|
7
|
+
active = ["agent", "signal"]
|
|
8
|
+
|
|
9
|
+
[capture.local]
|
|
10
|
+
mode = "rules" # rules | model
|
|
11
|
+
model = "hermes3"
|
|
12
|
+
endpoint = "http://localhost:11434"
|
|
13
|
+
|
|
14
|
+
[capture.agent]
|
|
15
|
+
marker = "FERN_TAGS:"
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
from typing import Dict, List
|
|
21
|
+
|
|
22
|
+
DEFAULT_PATH = "fern.toml"
|
|
23
|
+
VALID = ("signal", "local", "agent")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def default_config(active: List[str] = None) -> Dict:
    """Return a fresh default capture config.

    ``active`` is copied when it is non-empty; None or an empty list selects
    the default pair ``["agent", "signal"]``.
    """
    chosen = list(active) if active else ["agent", "signal"]
    local_options = {
        "mode": "rules",
        "model": "hermes3",
        "endpoint": "http://localhost:11434",
    }
    agent_options = {"marker": "FERN_TAGS:"}
    return {"active": chosen, "local": local_options, "agent": agent_options}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# `active = [ ... ]` on a single line; group 1 is the raw comma-separated list.
_ACTIVE = re.compile(r'^\s*active\s*=\s*\[(.*?)\]', re.MULTILINE)
# `[capture.<name>]` table header; a trailing `# comment` is tolerated.
_SECTION = re.compile(r'^\s*\[capture\.(\w+)\]\s*(?:#.*)?$')
# `key = "value"`, optionally followed by a `# comment`. Without the comment
# group, a documented line such as `mode = "rules"  # rules | model` would not
# match and the setting would be dropped.
_KV = re.compile(r'^\s*(\w+)\s*=\s*"(.*?)"\s*(?:#.*)?$')
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def load_config(path: str = DEFAULT_PATH) -> Dict:
    """Parse fern.toml; missing file -> default config.

    Starts from ``default_config()`` and overlays what the file provides:
    the ``active`` list (names outside VALID are dropped) and the quoted
    ``key = "value"`` pairs found under each ``[capture.<name>]`` table.
    """
    cfg = default_config()
    if not os.path.exists(path):
        return cfg
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()

    m = _ACTIVE.search(text)
    if m:
        items = [s.strip().strip('"').strip("'") for s in m.group(1).split(",")]
        cfg["active"] = [s for s in items if s in VALID]

    section = None
    for line in text.splitlines():
        sm = _SECTION.match(line)
        if sm:
            name = sm.group(1)
            # Only dict-valued entries can receive keys; a table such as
            # [capture.active] would otherwise index into the `active` list.
            section = name if isinstance(cfg.setdefault(name, {}), dict) else None
            continue
        if line.lstrip().startswith("["):
            # Any other table header ends the current capture section, so its
            # keys are not attributed to the previous [capture.<name>] table.
            section = None
            continue
        if section:
            kv = _KV.match(line)
            if kv:
                cfg[section][kv.group(1)] = kv.group(2)
    return cfg
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def write_config(cfg: Dict, path: str = DEFAULT_PATH) -> str:
    """Write *cfg* to *path* in the fern.toml layout and return *path*.

    The file holds a two-line comment header, the ``[capture]`` table with
    the active list, then one ``[capture.<name>]`` table for each of
    ``local`` and ``agent`` that has options.
    """
    quoted_active = ['"%s"' % a for a in cfg.get("active", [])]
    lines = [
        "# FERNme capture config — which perception adapters are active.",
        "# The engine write is always 0-LLM; only tag *production* differs.",
        "",
        "[capture]",
        "active = [%s]" % ", ".join(quoted_active),
        "",
    ]
    for sec in ("local", "agent"):
        opts = cfg.get(sec)
        if opts:
            lines.append("[capture.%s]" % sec)
            lines.extend('%s = "%s"' % (k, v) for k, v in opts.items())
            lines.append("")
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    return path
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Installer / picker for FERNme capture methods.
|
|
2
|
+
|
|
3
|
+
Prints a plain cost table so the user sees exactly what each method does and how
|
|
4
|
+
many tokens it costs, then writes `fern.toml`. Runnable two ways:
|
|
5
|
+
|
|
6
|
+
python -m fernme.capture.install # interactive picker
|
|
7
|
+
python -m fernme.capture.install --methods agent,signal --out fern.toml
|
|
8
|
+
python -m fernme.capture.install --show # just print the table
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
from typing import List
|
|
14
|
+
|
|
15
|
+
from .base import BaseAdapter
|
|
16
|
+
from . import REGISTRY, VALID, default_config, write_config
|
|
17
|
+
|
|
18
|
+
# stable display order + one-line "captures" note per method
|
|
19
|
+
_ORDER = ["agent", "signal", "local"]
|
|
20
|
+
_CAPTURES = {
|
|
21
|
+
"agent": "full chat meaning (host agent emits tags)",
|
|
22
|
+
"signal": "behavior only: commands, files, git, apps, calendar",
|
|
23
|
+
"local": "full chat meaning, on your own machine",
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _adapter(name: str) -> BaseAdapter:
    """Instantiate the registered adapter class for *name* with no arguments."""
    adapter_cls = REGISTRY[name]
    return adapter_cls()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def cost_table() -> str:
    """Render the method / cost / captures / needs table plus explanatory notes."""
    rows = [("METHOD", "TOKEN COST", "CAPTURES", "NEEDS")]
    for name in _ORDER:
        adapter = _adapter(name)
        if adapter.cost_tokens == 0:
            cost = "0 (free)"
        else:
            cost = "~%d/write" % adapter.cost_tokens
        rows.append((name, cost, _CAPTURES[name], adapter.needs))

    columns = range(4)
    # Each column is as wide as its longest cell, header included.
    widths = [max(len(row[i]) for row in rows) for i in columns]

    def render(row):
        return " ".join(row[i].ljust(widths[i]) for i in columns)

    rule = " ".join("-" * widths[i] for i in columns)
    table = [render(rows[0]), rule]
    table.extend(render(row) for row in rows[1:])
    note = ("\nNotes: 'agent' also costs ~25-50 tokens per *read* (the memory card "
            "injected into the\nagent's context). 'signal' and 'local' write only "
            "— no read-side token cost. Zero-token\nmethods trade recall for cost: "
            "they catch less nuance than a model would.")
    return "\n".join(table) + "\n" + note
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _interactive() -> List[str]:
    """Show the cost table, prompt for methods and return the valid choices.

    An empty answer, or one containing no valid method, selects the default
    pair ``["agent", "signal"]``.
    """
    print("\nFERNme — choose how memory gets written.\n")
    print(cost_table())
    print("\nYou can pick more than one (they stack). Example: agent,signal\n")
    answer = input("Methods [agent,signal]: ").strip()
    if not answer:
        answer = "agent,signal"
    picked = []
    for token in answer.split(","):
        if token.strip() in VALID:
            picked.append(token.strip())
    return picked if picked else ["agent", "signal"]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def main(argv=None) -> int:
    """Command-line entry point: pick capture methods and write the config.

    Returns 0 on success (including ``--show``) and 2 when ``--methods`` was
    given but contained no valid method. Without ``--methods`` the
    interactive picker is used.
    """
    p = argparse.ArgumentParser(description="Install FERNme capture methods.")
    p.add_argument("--methods", help="comma list: %s" % ",".join(_ORDER))
    p.add_argument("--out", default="fern.toml", help="config path to write")
    p.add_argument("--show", action="store_true", help="print the cost table and exit")
    args = p.parse_args(argv)

    if args.show:
        print(cost_table())
        return 0

    if args.methods:
        # Split once; drop empty tokens from input such as "agent,,signal" or a
        # trailing comma so they are not reported as unknown methods.
        requested = [m.strip() for m in args.methods.split(",")]
        requested = [m for m in requested if m]
        bad = [m for m in requested if m not in VALID]
        # Keep the first occurrence of each method: a duplicate would be written
        # twice to the config and counted twice in the cost estimate.
        chosen = list(dict.fromkeys(m for m in requested if m in VALID))
        if bad:
            print("ignored unknown methods: %s (valid: %s)" % (bad, ", ".join(VALID)),
                  file=sys.stderr)
        if not chosen:
            print("no valid methods given", file=sys.stderr)
            return 2
    else:
        chosen = list(dict.fromkeys(_interactive()))

    cfg = default_config(active=chosen)
    path = write_config(cfg, args.out)
    print("\nWrote %s with active methods: %s" % (path, ", ".join(chosen)))
    total = sum(_adapter(n).cost_tokens for n in chosen)
    print("Estimated write cost: %s tokens%s" % (
        total, " (free)" if total == 0 else " per write"))
    if "local" in chosen:
        # Name the file that was actually written, which differs from
        # fern.toml when --out is given.
        print("local: starts in rules mode (0 tokens). Install Ollama + a model and "
              "set mode=\"model\" in %s to upgrade." % path)
    return 0
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
if __name__ == "__main__":
|
|
93
|
+
raise SystemExit(main())
|