@krimto-labs/krimto 0.2.36 → 0.2.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,8 +9,8 @@ place and reads the right slice of it — Alice's preferences override the team'
9
9
  conventions override the org's standards, and every fact carries a paper trail (author, source,
10
10
  timestamp, reviewer).
11
11
 
12
- > **Where we are:** **v0.2.36** is the current release — the v0.2.17 wizard redesign is now
13
- > shipped end-to-end, plus nineteen patch releases of correctness fixes and agent-friendly
12
+ > **Where we are:** **v0.2.37** is the current release — the v0.2.17 wizard redesign is now
13
+ > shipped end-to-end, plus twenty patch releases of correctness fixes and agent-friendly
14
14
  > surface. The v0.2.16 architecture (markdown-in-git storage, `user → team → org` hierarchy,
15
15
  > hybrid retrieval, server-enforced access, two-way git sync, MCP over stdio + HTTP, the Docker
16
16
  > image, the web UI) is unchanged. What you get on top of v0.2.16:
@@ -41,7 +41,7 @@ timestamp, reviewer).
41
41
  > first, waits for `:8080` to accept TCP, then writes editor configs. Cursor's file
42
42
  > watcher never fires into an unbound port (the v0.2.27/28 ECONNREFUSED fix).
43
43
  >
44
- > **The agent story (v0.2.34 → v0.2.36).**
44
+ > **The agent story (v0.2.34 → v0.2.37).**
45
45
  > - **Phase B agent flags** — `editors --add cursor`, `service --always`, `search --keyword`,
46
46
  > `reset --yes`, `remote --set <url>`, `folder --to <path>`. Every command that used to
47
47
  > open an interactive prompt now has a flag form.
@@ -58,6 +58,10 @@ timestamp, reviewer).
58
58
  > service into team mode itself (no copy-paste recipe, no lock conflict), saves invite keys
59
59
  > to a 0600 backup file, and validates the git remote URL at the prompt. `krimto notes` now
60
60
  > works from any terminal (identity falls back to `git config user.email`).
61
+ > - **Write-time duplicate backstop (v0.2.37)** — `krimto_write` flags a near-duplicate fact in
62
+ > the same scope (a `related` list + a hint to `krimto_supersede`), so memory doesn't silently
63
+ > accumulate two facts about the same thing even when the agent skips `krimto_recall`. Backed
64
+ > by a new retrieval-quality eval that asserts recall returns the right fact first.
61
65
  >
62
66
  > See [ROADMAP.md](ROADMAP.md), [CHANGELOG.md](CHANGELOG.md), and the proposal-vs-reality
63
67
  > diff in [docs/krimto-v0.2.17-maria-journey.html §09](docs/krimto-v0.2.17-maria-journey.html)
@@ -503,7 +507,7 @@ Cline — is table stakes today, so Krimto ships it but doesn't lead with it.
503
507
  ## Roadmap
504
508
 
505
509
  `v0.2` (teams, v0.2.5) → `v0.2.18` (v0.2.17 wizard redesign — published as one SemVer-clean
506
- release) → `v0.2.36` (correctness + agent-friendly polish — current) → `v0.3` (OAuth + PR approval
510
+ release) → `v0.2.37` (correctness + agent-friendly polish — current) → `v0.3` (OAuth + PR approval
507
511
  flow) → `v1.0` (Krimto Cloud). See [ROADMAP.md](ROADMAP.md) for the per-release breakdown.
508
512
 
509
513
  ## License
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@krimto-labs/krimto",
3
- "version": "0.2.36",
3
+ "version": "0.2.37",
4
4
  "description": "Open-source team memory layer for AI agents — markdown files in git, user/team/org hierarchy, cross-vendor MCP server.",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
@@ -54,7 +54,7 @@ import { type Requester } from "../access/scope";
54
54
 
55
55
  export type RequesterResolver = (extra: { authInfo?: AuthInfo }) => Requester;
56
56
 
57
- export const KRIMTO_VERSION = "0.2.36";
57
+ export const KRIMTO_VERSION = "0.2.37";
58
58
 
59
59
  export function resolveDataDir(): string {
60
60
  return process.env.KRIMTO_DATA ?? path.join(homedir(), ".krimto");
@@ -121,7 +121,9 @@ export function buildServer(ctx: ToolContext, resolveRequester?: RequesterResolv
121
121
  "durable fact, or when correcting a mistake you should not repeat. For the user's personal " +
122
122
  "scope use `user/me` (the server resolves it to their identity) — do not guess an email. The " +
123
123
  "write is rejected (with the list of scopes you may write to) if you target a scope you couldn't " +
124
- "read back. Call krimto_recall first to avoid duplicates.",
124
+ "read back. Call krimto_recall first to avoid duplicates — and if the write response includes a " +
125
+ "`related` list, those are near-duplicates already in this scope: prefer krimto_supersede on one " +
126
+ "of them over leaving a second copy.",
125
127
  inputSchema: {
126
128
  scope: z
127
129
  .string()
@@ -9,7 +9,7 @@ import { isValidScope, type Requester } from "../access/scope";
9
9
  import { canRead, canWrite, isOrgAdmin, type Membership } from "../access/membership";
10
10
  import { FactIndex } from "../index/factIndex";
11
11
  import { Serializer } from "../index/serialize";
12
- import { rankCandidates } from "../retrieval/pipeline";
12
+ import { lexicalSimilarity, rankCandidates } from "../retrieval/pipeline";
13
13
  import { type CommitBatcher } from "../storage/batcher";
14
14
  import { type ActivityLog } from "./activity";
15
15
  import { KrimtoError } from "./errors";
@@ -47,6 +47,12 @@ export interface WriteInput {
47
47
  source?: string;
48
48
  supersedes?: string[];
49
49
  }
50
+ export interface RelatedFact {
51
+ id: string;
52
+ title: string;
53
+ /** Lexical (token-cosine) similarity of the existing fact's title+body against the new one's title+body. */
54
+ score: number;
55
+ }
50
56
  export interface WriteResult {
51
57
  id: string;
52
58
  scope: string;
@@ -56,6 +62,12 @@ export interface WriteResult {
56
62
  /** Human-readable hint for the agent to relay back. Teaches "this is just a file you can open." */
57
63
  hint: string;
58
64
  commit_sha: string | null;
65
+ /**
66
+ * Existing facts in the same scope that closely resemble the one just written. Surfaced so a
67
+ * weak agent that skipped krimto_recall still gets a chance to krimto_supersede instead of
68
+ * duplicating. Omitted when nothing similar was found.
69
+ */
70
+ related?: RelatedFact[];
59
71
  }
60
72
 
61
73
  export interface RecallInput {
@@ -148,6 +160,39 @@ function writableScopesFor(ctx: ToolContext): string[] {
148
160
  return scopes;
149
161
  }
150
162
 
163
+ /**
164
+ * Token-cosine bar above which an existing fact is "the same thing, said again" rather than
165
+ * merely sharing a word. Tuned from the smoke-6 cases: a near-duplicate (pizza vs pizza+sushi)
166
+ * scores ~0.8 and a same-topic update (pizza vs tacos) ~0.7, while two facts that only share a
167
+ * generic qualifier (favorite FOOD vs favorite COLOR) score ~0.38. 0.5 sits cleanly between.
168
+ */
169
+ const DUPLICATE_SIMILARITY_THRESHOLD = 0.5;
170
+
171
+ /**
172
+ * Existing facts in `scope` that closely resemble `${title} ${body}` — the server-side backstop
173
+ * for the "call krimto_recall first" rule. FTS narrows the candidate set; token cosine then
174
+ * filters out facts that merely share a generic word. Excludes anything the new write already
175
+ * supersedes (no point nagging about a fact it's replacing). Top 3, most-similar first.
176
+ */
177
+ async function findRelatedFacts(
178
+ ctx: ToolContext,
179
+ scope: string,
180
+ title: string,
181
+ body: string,
182
+ supersedes: string[] | undefined,
183
+ now: Date,
184
+ ): Promise<RelatedFact[]> {
185
+ const text = `${title} ${body}`;
186
+ const candidates = await ctx.index.searchCandidates(text, { readableScopes: [scope], now });
187
+ const excluded = new Set(supersedes ?? []);
188
+ return candidates
189
+ .filter((c) => !excluded.has(c.id))
190
+ .map((c) => ({ id: c.id, title: c.title, score: lexicalSimilarity(text, `${c.title} ${c.body}`) }))
191
+ .filter((r) => r.score >= DUPLICATE_SIMILARITY_THRESHOLD)
192
+ .sort((a, b) => b.score - a.score)
193
+ .slice(0, 3);
194
+ }
195
+
151
196
  /** Create a new fact. Author comes from the requester identity; scope is required. */
152
197
  export async function krimtoWrite(ctx: ToolContext, input: WriteInput): Promise<WriteResult> {
153
198
  // "user/me"/"user/self" (and bare "me"/"self") mean the caller's own personal scope. An agent
@@ -179,6 +224,14 @@ export async function krimtoWrite(ctx: ToolContext, input: WriteInput): Promise<
179
224
  });
180
225
  }
181
226
  return ctx.writeQueue.run(async () => {
227
+ // Run the near-duplicate check BEFORE the new fact is indexed, so it can't match itself.
228
+ // Best-effort: a failure here must never block a write — the hint is observational.
229
+ let related: RelatedFact[] = [];
230
+ try {
231
+ related = await findRelatedFacts(ctx, scope, input.title, input.body, input.supersedes, clock(ctx));
232
+ } catch {
233
+ /* dedup hint is advisory — never let it break the write path */
234
+ }
182
235
  const fact = createFact({
183
236
  scope,
184
237
  title: input.title,
@@ -224,6 +277,12 @@ export async function krimtoWrite(ctx: ToolContext, input: WriteInput): Promise<
224
277
  `git auto-commits every 30s, run \`npx @krimto-labs/krimto --help\` for the full CLI surface, ` +
225
278
  `or \`npx @krimto-labs/krimto storage\` for the storage model.)`;
226
279
  }
280
+ if (related.length > 0) {
281
+ const list = related.map((r) => `"${r.title}" (${r.id})`).join(", ");
282
+ hint +=
283
+ `\n⚠ Similar existing fact${related.length > 1 ? "s" : ""} in this scope: ${list}. ` +
284
+ `If this updates ${related.length > 1 ? "one of them" : "it"}, call krimto_supersede instead of leaving a duplicate.`;
285
+ }
227
286
  return {
228
287
  id: fact.frontmatter.id,
229
288
  scope: fact.frontmatter.scope,
@@ -231,6 +290,7 @@ export async function krimtoWrite(ctx: ToolContext, input: WriteInput): Promise<
231
290
  absolute_path: absolutePath,
232
291
  hint,
233
292
  commit_sha: null,
293
+ ...(related.length > 0 ? { related } : {}),
234
294
  };
235
295
  });
236
296
  }