@prom.codes/memory-mcp 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/bin.js +661 -14
  2. package/package.json +1 -1
package/dist/bin.js CHANGED
@@ -506,6 +506,377 @@ var PrometheusEmbeddingProvider = class {
506
506
  }
507
507
  };
508
508
 
509
+ // ../rerank-voyage/dist/index.js
510
// Fallbacks applied by VoyageRerankProvider when the matching option is omitted.
var DEFAULT_MODEL = "rerank-2.5";
var DEFAULT_BASE_URL = "https://api.voyageai.com/v1";
// Number of candidates sent per /rerank request.
var DEFAULT_BATCH3 = 100;
// Retries allowed after the first attempt.
var DEFAULT_RETRIES3 = 6;
// Base backoff in milliseconds; doubled on every further retry.
var DEFAULT_BACKOFF3 = 2000;
// Ceiling for a single backoff wait, in milliseconds.
var DEFAULT_RETRY_MAX2 = 60000;
516
/**
 * Convert a `Retry-After` header value into a wait time in milliseconds.
 *
 * @param {string|null} value Raw header value, or null when the header is absent.
 * @param {number} [now] Reference time in epoch milliseconds for date-form values.
 * @returns {number|null} Milliseconds to wait (never negative), or null when the
 *   value is absent, blank, or not understood.
 */
function parseRetryAfterMs2(value, now = Date.now()) {
  if (value === null) {
    return null;
  }
  const header = value.trim();
  if (header === "") {
    return null;
  }
  // Delta-seconds form: digits with an optional fractional part.
  const looksNumeric = /^[0-9]+(\.[0-9]+)?$/.test(header);
  if (looksNumeric) {
    const seconds = Number(header);
    const usable = Number.isFinite(seconds) && !(seconds < 0);
    return usable ? Math.round(seconds * 1e3) : null;
  }
  // Date form: require at least one letter so purely numeric/punctuated
  // strings are never handed to Date.parse.
  const hasLetter = /[A-Za-z]/.test(header);
  if (!hasLetter) {
    return null;
  }
  const when = Date.parse(header);
  if (!Number.isFinite(when)) {
    return null;
  }
  const remaining = when - now;
  if (remaining > 0) {
    return remaining;
  }
  return 0;
}
536
/**
 * Wait `ms` milliseconds, or reject early when `signal` aborts.
 *
 * The rejection is an Error whose `name` is "AbortError" (message "aborted"),
 * so retry loops that test `err.name === "AbortError"` treat a cancelled wait
 * as a cancellation rather than as a retryable failure.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep3(ms, signal) {
  return new Promise((resolve2, reject) => {
    const abortError = () => {
      const err = new Error("aborted");
      err.name = "AbortError";
      return err;
    };
    if (signal?.aborted === true) {
      reject(abortError());
      return;
    }
    const onAbort = () => {
      // Drop the pending timer so an aborted wait does not keep the process alive.
      clearTimeout(timer);
      reject(abortError());
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
553
/**
 * Build an Error flagged with `nonRetryable = true`; the retry loop rethrows
 * such errors instead of backing off and trying again.
 *
 * @param {string} message Error message.
 * @returns {Error} The flagged error (not thrown here).
 */
function nonRetryable3(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
558
/**
 * Reranker that scores candidates through `POST {baseUrl}/rerank`, sending the
 * candidates in batches and retrying transient failures (HTTP 429, HTTP 5xx,
 * thrown fetch/parse errors) with capped exponential backoff.
 */
var VoyageRerankProvider = class {
  /** Provider label; also the prefix of every error message raised here. */
  name;
  /** Model identifier sent in each request body. */
  model;
  /** Region label (defaults to "us"); stored but not otherwise used by this class. */
  region;
  #baseUrl;
  #apiKey;
  #batchSize;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #fetch;
  /**
   * @param {object} opts
   * @param {string} opts.apiKey Required, non-empty; sent as a Bearer token.
   * @param {string} [opts.model] Defaults to DEFAULT_MODEL.
   * @param {string} [opts.name] Defaults to `voyage:<model>`.
   * @param {string} [opts.region] Defaults to "us".
   * @param {string} [opts.baseUrl] Defaults to DEFAULT_BASE_URL; trailing slashes are stripped.
   * @param {number} [opts.batchSize] Integer in 1..1000.
   * @param {number} [opts.maxRetries] Retries after the first attempt.
   * @param {number} [opts.retryBaseMs] Base backoff in milliseconds.
   * @param {number} [opts.retryMaxMs] Cap for a single backoff wait.
   * @param {Function} [opts.fetch] fetch implementation; defaults to the global.
   * @throws {Error} When apiKey is missing/empty or batchSize is out of range.
   */
  constructor(opts) {
    if (typeof opts.apiKey !== "string" || opts.apiKey === "") {
      throw new Error("VoyageRerankProvider: apiKey is required");
    }
    if (opts.batchSize !== void 0 && (!Number.isInteger(opts.batchSize) || opts.batchSize <= 0 || opts.batchSize > 1e3)) {
      throw new Error(`VoyageRerankProvider: batchSize must be an integer in 1..1000, got ${opts.batchSize}`);
    }
    this.model = opts.model ?? DEFAULT_MODEL;
    this.name = opts.name ?? `voyage:${this.model}`;
    this.region = opts.region ?? "us";
    this.#baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
    this.#apiKey = opts.apiKey;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH3;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES3;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF3;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX2;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Score every candidate against `query` and return them best-first.
   *
   * @param {string} query Query text.
   * @param {Array<{id: *, text: string}>} candidates Items to score.
   * @param {{signal?: AbortSignal, topK?: number}} [opts] `topK` truncates the
   *   result when it is >= 0 and smaller than the number of scored items.
   * @returns {Promise<Array<{id: *, index: number, score: number}>>} `index` is
   *   the candidate's position in the input array.
   */
  async rerank(query, candidates, opts) {
    if (candidates.length === 0)
      return [];
    const all = new Array(candidates.length);
    let cursor = 0;
    // Batches are sent one after another, not in parallel.
    for (let start = 0; start < candidates.length; start += this.#batchSize) {
      const slice = candidates.slice(start, start + this.#batchSize);
      const scored = await this.#rerankBatch(query, slice, opts?.signal);
      for (const hit of scored) {
        // The endpoint reports positions relative to the batch it was sent.
        const globalIndex = start + hit.localIndex;
        const cand = candidates[globalIndex];
        all[cursor++] = { id: cand.id, index: globalIndex, score: hit.score };
      }
    }
    all.sort((a, b) => b.score - a.score);
    if (opts?.topK !== void 0 && opts.topK >= 0 && opts.topK < all.length) {
      return all.slice(0, opts.topK);
    }
    return all;
  }
  /**
   * Send one batch, retrying transient failures.
   *
   * @returns {Promise<Array<{localIndex: number, score: number}>>}
   * @throws The abort error when the caller cancels, a non-retryable Error for
   *   other non-2xx statuses or malformed payloads, or the last transient
   *   error once retries are exhausted.
   */
  async #rerankBatch(query, batch, signal) {
    const body = {
      query,
      documents: batch.map((c) => c.text),
      model: this.model,
      return_documents: false,
      truncation: true
    };
    const init = {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.#apiKey}`
      },
      body: JSON.stringify(body)
    };
    if (signal !== void 0)
      init.signal = signal;
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(`${this.#baseUrl}/rerank`, init);
        if (res.status === 429 || (res.status >= 500 && res.status < 600)) {
          lastError = new Error(`${this.name}: HTTP ${res.status}`);
          // Release the unread body so the connection is not held while we back off.
          await this.#discardBody(res);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          const backoff = this.#computeBackoff(attempt, res.headers.get("retry-after"));
          await sleep3(backoff, signal);
          continue;
        }
        if (!res.ok) {
          const text = await res.text().catch(() => "");
          throw nonRetryable3(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
        }
        const payload = await res.json();
        return this.#decode(payload, batch.length);
      } catch (err) {
        // A caller abort ends the loop immediately, whatever shape the
        // rejection takes (custom abort reasons are not named "AbortError").
        if (err?.name === "AbortError" || signal?.aborted === true)
          throw err;
        if (err?.nonRetryable === true)
          throw err;
        if (attempt >= this.#maxRetries)
          throw err;
        lastError = err;
        attempt += 1;
        await sleep3(this.#computeBackoff(attempt, null), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /** Best-effort cancel of an unread response body; never throws. */
  async #discardBody(res) {
    try {
      await res.body?.cancel?.();
    } catch {
      // Failing to release the body must not mask the HTTP error being handled.
    }
  }
  /**
   * Backoff for the given 1-based retry: the larger of the exponential delay
   * and the server's Retry-After advice, capped at the configured maximum.
   */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs2(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate the response payload: `data` must hold exactly `expected` rows,
   * each with an in-range integer `index` and a finite `relevance_score`.
   *
   * @throws {Error} Non-retryable error on any violation.
   */
  #decode(payload, expected) {
    // Optional chaining keeps a null/primitive payload on the explicit
    // non-retryable path instead of raising a TypeError that would be retried.
    const rows = payload?.data;
    if (!Array.isArray(rows) || rows.length !== expected) {
      throw nonRetryable3(`${this.name}: expected ${expected} rerank rows, got ${rows?.length ?? 0}`);
    }
    return rows.map((row) => {
      if (!Number.isInteger(row.index) || row.index < 0 || row.index >= expected) {
        throw nonRetryable3(`${this.name}: invalid index ${row.index} in rerank response`);
      }
      if (typeof row.relevance_score !== "number" || !Number.isFinite(row.relevance_score)) {
        throw nonRetryable3(`${this.name}: invalid relevance_score ${row.relevance_score} at index ${row.index}`);
      }
      return { localIndex: row.index, score: row.relevance_score };
    });
  }
};
680
+
681
+ // ../rerank-openai-compat/dist/index.js
682
// Fallbacks applied by OpenAICompatRerankProvider when the matching option is omitted.
var DEFAULT_MODEL2 = "bge-reranker-base";
// Number of candidates sent per /rerank request.
var DEFAULT_BATCH4 = 100;
// Retries allowed after the first attempt.
var DEFAULT_RETRIES4 = 6;
// Base backoff in milliseconds; doubled on every further retry.
var DEFAULT_BACKOFF4 = 2000;
// Ceiling for a single backoff wait, in milliseconds.
var DEFAULT_RETRY_MAX3 = 60000;
// Per-request timeout in milliseconds (a configured value of 0 disables the timeout).
var DEFAULT_TIMEOUT = 180000;
688
/**
 * Interpret a `Retry-After` header as a delay in milliseconds.
 *
 * @param {string|null} value Raw header value, or null when the header is absent.
 * @param {number} [now] Reference time in epoch milliseconds for date-form values.
 * @returns {number|null} Non-negative milliseconds to wait, or null when the
 *   value is absent, blank, or unparseable.
 */
function parseRetryAfterMs3(value, now = Date.now()) {
  if (value === null) {
    return null;
  }
  const text = value.trim();
  if (text === "") {
    return null;
  }
  // Seconds form: digits with an optional fractional part.
  if (/^[0-9]+(\.[0-9]+)?$/.test(text)) {
    const seconds = Number(text);
    if (Number.isFinite(seconds) && !(seconds < 0)) {
      return Math.round(seconds * 1e3);
    }
    return null;
  }
  // Anything else must contain a letter before it is tried as a date.
  if (!/[A-Za-z]/.test(text)) {
    return null;
  }
  const target = Date.parse(text);
  if (!Number.isFinite(target)) {
    return null;
  }
  const wait = target - now;
  return wait > 0 ? wait : 0;
}
708
/**
 * Wait `ms` milliseconds, or reject early when `signal` aborts.
 *
 * The rejection is an Error whose `name` is "AbortError" (message "aborted"),
 * so retry loops that test `err.name === "AbortError"` treat a cancelled wait
 * as a cancellation rather than as a retryable failure.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep4(ms, signal) {
  return new Promise((resolve2, reject) => {
    const abortError = () => {
      const err = new Error("aborted");
      err.name = "AbortError";
      return err;
    };
    if (signal?.aborted === true) {
      reject(abortError());
      return;
    }
    const onAbort = () => {
      // Drop the pending timer so an aborted wait does not keep the process alive.
      clearTimeout(timer);
      reject(abortError());
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
725
/**
 * Create an Error carrying `nonRetryable = true`, which the retry loop
 * rethrows immediately instead of retrying.
 *
 * @param {string} message Error message.
 * @returns {Error} The flagged error (not thrown here).
 */
function nonRetryable4(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
730
/**
 * Reranker for self-hosted or generic endpoints exposing `POST {baseUrl}/rerank`
 * that answer with `{ results: [{ index, relevance_score }] }`. Requests are
 * batched, optionally time-limited per attempt, and retried on transient
 * failures (HTTP 429, HTTP 5xx, timeouts, thrown fetch/parse errors) with
 * capped exponential backoff.
 */
var OpenAICompatRerankProvider = class {
  /** Provider label; also the prefix of every error message raised here. */
  name;
  /** Model identifier sent in each request body. */
  model;
  /** Region label (defaults to "self-hosted"); stored but not otherwise used by this class. */
  region;
  #baseUrl;
  #apiKey;
  #batchSize;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #timeoutMs;
  #fetch;
  /**
   * @param {object} opts
   * @param {string} opts.baseUrl Required, non-empty; trailing slashes are stripped.
   * @param {string} [opts.apiKey] Sent as a Bearer token when non-empty.
   * @param {string} [opts.model] Defaults to DEFAULT_MODEL2.
   * @param {string} [opts.name] Defaults to `openai-compat:<model>`.
   * @param {string} [opts.region] Defaults to "self-hosted".
   * @param {number} [opts.batchSize] Integer in 1..1000.
   * @param {number} [opts.maxRetries] Retries after the first attempt.
   * @param {number} [opts.retryBaseMs] Base backoff in milliseconds.
   * @param {number} [opts.retryMaxMs] Cap for a single backoff wait.
   * @param {number} [opts.timeoutMs] Per-attempt timeout; 0 disables it.
   * @param {Function} [opts.fetch] fetch implementation; defaults to the global.
   * @throws {Error} When baseUrl is missing/empty, or batchSize/timeoutMs is invalid.
   */
  constructor(opts) {
    if (typeof opts.baseUrl !== "string" || opts.baseUrl === "") {
      throw new Error("OpenAICompatRerankProvider: baseUrl is required");
    }
    if (opts.batchSize !== void 0 && (!Number.isInteger(opts.batchSize) || opts.batchSize <= 0 || opts.batchSize > 1e3)) {
      throw new Error(`OpenAICompatRerankProvider: batchSize must be an integer in 1..1000, got ${opts.batchSize}`);
    }
    if (opts.timeoutMs !== void 0 && (!Number.isInteger(opts.timeoutMs) || opts.timeoutMs < 0)) {
      throw new Error(`OpenAICompatRerankProvider: timeoutMs must be a non-negative integer (0 disables), got ${opts.timeoutMs}`);
    }
    this.model = opts.model ?? DEFAULT_MODEL2;
    this.name = opts.name ?? `openai-compat:${this.model}`;
    this.region = opts.region ?? "self-hosted";
    this.#baseUrl = opts.baseUrl.replace(/\/+$/, "");
    this.#apiKey = opts.apiKey === void 0 || opts.apiKey === "" ? void 0 : opts.apiKey;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH4;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES4;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF4;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX3;
    this.#timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Score every candidate against `query` and return them best-first.
   *
   * @param {string} query Query text.
   * @param {Array<{id: *, text: string}>} candidates Items to score.
   * @param {{signal?: AbortSignal, topK?: number}} [opts] `topK` truncates the
   *   result when it is >= 0 and smaller than the number of scored items.
   * @returns {Promise<Array<{id: *, index: number, score: number}>>} `index` is
   *   the candidate's position in the input array.
   */
  async rerank(query, candidates, opts) {
    if (candidates.length === 0)
      return [];
    const all = new Array(candidates.length);
    let cursor = 0;
    // Batches are sent one after another, not in parallel.
    for (let start = 0; start < candidates.length; start += this.#batchSize) {
      const slice = candidates.slice(start, start + this.#batchSize);
      const scored = await this.#rerankBatch(query, slice, opts?.signal);
      for (const hit of scored) {
        // The endpoint reports positions relative to the batch it was sent.
        const globalIndex = start + hit.localIndex;
        const cand = candidates[globalIndex];
        all[cursor++] = { id: cand.id, index: globalIndex, score: hit.score };
      }
    }
    all.sort((a, b) => b.score - a.score);
    if (opts?.topK !== void 0 && opts.topK >= 0 && opts.topK < all.length) {
      return all.slice(0, opts.topK);
    }
    return all;
  }
  /**
   * Send one batch, retrying transient failures.
   *
   * Each attempt gets its own AbortController, aborted by the per-attempt
   * timeout or by the caller's signal. The timer and the parent-abort listener
   * are released before the backoff wait begins, so a timeout can only be
   * attributed to the attempt it belongs to.
   *
   * @returns {Promise<Array<{localIndex: number, score: number}>>}
   * @throws The abort error when the caller cancels, a non-retryable Error for
   *   other non-2xx statuses or malformed payloads, or the last transient
   *   error once retries are exhausted.
   */
  async #rerankBatch(query, batch, signal) {
    const headers = { "content-type": "application/json" };
    if (this.#apiKey !== void 0)
      headers.authorization = `Bearer ${this.#apiKey}`;
    const payloadJson = JSON.stringify({
      query,
      documents: batch.map((c) => c.text),
      model: this.model,
      return_documents: false
    });
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      const controller = new AbortController();
      let timedOut = false;
      let timer;
      if (this.#timeoutMs > 0) {
        timer = setTimeout(() => {
          timedOut = true;
          controller.abort();
        }, this.#timeoutMs);
      }
      const onParentAbort = () => controller.abort();
      if (signal !== void 0) {
        if (signal.aborted)
          controller.abort();
        else
          signal.addEventListener("abort", onParentAbort, { once: true });
      }
      // Set when this attempt ended in a retryable failure.
      let failure;
      let retryAfter = null;
      try {
        const res = await this.#fetch(`${this.#baseUrl}/rerank`, {
          method: "POST",
          headers,
          body: payloadJson,
          signal: controller.signal
        });
        if (res.status === 429 || (res.status >= 500 && res.status < 600)) {
          failure = new Error(`${this.name}: HTTP ${res.status}`);
          retryAfter = res.headers.get("retry-after");
          // Release the unread body so the connection is not held while we back off.
          await this.#discardBody(res);
        } else {
          if (!res.ok) {
            const text = await res.text().catch(() => "");
            throw nonRetryable4(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
          }
          const payload = await res.json();
          return this.#decode(payload, batch.length);
        }
      } catch (err) {
        // A caller abort always wins, even if the timeout also fired.
        if (signal?.aborted === true)
          throw err;
        const isAbort = err?.name === "AbortError";
        if (isAbort && !timedOut)
          throw err;
        if (!isAbort && err?.nonRetryable === true)
          throw err;
        failure = timedOut ? new Error(`${this.name}: request timed out after ${this.#timeoutMs}ms`) : err;
      } finally {
        if (timer !== void 0)
          clearTimeout(timer);
        if (signal !== void 0)
          signal.removeEventListener("abort", onParentAbort);
      }
      // Only retryable failures reach this point; success returned above.
      if (attempt >= this.#maxRetries)
        throw failure;
      lastError = failure;
      attempt += 1;
      await sleep4(this.#computeBackoff(attempt, retryAfter), signal);
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /** Best-effort cancel of an unread response body; never throws. */
  async #discardBody(res) {
    try {
      await res.body?.cancel?.();
    } catch {
      // Failing to release the body must not mask the HTTP error being handled.
    }
  }
  /**
   * Backoff for the given 1-based retry: the larger of the exponential delay
   * and the server's Retry-After advice, capped at the configured maximum.
   */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs3(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate the response payload: `results` must hold exactly `expected`
   * rows, each with an in-range integer `index` and a finite `relevance_score`.
   *
   * @throws {Error} Non-retryable error on any violation.
   */
  #decode(payload, expected) {
    // Optional chaining keeps a null/primitive payload on the explicit
    // non-retryable path instead of raising a TypeError that would be retried.
    const rows = payload?.results;
    if (!Array.isArray(rows) || rows.length !== expected) {
      throw nonRetryable4(`${this.name}: expected ${expected} rerank rows, got ${rows?.length ?? 0}`);
    }
    return rows.map((row) => {
      if (!Number.isInteger(row.index) || row.index < 0 || row.index >= expected) {
        throw nonRetryable4(`${this.name}: invalid index ${row.index} in rerank response`);
      }
      if (typeof row.relevance_score !== "number" || !Number.isFinite(row.relevance_score)) {
        throw nonRetryable4(`${this.name}: invalid relevance_score ${row.relevance_score} at index ${row.index}`);
      }
      return { localIndex: row.index, score: row.relevance_score };
    });
  }
};
879
+
509
880
  // dist/api-key.js
510
881
  var KEY_PATTERN = /^prom_(live|test)_[A-Za-z0-9]{10,}$/;
511
882
  var API_KEY_ENV = "PROMETHEUS_API_KEY";
@@ -520,6 +891,114 @@ function requireApiKey(env) {
520
891
  return key;
521
892
  }
522
893
 
894
+ // dist/extraction.js
895
+ var SYSTEM_PROMPT = 'You extract durable, atomic facts from a coding agent\'s session notes for long-term project memory. Output ONLY a JSON array of objects {"key":..., "value":...}. Each fact must be ONE self-contained statement that will be useful in a FUTURE session: a decision, a convention, a preference, a stable configuration, or a learned fact about the project. `key` is a short kebab-case slug; `value` is the full fact in one sentence. DROP transient step-by-step narration, anything true only for this one session, and anything obvious. Never invent facts not supported by the notes. If there is nothing durable, output []. Output at most 12 facts.';
896
/**
 * Parse an extractor model's raw text reply into a list of facts.
 *
 * The reply is searched for the span running from the first `[` to the last
 * `]`, which is parsed as JSON. Array items that are objects with a non-blank
 * string `value` become facts; the key is the slug of `key`, falling back to a
 * slug of the value (at most 48 characters). Items with an empty or already
 * seen key are dropped. `confidence` is kept only when it is a number in [0, 1].
 *
 * @param {string} raw Raw model output; non-string input yields [].
 * @param {number} [maxFacts] Maximum number of facts returned.
 * @param {number} [maxValueChars] Values are truncated to this many characters.
 * @returns {Array<{key: string, value: string, confidence?: number}>} Facts in
 *   input order; [] when nothing parseable is found.
 */
function parseExtraction(raw, maxFacts = 12, maxValueChars = 2e3) {
  // Some chat APIs return non-string content; treat that as "nothing found".
  if (typeof raw !== "string")
    return [];
  const match = raw.match(/\[[\s\S]*\]/);
  if (!match)
    return [];
  let parsed;
  try {
    parsed = JSON.parse(match[0]);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed))
    return [];
  const out = [];
  const seen = new Set();
  for (const item of parsed) {
    if (out.length >= maxFacts)
      break;
    if (typeof item !== "object" || item === null)
      continue;
    const rawKey = typeof item.key === "string" ? item.key : "";
    const value = typeof item.value === "string" ? item.value.trim() : "";
    if (value === "")
      continue;
    // Truncating the fallback slug can cut right after a separator; strip it
    // so keys never end in "-".
    const key = slugify(rawKey) || slugify(value).slice(0, 48).replace(/-+$/, "");
    if (key === "" || seen.has(key))
      continue;
    seen.add(key);
    const fact = { key, value: value.slice(0, maxValueChars) };
    const hasConfidence = typeof item.confidence === "number" && item.confidence >= 0 && item.confidence <= 1;
    out.push(hasConfidence ? { ...fact, confidence: item.confidence } : fact);
  }
  return out;
}
/**
 * Lower-case `s`, collapse every run of characters outside [a-z0-9] into a
 * single "-", trim hyphens from both ends and cap the result at 64 characters
 * (re-trimming a trailing hyphen left behind by the cap).
 *
 * @param {string} s Text to slugify.
 * @returns {string} Kebab-case slug; "" when `s` has no ASCII letters or digits.
 */
function slugify(s) {
  return s.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 64).replace(/-+$/, "");
}
932
/**
 * Fact extractor that posts session notes to an OpenAI-compatible
 * `{baseUrl}/chat/completions` endpoint and parses the reply with
 * `parseExtraction`. Extraction is best-effort: `extract` never throws for
 * transport or HTTP failures, it returns [] instead.
 */
var OpenAICompatExtractor = class {
  /** Provider label. */
  name;
  /** Model identifier sent in each request body. */
  model;
  #url;
  #apiKey;
  #maxRetries;
  #retryBaseMs;
  #temperature;
  #fetch;
  /**
   * @param {object} opts
   * @param {string} opts.name Provider label.
   * @param {string} opts.model Model identifier.
   * @param {string} opts.baseUrl API root; trailing slashes are stripped.
   * @param {string} [opts.apiKey] Sent as a Bearer token when non-empty.
   * @param {number} [opts.maxRetries] Retries after the first attempt (default 3).
   * @param {number} [opts.retryBaseMs] Base backoff in milliseconds (default 500).
   * @param {number} [opts.temperature] Sampling temperature (default 0).
   * @param {Function} [opts.fetchImpl] fetch implementation; defaults to the global.
   */
  constructor(opts) {
    this.name = opts.name;
    this.model = opts.model;
    this.#url = `${opts.baseUrl.replace(/\/+$/, "")}/chat/completions`;
    this.#apiKey = opts.apiKey;
    this.#maxRetries = opts.maxRetries ?? 3;
    this.#retryBaseMs = opts.retryBaseMs ?? 500;
    this.#temperature = opts.temperature ?? 0;
    this.#fetch = opts.fetchImpl ?? fetch;
  }
  /**
   * Extract facts from free-form session notes.
   *
   * HTTP 429/5xx responses and thrown errors are retried with exponential
   * backoff; any other non-2xx status ends the attempt with []. Once the
   * caller's signal is aborted no further request or backoff is started.
   *
   * @param {string} text Session notes; blank input returns [] without a request.
   * @param {{signal?: AbortSignal}} [opts] Optional cancellation signal.
   * @returns {Promise<Array<{key: string, value: string, confidence?: number}>>}
   */
  async extract(text, opts) {
    const trimmed = text.trim();
    if (trimmed === "")
      return [];
    const signal = opts?.signal;
    // Nothing to gain from issuing a request that is already cancelled.
    if (signal?.aborted === true)
      return [];
    const body = JSON.stringify({
      model: this.model,
      temperature: this.#temperature,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: `Session notes:\n\n${trimmed}` }
      ]
    });
    const headers = { "content-type": "application/json" };
    if (this.#apiKey !== void 0 && this.#apiKey !== "") {
      headers.authorization = `Bearer ${this.#apiKey}`;
    }
    for (let attempt = 0; attempt <= this.#maxRetries; attempt++) {
      try {
        const res = await this.#fetch(this.#url, {
          method: "POST",
          headers,
          body,
          ...(signal ? { signal } : {})
        });
        if (res.status === 429 || res.status >= 500) {
          // Transient: release the unread body, then fall through to the backoff.
          try {
            await res.body?.cancel?.();
          } catch {
            // Best effort only.
          }
        } else if (!res.ok) {
          return [];
        } else {
          const json = await res.json();
          const content = json?.choices?.[0]?.message?.content;
          // Non-string content cannot be parsed, and retrying would not change it.
          return typeof content === "string" ? parseExtraction(content) : [];
        }
      } catch {
        // Best-effort contract: transport/parse errors are retried, never surfaced.
      }
      // Stop instead of sleeping and re-sending once the caller has cancelled.
      if (signal?.aborted === true)
        return [];
      if (attempt < this.#maxRetries) {
        await delay(this.#retryBaseMs * 2 ** attempt);
      }
    }
    return [];
  }
};
998
/**
 * Promise-based timer.
 *
 * @param {number} ms Milliseconds to wait.
 * @returns {Promise<void>} Resolves once the timer fires; not cancellable.
 */
function delay(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
1001
+
523
1002
  // dist/sqlite.js
524
1003
  import { randomUUID } from "node:crypto";
525
1004
  import { mkdirSync } from "node:fs";
@@ -747,6 +1226,7 @@ function rowToRecord(row) {
747
1226
  var SqliteMemoryBackend = class {
748
1227
  db;
749
1228
  embedder;
1229
+ reranker;
750
1230
  /** Record ids whose vector is missing/stale, awaiting a batched embed. */
751
1231
  pendingEmbed = /* @__PURE__ */ new Set();
752
1232
  closed = false;
@@ -761,6 +1241,7 @@ var SqliteMemoryBackend = class {
761
1241
  this.db.exec(VEC_SCHEMA);
762
1242
  this.db.exec(`INSERT INTO agent_memory_fts (agent_memory_fts) VALUES ('rebuild')`);
763
1243
  this.embedder = opts.embedder;
1244
+ this.reranker = opts.reranker;
764
1245
  if (this.embedder !== void 0)
765
1246
  this.queueUnembedded();
766
1247
  }
@@ -871,15 +1352,44 @@ var SqliteMemoryBackend = class {
871
1352
  vecHits = [];
872
1353
  }
873
1354
  }
874
- if (vecHits.length === 0)
875
- return ftsHits.slice(0, finalLimit);
876
- if (ftsHits.length === 0)
877
- return vecHits.slice(0, finalLimit);
878
- const fused = reciprocalRankFusion([
879
- { id: "fts", items: ftsHits.map((h) => ({ key: h.record.id, payload: h })) },
880
- { id: "vec", items: vecHits.map((h) => ({ key: h.record.id, payload: h })) }
881
- ], { limit: finalLimit });
882
- return fused.map((f) => f.payload);
1355
+ let pool;
1356
+ if (vecHits.length === 0) {
1357
+ pool = ftsHits;
1358
+ } else if (ftsHits.length === 0) {
1359
+ pool = vecHits;
1360
+ } else {
1361
+ pool = reciprocalRankFusion([
1362
+ { id: "fts", items: ftsHits.map((h) => ({ key: h.record.id, payload: h })) },
1363
+ { id: "vec", items: vecHits.map((h) => ({ key: h.record.id, payload: h })) }
1364
+ ], { limit: poolLimit }).map((f) => f.payload);
1365
+ }
1366
+ const reranked = input.rerank === false ? pool : await this.rerankPool(input.query, pool, finalLimit);
1367
+ return reranked.slice(0, finalLimit);
1368
+ }
1369
+ /**
1370
+ * Reorder a first-stage pool with the cross-encoder reranker, scoring each
1371
+ * candidate's `key + value` jointly against the query. Returns the pool
1372
+ * unchanged when no reranker is configured, the pool is trivial, or the
1373
+ * provider errors.
1374
+ */
1375
+ async rerankPool(query, pool, topK) {
1376
+ if (this.reranker === void 0 || pool.length <= 1)
1377
+ return pool;
1378
+ try {
1379
+ const candidates = pool.map((h) => ({
1380
+ id: h.record.id,
1381
+ text: `${h.record.key}
1382
+ ${h.record.value}`
1383
+ }));
1384
+ const hits = await this.reranker.rerank(query, candidates, { topK });
1385
+ if (hits.length === 0)
1386
+ return pool;
1387
+ const byId = new Map(pool.map((h) => [h.record.id, h]));
1388
+ const reordered = hits.map((h) => byId.get(h.id)).filter((h) => h !== void 0);
1389
+ return reordered.length > 0 ? reordered : pool;
1390
+ } catch {
1391
+ return pool;
1392
+ }
883
1393
  }
884
1394
  /** FTS5 BM25 keyword channel → ranked hits (best first). */
885
1395
  ftsSearch(input, limit) {
@@ -1149,6 +1659,99 @@ function discoverMemoryEmbedder(env) {
1149
1659
  }
1150
1660
  return { id: "none", embedder: void 0 };
1151
1661
  }
1662
/**
 * Build the optional reranker selected by PROMETHEUS_MEMORY_RERANK_PROVIDER.
 *
 * Accepted values (case-insensitive, surrounding whitespace ignored):
 *   - unset, "" or "none": no reranker.
 *   - "voyage": VoyageRerankProvider; requires VOYAGE_API_KEY.
 *   - "bge" or "generic": OpenAICompatRerankProvider; requires
 *     PROMETHEUS_MEMORY_RERANK_ENDPOINT. Both spellings report the id "bge".
 *
 * @param {Record<string, string|undefined>} env Environment variables.
 * @returns {{id: string, provider: object|null}} Provider id and instance
 *   (null when reranking is disabled).
 * @throws {Error} When a required variable is missing or the provider name is unknown.
 */
function discoverMemoryReranker(env) {
  // Trim so a stray space in the environment value does not read as an unknown provider.
  const forced = (env.PROMETHEUS_MEMORY_RERANK_PROVIDER ?? "none").trim().toLowerCase();
  if (forced === "" || forced === "none")
    return { id: "none", provider: null };
  if (forced === "voyage") {
    const apiKey = env.VOYAGE_API_KEY;
    if (apiKey === void 0 || apiKey === "") {
      throw new Error('PROMETHEUS_MEMORY_RERANK_PROVIDER="voyage" requires VOYAGE_API_KEY.');
    }
    const provider = new VoyageRerankProvider({
      name: "voyage-rerank",
      apiKey,
      model: env.VOYAGE_RERANK_MODEL ?? "rerank-2.5",
      region: "us",
      baseUrl: env.VOYAGE_BASE_URL ?? "https://api.voyageai.com/v1",
      maxRetries: intEnv(env, "VOYAGE_RERANK_MAX_RETRIES", 6),
      retryBaseMs: intEnv(env, "VOYAGE_RERANK_RETRY_BASE_MS", 2e3),
      batchSize: intEnv(env, "VOYAGE_RERANK_BATCH", 100)
    });
    return { id: "voyage", provider };
  }
  if (forced === "bge" || forced === "generic") {
    const baseUrl = env.PROMETHEUS_MEMORY_RERANK_ENDPOINT;
    if (baseUrl === void 0 || baseUrl === "") {
      throw new Error(`PROMETHEUS_MEMORY_RERANK_PROVIDER="${forced}" requires PROMETHEUS_MEMORY_RERANK_ENDPOINT.`);
    }
    const model = env.PROMETHEUS_MEMORY_RERANK_MODEL ?? "bge-reranker-base";
    const provider = new OpenAICompatRerankProvider({
      name: env.PROMETHEUS_MEMORY_RERANK_NAME ?? `bge-rerank:${model}`,
      model,
      region: "self-hosted",
      baseUrl,
      maxRetries: intEnv(env, "PROMETHEUS_MEMORY_RERANK_MAX_RETRIES", 6),
      retryBaseMs: intEnv(env, "PROMETHEUS_MEMORY_RERANK_RETRY_BASE_MS", 2e3),
      batchSize: intEnv(env, "PROMETHEUS_MEMORY_RERANK_BATCH", 100),
      timeoutMs: intEnv(env, "PROMETHEUS_MEMORY_RERANK_TIMEOUT_MS", 18e4),
      // Only pass apiKey when one is configured.
      ...(env.PROMETHEUS_MEMORY_RERANK_API_KEY ? { apiKey: env.PROMETHEUS_MEMORY_RERANK_API_KEY } : {})
    });
    return { id: "bge", provider };
  }
  throw new Error(`unknown PROMETHEUS_MEMORY_RERANK_PROVIDER="${forced}" (expected "none", "voyage", "bge", or "generic")`);
}
1704
/**
 * Build the optional fact extractor selected by PROMETHEUS_MEMORY_EXTRACT_PROVIDER.
 *
 * Accepted values (case-insensitive, surrounding whitespace ignored):
 *   - unset, "" or "none": no extractor.
 *   - "mistral": requires MISTRAL_API_KEY.
 *   - "openai": requires OPENAI_API_KEY.
 *   - "generic" or "openai-compat": requires PROMETHEUS_MEMORY_EXTRACT_ENDPOINT;
 *     both spellings report the id "generic".
 *
 * @param {Record<string, string|undefined>} env Environment variables.
 * @returns {{id: string, provider: object|null}} Provider id and instance
 *   (null when extraction is disabled).
 * @throws {Error} When a required variable is missing or the provider name is unknown.
 */
function discoverMemoryExtractor(env) {
  // Trim so a stray space in the environment value does not read as an unknown provider.
  const forced = (env.PROMETHEUS_MEMORY_EXTRACT_PROVIDER ?? "none").trim().toLowerCase();
  if (forced === "" || forced === "none")
    return { id: "none", provider: null };
  if (forced === "mistral") {
    const apiKey = env.MISTRAL_API_KEY;
    if (apiKey === void 0 || apiKey === "") {
      throw new Error('PROMETHEUS_MEMORY_EXTRACT_PROVIDER="mistral" requires MISTRAL_API_KEY.');
    }
    return {
      id: "mistral",
      provider: new OpenAICompatExtractor({
        name: "mistral-extract",
        model: env.PROMETHEUS_MEMORY_EXTRACT_MODEL ?? "mistral-small-latest",
        baseUrl: env.MISTRAL_BASE_URL ?? "https://api.mistral.ai/v1",
        apiKey
      })
    };
  }
  if (forced === "openai") {
    const apiKey = env.OPENAI_API_KEY;
    if (apiKey === void 0 || apiKey === "") {
      throw new Error('PROMETHEUS_MEMORY_EXTRACT_PROVIDER="openai" requires OPENAI_API_KEY.');
    }
    return {
      id: "openai",
      provider: new OpenAICompatExtractor({
        name: "openai-extract",
        model: env.PROMETHEUS_MEMORY_EXTRACT_MODEL ?? "gpt-4o-mini",
        baseUrl: env.OPENAI_BASE_URL ?? "https://api.openai.com/v1",
        apiKey
      })
    };
  }
  if (forced === "generic" || forced === "openai-compat") {
    const baseUrl = env.PROMETHEUS_MEMORY_EXTRACT_ENDPOINT;
    if (baseUrl === void 0 || baseUrl === "") {
      throw new Error(`PROMETHEUS_MEMORY_EXTRACT_PROVIDER="${forced}" requires PROMETHEUS_MEMORY_EXTRACT_ENDPOINT.`);
    }
    return {
      id: "generic",
      provider: new OpenAICompatExtractor({
        name: env.PROMETHEUS_MEMORY_EXTRACT_NAME ?? "generic-extract",
        model: env.PROMETHEUS_MEMORY_EXTRACT_MODEL ?? "default",
        baseUrl,
        // Only pass apiKey when one is configured.
        ...(env.PROMETHEUS_MEMORY_EXTRACT_API_KEY ? { apiKey: env.PROMETHEUS_MEMORY_EXTRACT_API_KEY } : {})
      })
    };
  }
  throw new Error(`unknown PROMETHEUS_MEMORY_EXTRACT_PROVIDER="${forced}" (expected "none", "mistral", "openai", "generic", or "openai-compat")`);
}
1152
1755
  function composeFromEnv(opts) {
1153
1756
  const env = opts.env;
1154
1757
  const override = (opts.workspaceRootOverride ?? "").trim();
@@ -1160,7 +1763,12 @@ function composeFromEnv(opts) {
1160
1763
  const rawDbPath = env.PROMETHEUS_MEMORY_DB_PATH;
1161
1764
  const dbPath = rawDbPath !== void 0 && rawDbPath !== "" ? rawDbPath : defaultMemoryDbPath();
1162
1765
  const { id: embedderId, embedder } = discoverMemoryEmbedder(env);
1163
- const backend = new SqliteMemoryBackend(dbPath, embedder !== void 0 ? { embedder } : {});
1766
+ const { id: rerankerId, provider: reranker } = discoverMemoryReranker(env);
1767
+ const { id: extractorId, provider: extractor } = discoverMemoryExtractor(env);
1768
+ const backend = new SqliteMemoryBackend(dbPath, {
1769
+ ...embedder !== void 0 ? { embedder } : {},
1770
+ ...reranker !== null ? { reranker } : {}
1771
+ });
1164
1772
  return {
1165
1773
  backend,
1166
1774
  workspaceRoot,
@@ -1169,6 +1777,10 @@ function composeFromEnv(opts) {
1169
1777
  dbPath,
1170
1778
  embeddingsEnabled: embedder !== void 0,
1171
1779
  embedderId,
1780
+ reranker,
1781
+ rerankerId,
1782
+ extractor,
1783
+ extractorId,
1172
1784
  close: () => backend.close()
1173
1785
  };
1174
1786
  }
@@ -1598,7 +2210,7 @@ var setupInput = {
1598
2210
  runtimes: z.array(runtimeEnum).min(1).optional()
1599
2211
  };
1600
2212
  function registerTools(server, deps) {
1601
- const { backend, workspaceRoot, projectId, projectName, dbPath } = deps;
2213
+ const { backend, workspaceRoot, projectId, projectName, dbPath, extractor } = deps;
1602
2214
  server.registerTool("read", {
1603
2215
  title: "Recall agent memory",
1604
2216
  description: "Read agent memory for this project along the scope chain (project \u2192 workspace \u2192 tenant \u2192 system; narrowest scope wins). Syncs `.prometheus/memories/*.md` first, then returns the resolved records plus a prompt-ready `woven` markdown block (token-capped). Call this at the START of a session or task to recall what earlier sessions learned.",
@@ -1662,6 +2274,41 @@ ${p.value}`)
1662
2274
  assertValueSize(texts);
1663
2275
  assertNoSecrets(texts);
1664
2276
  const scope = args.scope ?? "project";
2277
+ let facts = args.facts ?? [];
2278
+ let extractedCount = 0;
2279
+ if (extractor) {
2280
+ const prose = [args.plan ?? "", args.outcome ?? ""].join("\n").trim();
2281
+ if (prose !== "") {
2282
+ try {
2283
+ const mined = await extractor.extract(prose);
2284
+ const have = new Set(facts.map((f) => f.key));
2285
+ const fresh = mined.filter((f) => {
2286
+ if (have.has(f.key))
2287
+ return false;
2288
+ try {
2289
+ assertNoSecrets(`${f.key}
2290
+ ${f.value}`);
2291
+ } catch {
2292
+ return false;
2293
+ }
2294
+ have.add(f.key);
2295
+ return true;
2296
+ });
2297
+ if (fresh.length > 0) {
2298
+ facts = [
2299
+ ...facts,
2300
+ ...fresh.map((f) => ({
2301
+ key: f.key,
2302
+ value: f.value,
2303
+ ...f.confidence !== void 0 ? { confidence: f.confidence } : {}
2304
+ }))
2305
+ ];
2306
+ extractedCount = fresh.length;
2307
+ }
2308
+ } catch {
2309
+ }
2310
+ }
2311
+ }
1665
2312
  const written = await backend.consolidate({
1666
2313
  projectId,
1667
2314
  scope,
@@ -1669,10 +2316,10 @@ ${p.value}`)
1669
2316
  sessionId: args.sessionId,
1670
2317
  plan: args.plan,
1671
2318
  outcome: args.outcome,
1672
- facts: args.facts,
2319
+ facts,
1673
2320
  procedures: args.procedures
1674
2321
  });
1675
- return textResult({ written: written.map(recordToJson) });
2322
+ return textResult({ written: written.map(recordToJson), extracted: extractedCount });
1676
2323
  });
1677
2324
  server.registerTool("search", {
1678
2325
  title: "Search agent memory",
@@ -1787,7 +2434,7 @@ async function main() {
1787
2434
  env,
1788
2435
  ...override !== void 0 && override !== "" ? { workspaceRootOverride: override } : {}
1789
2436
  });
1790
- process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} (via ${via}) project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath} embed=${composed.embedderId}${composed.embeddingsEnabled ? "" : " (keyword-only)"}
2437
+ process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} (via ${via}) project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath} embed=${composed.embedderId}${composed.embeddingsEnabled ? "" : " (keyword-only)"} rerank=${composed.rerankerId} extract=${composed.extractorId}
1791
2438
  `);
1792
2439
  registerTools(server, composed);
1793
2440
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prom.codes/memory-mcp",
3
- "version": "0.3.1",
3
+ "version": "0.4.0",
4
4
  "description": "prom.codes Memory — persistent, local-first agent memory as an MCP server.",
5
5
  "type": "module",
6
6
  "bin": {