@stablemodels/qmd-cf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/dist/chunker.d.ts +11 -0
  3. package/dist/chunker.d.ts.map +1 -0
  4. package/dist/chunker.js +199 -0
  5. package/dist/chunker.js.map +1 -0
  6. package/dist/fts.d.ts +19 -0
  7. package/dist/fts.d.ts.map +1 -0
  8. package/dist/fts.js +109 -0
  9. package/dist/fts.js.map +1 -0
  10. package/dist/hash.d.ts +7 -0
  11. package/dist/hash.d.ts.map +1 -0
  12. package/dist/hash.js +14 -0
  13. package/dist/hash.js.map +1 -0
  14. package/dist/index.d.ts +56 -0
  15. package/dist/index.d.ts.map +1 -0
  16. package/dist/index.js +57 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/qmd.d.ts +158 -0
  19. package/dist/qmd.d.ts.map +1 -0
  20. package/dist/qmd.js +462 -0
  21. package/dist/qmd.js.map +1 -0
  22. package/dist/rrf.d.ts +22 -0
  23. package/dist/rrf.d.ts.map +1 -0
  24. package/dist/rrf.js +92 -0
  25. package/dist/rrf.js.map +1 -0
  26. package/dist/schema.d.ts +14 -0
  27. package/dist/schema.d.ts.map +1 -0
  28. package/dist/schema.js +128 -0
  29. package/dist/schema.js.map +1 -0
  30. package/dist/testing.d.ts +77 -0
  31. package/dist/testing.d.ts.map +1 -0
  32. package/dist/testing.js +242 -0
  33. package/dist/testing.js.map +1 -0
  34. package/dist/types.d.ts +118 -0
  35. package/dist/types.d.ts.map +1 -0
  36. package/dist/types.js +9 -0
  37. package/dist/types.js.map +1 -0
  38. package/dist/vector.d.ts +38 -0
  39. package/dist/vector.d.ts.map +1 -0
  40. package/dist/vector.js +174 -0
  41. package/dist/vector.js.map +1 -0
  42. package/package.json +49 -0
  43. package/src/bun-sqlite.d.ts +17 -0
  44. package/src/chunker.ts +250 -0
  45. package/src/fts.ts +140 -0
  46. package/src/hash.ts +13 -0
  47. package/src/index.ts +72 -0
  48. package/src/qmd.ts +706 -0
  49. package/src/rrf.ts +115 -0
  50. package/src/schema.ts +147 -0
  51. package/src/testing.ts +303 -0
  52. package/src/types.ts +124 -0
  53. package/src/vector.ts +236 -0
package/src/fts.ts ADDED
@@ -0,0 +1,140 @@
1
+ import type { FtsResult, SearchOptions } from "./types.js";
2
+
3
/**
 * Build an FTS5 MATCH expression from a free-form natural language query.
 *
 * Strategy (from qmd):
 * - No usable terms: the empty phrase (`""`)
 * - Single term: prefix match (`"term" *`)
 * - Multi-term: exact phrase OR NEAR(terms, 10) OR individual terms ORed
 *
 * This gives good recall while still ranking exact matches highest via BM25.
 *
 * Every term is emitted inside double quotes after the characters
 * `' " ( ) { } [ ] * ^ ~ :` have been removed from it.
 *
 * @param query Raw user input; split into terms on whitespace.
 * @returns A query string intended for the right-hand side of `MATCH`.
 */
export function buildFts5Query(query: string): string {
  const terms = query
    .trim()
    .split(/\s+/)
    // Strip characters that break FTS5 syntax
    .map((t) => t.replace(/['"(){}[\]*^~:]/g, ""))
    // Filter AFTER stripping: a token made only of stripped characters
    // (e.g. `*` or `()`) collapses to "" and must not be emitted as an empty
    // phrase, nor turn a one-word query into a multi-term one.
    .filter((t) => t.length > 0);

  if (terms.length === 0) return '""';

  if (terms.length === 1) {
    // Single term: prefix match
    return `"${terms[0]}" *`;
  }

  // Multi-term: combine strategies for best recall
  const quoted = terms.map((t) => `"${t}"`);
  const phrase = `"${terms.join(" ")}"`;
  const near = `NEAR(${quoted.join(" ")}, 10)`;
  const orTerms = quoted.join(" OR ");

  return `(${phrase}) OR (${near}) OR (${orTerms})`;
}
34
+
35
/**
 * Normalize a raw BM25 rank to a relevance score in [0, 1) where higher = better.
 * SQLite FTS5 bm25() returns negative values where lower (more negative) = better match.
 * We convert to: score = abs(raw) / (1 + abs(raw))
 *
 * A larger magnitude (stronger match) therefore maps to a higher score that
 * approaches 1, and a raw rank of 0 maps to 0. Only the magnitude is used, so
 * the sign of `raw` does not affect the result.
 *
 * @param raw Rank value as returned by bm25().
 * @returns The normalized score.
 */
function normalizeBm25(raw: number): number {
  const magnitude = Math.abs(raw);
  return magnitude / (1 + magnitude);
}
43
+
44
/**
 * One raw row produced by the SELECT statement in searchFts, before it is
 * mapped to an FtsResult. Property names mirror the SQL column names/aliases.
 */
type FtsRow = {
  // Columns read from the qmd_chunks_fts table (alias `f`).

  /** Id of the document the chunk belongs to; also the join key to qmd_documents. */
  doc_id: string;
  /** The chunk's `seq` column. */
  seq: number;
  /** Chunk text; surfaced to callers as the result snippet. */
  content: string;

  /** Value of the bm25(...) expression, aliased as `rank` in the query. */
  rank: number;

  // Columns read from the joined qmd_documents table (alias `d`).

  title: string | null;
  doc_type: string | null;
  namespace: string | null;
  /** JSON text; searchFts runs it through JSON.parse when it is non-empty. */
  metadata: string | null;
};
54
+
55
/**
 * Execute a full-text search using FTS5 BM25 ranking.
 *
 * BM25 weights: title gets 10x boost, content gets 1x.
 * Results are deduplicated by document (keeping the best-scoring chunk).
 *
 * @param sql SQL storage handle; the query reads `qmd_chunks_fts` joined to
 *   `qmd_documents`.
 * @param query Natural-language query; converted with buildFts5Query.
 * @param options Optional filters:
 *   - `limit`: maximum number of documents returned (default 10).
 *   - `docType`: exact match on the document's `doc_type`.
 *   - `namespace`: exact match, or, when it contains `*`, a prefix match:
 *     trailing `*` and `/` are removed and every namespace starting with
 *     `<prefix>/` matches.
 * @returns At most `limit` results, one per document, highest score first.
 * @throws Whatever JSON.parse throws if a matched document's `metadata`
 *   column does not hold valid JSON.
 */
export function searchFts(
  sql: SqlStorage,
  query: string,
  options: SearchOptions = {},
): FtsResult[] {
  const ftsQuery = buildFts5Query(query);
  const limit = options.limit ?? 10;

  // Build WHERE clauses for optional filters
  const filters: string[] = [];
  const bindings: unknown[] = [ftsQuery];

  if (options.docType) {
    filters.push("d.doc_type = ?");
    bindings.push(options.docType);
  }
  if (options.namespace) {
    if (options.namespace.includes("*")) {
      const prefix = options.namespace.replace(/\*+$/, "").replace(/\/+$/, "");
      // `%` and `_` are LIKE wildcards. Escape them (and the escape character
      // itself) so a namespace such as "user_1/*" cannot match "userA1/...".
      const escapedPrefix = prefix.replace(/[\\%_]/g, "\\$&");
      filters.push("d.namespace LIKE ? ESCAPE '\\'");
      bindings.push(`${escapedPrefix}/%`);
    } else {
      filters.push("d.namespace = ?");
      bindings.push(options.namespace);
    }
  }

  const whereClause = filters.length > 0 ? `AND ${filters.join(" AND ")}` : "";

  // BM25 weights: doc_id (unindexed, 0), seq (unindexed, 0), title (10.0), content (1.0)
  const rows = sql
    .exec<FtsRow>(
      `
      SELECT
        f.doc_id,
        f.seq,
        f.content,
        bm25(qmd_chunks_fts, 0, 0, 10.0, 1.0) as rank,
        d.title,
        d.doc_type,
        d.namespace,
        d.metadata
      FROM qmd_chunks_fts f
      JOIN qmd_documents d ON d.id = f.doc_id
      WHERE qmd_chunks_fts MATCH ?
      ${whereClause}
      ORDER BY rank
      LIMIT ?
      `,
      ...bindings,
      // Fetch extra to allow dedup
      limit * 3,
    )
    .toArray();

  // Deduplicate by doc_id, keeping the best-scoring chunk
  const seen = new Map<string, FtsResult>();

  for (const row of rows) {
    const score = normalizeBm25(row.rank);
    const existing = seen.get(row.doc_id);

    if (!existing || score > existing.score) {
      seen.set(row.doc_id, {
        docId: row.doc_id,
        score,
        snippet: row.content,
        seq: row.seq,
        title: row.title,
        docType: row.doc_type,
        namespace: row.namespace,
        metadata: row.metadata ? JSON.parse(row.metadata) : null,
      });
    }
  }

  // Array.from creates a fresh array, so sorting in place is safe here.
  return Array.from(seen.values())
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
package/src/hash.ts ADDED
@@ -0,0 +1,13 @@
1
/**
 * 32-bit FNV-1a hash: quick, deterministic and not suitable for cryptography.
 * Its purpose is change detection, so unchanged content can skip re-indexing.
 *
 * Each step mixes in one UTF-16 code unit (`charCodeAt`) of the input.
 *
 * @param input Text to hash.
 * @returns The hash as exactly 8 lowercase hexadecimal characters (zero-padded).
 */
export function fnv1a32(input: string): string {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  let acc = OFFSET_BASIS;
  let pos = 0;
  while (pos < input.length) {
    // XOR the code unit in first, then multiply; Math.imul keeps 32-bit wraparound.
    acc = Math.imul(acc ^ input.charCodeAt(pos), PRIME);
    pos += 1;
  }

  // Reinterpret the signed 32-bit accumulator as unsigned before formatting.
  const unsigned = acc >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
package/src/index.ts ADDED
@@ -0,0 +1,72 @@
1
+ /**
2
+ * @stablemodels/qmd-cf — Hybrid full-text + vector search for Cloudflare Durable Objects.
3
+ *
4
+ * A DO-native reimagination of qmd (https://github.com/tobi/qmd).
5
+ *
6
+ * FTS5 runs co-located in the Durable Object's SQLite for zero-latency BM25 keyword search.
7
+ * Optionally, Cloudflare Vectorize adds semantic vector search, fused via Reciprocal Rank Fusion.
8
+ *
9
+ * Cloudflare platform types (SqlStorage, Vectorize, VectorizeVector, etc.) are
10
+ * ambient from @cloudflare/workers-types — consumers access them directly.
11
+ *
12
+ * @example FTS-only (zero external dependencies)
13
+ * ```ts
14
+ * import { Qmd } from "@stablemodels/qmd-cf";
15
+ *
16
+ * export class MyDurableObject extends DurableObject {
17
+ * qmd: Qmd;
18
+ *
19
+ * constructor(ctx: DurableObjectState, env: Env) {
20
+ * super(ctx, env);
21
+ * this.qmd = new Qmd(ctx.storage.sql);
22
+ * }
23
+ *
24
+ * async search(query: string) {
25
+ * return this.qmd.search(query);
26
+ * }
27
+ * }
28
+ * ```
29
+ *
30
+ * @example Hybrid FTS + Vector search
31
+ * ```ts
32
+ * import { Qmd } from "@stablemodels/qmd-cf";
33
+ *
34
+ * export class MyDurableObject extends DurableObject {
35
+ * qmd: Qmd;
36
+ *
37
+ * constructor(ctx: DurableObjectState, env: Env) {
38
+ * super(ctx, env);
39
+ * this.qmd = new Qmd(ctx.storage.sql, {
40
+ * vectorize: env.VECTORIZE,
41
+ * embedFn: (texts) =>
42
+ * env.AI.run("@cf/baai/bge-m3", { text: texts })
43
+ * .then(r => r.data),
44
+ * });
45
+ * }
46
+ * }
47
+ * ```
48
+ */
49
+
50
+ // Main class
51
+ export { Qmd } from "./qmd.js";
52
+
53
+ // Domain types
54
+ export type {
55
+ Document,
56
+ Chunk,
57
+ FtsResult,
58
+ VectorResult,
59
+ SearchResult,
60
+ SearchOptions,
61
+ HybridSearchOptions,
62
+ QmdConfig,
63
+ IndexStats,
64
+ EmbedFn,
65
+ } from "./types.js";
66
+
67
+ // Utilities (useful for custom pipelines)
68
+ export { chunkText } from "./chunker.js";
69
+ export { buildFts5Query } from "./fts.js";
70
+ export { fnv1a32 } from "./hash.js";
71
+ export { reciprocalRankFusion } from "./rrf.js";
72
+ export { formatDocForEmbedding, formatQueryForEmbedding } from "./vector.js";