@quaesitor-textus/mongo 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/LICENSE +180 -0
  2. package/README.md +417 -0
  3. package/dist/adapters/express.cjs +110 -0
  4. package/dist/adapters/express.d.cts +20 -0
  5. package/dist/adapters/express.d.ts +20 -0
  6. package/dist/adapters/express.js +7 -0
  7. package/dist/adapters/fastify.cjs +113 -0
  8. package/dist/adapters/fastify.d.cts +10 -0
  9. package/dist/adapters/fastify.d.ts +10 -0
  10. package/dist/adapters/fastify.js +16 -0
  11. package/dist/adapters/next-app.cjs +120 -0
  12. package/dist/adapters/next-app.d.cts +9 -0
  13. package/dist/adapters/next-app.d.ts +9 -0
  14. package/dist/adapters/next-app.js +23 -0
  15. package/dist/adapters/next-pages.cjs +110 -0
  16. package/dist/adapters/next-pages.d.cts +5 -0
  17. package/dist/adapters/next-pages.d.ts +5 -0
  18. package/dist/adapters/next-pages.js +7 -0
  19. package/dist/chunk-AUIK33V2.js +55 -0
  20. package/dist/chunk-RXTFVXXU.js +42 -0
  21. package/dist/index.cjs +288 -0
  22. package/dist/index.d.cts +51 -0
  23. package/dist/index.d.ts +51 -0
  24. package/dist/index.js +203 -0
  25. package/dist/startSearchSync-Bk7Na8Do.d.cts +39 -0
  26. package/dist/startSearchSync-Bk7Na8Do.d.ts +39 -0
  27. package/package.json +88 -0
  28. package/src/adapters/express.ts +8 -0
  29. package/src/adapters/fastify.ts +19 -0
  30. package/src/adapters/next-app.ts +27 -0
  31. package/src/adapters/next-pages.ts +11 -0
  32. package/src/adapters/shared.ts +61 -0
  33. package/src/buildTextSearchFilter.test.ts +30 -0
  34. package/src/buildTextSearchFilter.ts +34 -0
  35. package/src/computeSearchFields.test.ts +23 -0
  36. package/src/computeSearchFields.ts +31 -0
  37. package/src/config.ts +14 -0
  38. package/src/createLiveSearch.test.ts +48 -0
  39. package/src/createLiveSearch.ts +57 -0
  40. package/src/index.ts +12 -0
  41. package/src/modes.test.ts +20 -0
  42. package/src/modes.ts +24 -0
  43. package/src/parity.test.ts +60 -0
  44. package/src/searchIndexes.test.ts +12 -0
  45. package/src/searchIndexes.ts +22 -0
  46. package/src/sse.test.ts +11 -0
  47. package/src/sse.ts +7 -0
  48. package/src/startSearchSync.test.ts +42 -0
  49. package/src/startSearchSync.ts +91 -0
  50. package/src/version.test.ts +40 -0
  51. package/src/version.ts +41 -0
@@ -0,0 +1,11 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { formatSse, sseComment } from './sse'
3
+
4
// Wire-format checks for the SSE helpers.
describe('sse', () => {
  it('formats a data event with trailing blank line', () => {
    const event = { type: 'match', item: { _id: 'x' } }
    const frame = formatSse(event)
    expect(frame).toBe('data: {"type":"match","item":{"_id":"x"}}\n\n')
  })

  it('formats a heartbeat comment', () => {
    const frame = sseComment()
    expect(frame).toBe(': ping\n\n')
  })
})
package/src/sse.ts ADDED
@@ -0,0 +1,7 @@
1
+ // Framework-agnostic SSE wire helpers.
2
/**
 * Serializes `event` as a single SSE `data:` frame terminated by a blank line.
 *
 * JSON.stringify escapes line breaks inside strings, so the payload always
 * stays on one `data:` line.
 *
 * @param event - Value to send; serialized with JSON.stringify.
 * @returns The frame text, `data: <json>` followed by two newlines.
 */
export function formatSse(event: unknown): string {
  // JSON.stringify yields undefined (not a string) for undefined, functions and
  // symbols; emit JSON null instead of the unparseable text "undefined".
  const payload = (JSON.stringify(event) as string | undefined) ?? 'null'
  return `data: ${payload}\n\n`
}
5
/**
 * Builds an SSE comment frame (lines starting with `:`), used as a heartbeat.
 *
 * @param text - Comment text; defaults to `ping`. Line breaks in `text` are
 *   allowed: each resulting line is emitted as its own comment line.
 * @returns The comment line(s) followed by a blank line.
 */
export function sseComment(text = 'ping'): string {
  // A raw line break would end the comment and let the remainder be read as a
  // field line (e.g. `data: ...`), so every line gets its own `: ` prefix.
  const lines = text.split(/\r\n|\r|\n/)
  return lines.map((line) => `: ${line}\n`).join('') + '\n'
}
@@ -0,0 +1,42 @@
1
+ import { describe, it, expect, beforeAll, afterAll } from 'vitest'
2
+ import { MongoClient } from 'mongodb'
3
+ import { startSearchSync } from './startSearchSync'
4
+ import type { MongoSearchConfig } from './config'
5
+
6
// Connection string for the test deployment; overridable through MONGO_URL.
const mongoUrl = process.env.MONGO_URL ?? 'mongodb://localhost:27018/?directConnection=true'
// Short server-selection timeout so an absent server is detected quickly.
const connectOptions = { serverSelectionTimeoutMS: 1500 }
// Minimal search config: a single `name` target built from the `name` field.
const config: MongoSearchConfig = {
  targets: {
    name: { fields: ['name'] },
  },
}
let client: MongoClient
// Set to false when the connection attempt fails; the tests then return early.
let available = true

beforeAll(async () => {
  try {
    client = await MongoClient.connect(mongoUrl, connectOptions)
  } catch {
    available = false
  }
})

afterAll(async () => {
  if (client) await client.close()
})
16
+
17
// Integration tests for the startup backfill. They need a reachable MongoDB and
// return early (passing vacuously) when the connection attempt failed.
describe('startSearchSync backfill', () => {
  // Re-reads via `read` until `done` accepts the value or `timeoutMs` elapses,
  // then returns the last value read. Replaces a fixed sleep so the tests finish
  // as soon as the backfill lands and tolerate a slow server.
  async function pollUntil<T>(
    read: () => Promise<T>,
    done: (value: T) => boolean,
    timeoutMs = 3000,
    intervalMs = 50,
  ): Promise<T> {
    const deadline = Date.now() + timeoutMs
    let value = await read()
    while (!done(value) && Date.now() < deadline) {
      await new Promise((resolve) => setTimeout(resolve, intervalMs))
      value = await read()
    }
    return value
  }

  it('derives a pre-existing doc written before the watcher starts', async () => {
    if (!available) return
    const col = client.db('qt_backfill_test').collection('docs')
    await col.deleteMany({})
    await col.insertOne({ _id: 'pre', name: 'Émile Zola' } as never) // raw, before watcher
    const sync = startSearchSync(col, config, { backfill: true })
    try {
      const doc = await pollUntil(
        async () => (await col.findOne({ _id: 'pre' as never })) as any,
        (d) => d?._qt?.name?.norm === 'emile zola',
      )
      expect(doc?._qt?.name?.norm).toBe('emile zola')
    } finally {
      // Always close the change stream, even when the assertion above throws.
      await sync.stop()
    }
  })

  it('re-derives a doc whose stored version is stale', async () => {
    if (!available) return
    const col = client.db('qt_backfill_test').collection('docs')
    await col.deleteMany({})
    // Doc that already has derived fields but stamped with an obsolete version.
    await col.insertOne({ _id: 'stale', name: 'Wisława', _qt: { name: { norm: 'WRONG', ngrams: [] }, _v: 'old:0' } } as never)
    const sync = startSearchSync(col, config, { backfill: true })
    try {
      const doc = await pollUntil(
        async () => (await col.findOne({ _id: 'stale' as never })) as any,
        (d) => d?._qt?.name?.norm === 'wislawa',
      )
      expect(doc?._qt?.name?.norm).toBe('wislawa') // re-derived with current folding
    } finally {
      // Always close the change stream, even when the assertion above throws.
      await sync.stop()
    }
  })
})
@@ -0,0 +1,91 @@
1
+ import type { ChangeStream, Collection } from 'mongodb'
2
+ import type { MongoSearchConfig } from './config'
3
+ import { DEFAULT_NAMESPACE } from './config'
4
+ import { computeSearchFields } from './computeSearchFields'
5
+ import { searchFieldsVersion } from './version'
6
+
7
/** Events delivered to listeners registered through `SearchSync.on`. */
export type SearchSyncEvent =
  // A burst of change-stream derive work, or the startup backfill, has begun.
  | { type: 'indexing-started' }
  // The burst went idle (or the backfill ended); `count` is the number of
  // documents processed and `durationMs` the elapsed wall-clock time.
  | { type: 'indexing-finished'; count: number; durationMs: number }
  // The derive write for the document with this `_id` has resolved.
  | { type: 'indexed'; id: unknown }

/** Callback invoked synchronously for every emitted event. */
export type SearchSyncListener = (event: SearchSyncEvent) => void

/** Handle returned by `startSearchSync`. */
export interface SearchSync {
  /** Registers a listener; registering the same function twice has no extra effect. */
  on(listener: SearchSyncListener): void
  /** Removes a previously registered listener. */
  off(listener: SearchSyncListener): void
  /** Stops deriving, drops all listeners and closes the change stream. */
  stop(): Promise<void>
}

/** Options accepted by `startSearchSync`. */
export interface StartSearchSyncOptions {
  /** Quiet period, in ms, after the last change before `indexing-finished` fires. Default 750. */
  idleMs?: number
  /** Derive pre-existing documents that are missing or stale on start. Default false. */
  backfill?: boolean
  /**
   * Optional sink for failures that are otherwise ignored: derive-write errors,
   * backfill cursor errors and change-stream `error` events. When omitted,
   * write and backfill failures are ignored and no `error` listener is attached
   * to the stream.
   * NOTE(review): an emitter `error` event with no listener is normally thrown
   * by Node — confirm that is the desired default for stream failures.
   */
  onError?: (error: unknown) => void
}

/**
 * Tails the collection change stream, derives search fields, and notifies
 * listeners. Requires a replica set.
 *
 * Emits `indexing-started` / `indexing-finished` (debounced burst, for logging)
 * and a per-doc `indexed` event AFTER the derive write resolves, so filters on
 * the derived fields will match.
 *
 * With `backfill: true`, documents whose namespace block is missing or carries
 * a different version stamp are re-derived once on start (change streams are
 * forward-only, so this catches docs written before the watcher ran or during
 * downtime — e.g. an external Python writer). The backfill itself emits only
 * `indexing-started` / `indexing-finished`, and it begins on a later microtask
 * so listeners registered right after this call returns observe both events.
 *
 * @param collection - Collection to watch and update.
 * @param config - Search configuration; `config.namespace` selects the derived block.
 * @param options - See `StartSearchSyncOptions`.
 * @returns A handle to subscribe to events and to stop the sync.
 */
export function startSearchSync(
  collection: Collection,
  config: MongoSearchConfig,
  options: StartSearchSyncOptions = {},
): SearchSync {
  const ns = config.namespace ?? DEFAULT_NAMESPACE
  const { idleMs = 750, backfill = false, onError } = options
  const stream: ChangeStream = collection.watch([], { fullDocument: 'updateLookup' })
  const listeners = new Set<SearchSyncListener>()
  const emit = (e: SearchSyncEvent) => { for (const l of listeners) l(e) }
  // Forwards a failure to the optional sink; a no-op when none was supplied.
  const report = (error: unknown) => { onError?.(error) }

  let stopped = false
  let active = false
  let count = 0
  let startedAt = 0
  let idleTimer: ReturnType<typeof setTimeout> | undefined

  // Attach an 'error' listener only on request, so callers that did not opt in
  // keep the previous stream-error behavior.
  if (onError) stream.on('error', report)

  // The change document shape depends on the operation type; it is only read
  // for `operationType` and `fullDocument` here.
  stream.on('change', (change: any) => {
    // Events can still arrive between stop() being called and the stream
    // closing; do not write or arm timers for them.
    if (stopped) return
    if (!['insert', 'update', 'replace'].includes(change.operationType)) return
    const doc = change.fullDocument
    if (!doc) return
    const derived = computeSearchFields(doc, config) as Record<string, unknown>
    // Loop guard: our own echo writes already match -> skip (and don't count).
    if (JSON.stringify(doc[ns]) === JSON.stringify(derived[ns])) return

    if (!active) { active = true; count = 0; startedAt = Date.now(); emit({ type: 'indexing-started' }) }
    count += 1
    // Emit `indexed` only AFTER the derive write lands, so live match-tests see
    // the derived fields.
    void collection.updateOne({ _id: doc._id }, { $set: { [ns]: derived[ns] } })
      .then(() => emit({ type: 'indexed', id: doc._id }))
      .catch(report) // individual write failures never break the stream handler

    if (idleTimer) clearTimeout(idleTimer)
    idleTimer = setTimeout(() => {
      active = false
      emit({ type: 'indexing-finished', count, durationMs: Date.now() - startedAt })
    }, idleMs)
  })

  // Optional one-time backfill. The stream is already open, so writes arriving
  // during the sweep are handled normally; the loop-guard dedups the overlap.
  // Deferred by one microtask so the caller can register listeners first, and
  // guarded with a catch so a failure never becomes an unhandled rejection.
  const backfillDone: Promise<void> = backfill
    ? Promise.resolve().then(runBackfill).catch(report)
    : Promise.resolve()

  async function runBackfill(): Promise<void> {
    if (stopped) return
    // Re-derive documents whose search fields are missing OR were derived under a
    // different version (library upgrade or config change).
    const version = searchFieldsVersion(config)
    const cursor = collection.find({
      $or: [{ [ns]: { $exists: false } }, { [`${ns}._v`]: { $ne: version } }],
    })
    const began = Date.now()
    let processed = 0
    emit({ type: 'indexing-started' })
    try {
      for await (const doc of cursor) {
        if (stopped) break
        const derived = computeSearchFields(doc, config) as Record<string, unknown>
        // Best effort per document: a failed write is reported, not fatal.
        await collection.updateOne({ _id: doc._id }, { $set: { [ns]: derived[ns] } }).catch(report)
        processed += 1
      }
    } catch (error) {
      // Cursor/iteration failure: report it and still close out the sweep below.
      report(error)
    } finally {
      await cursor.close().catch(report)
      emit({ type: 'indexing-finished', count: processed, durationMs: Date.now() - began })
    }
  }

  return {
    on: (l) => { listeners.add(l) },
    off: (l) => { listeners.delete(l) },
    stop: async () => {
      stopped = true
      if (idleTimer) clearTimeout(idleTimer)
      listeners.clear()
      try {
        await stream.close()
      } finally {
        // Wait for an in-flight backfill to wind down so no derive write is
        // issued after stop() has resolved.
        await backfillDone
      }
    },
  }
}
@@ -0,0 +1,40 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { SEARCH_FIELDS_VERSION, searchFieldsVersion } from './version'
3
+ import { computeSearchFields } from './computeSearchFields'
4
+ import type { MongoSearchConfig } from './config'
5
+
6
// Builds a single-target (`author`) config; properties in `extra` are spread on
// top of the base and therefore win.
const cfg = (extra?: Partial<MongoSearchConfig>): MongoSearchConfig => {
  const base: MongoSearchConfig = { targets: { author: { fields: ['author'] } } }
  return { ...base, ...extra }
}
10
+
11
// Properties of the version stamp: prefix, determinism, config sensitivity and
// independence from target key order.
describe('searchFieldsVersion', () => {
  it('starts with the code version prefix', () => {
    const prefix = `${SEARCH_FIELDS_VERSION}:`
    const stamp = searchFieldsVersion(cfg())
    expect(stamp.startsWith(prefix)).toBe(true)
  })

  it('is stable for the same config', () => {
    const first = searchFieldsVersion(cfg())
    const second = searchFieldsVersion(cfg())
    expect(first).toBe(second)
  })

  it('changes when the config changes', () => {
    const baseline = searchFieldsVersion(cfg())
    const withSizes = searchFieldsVersion(cfg({ ngramSizes: [2, 3, 4] }))
    const withExtraField = searchFieldsVersion({
      targets: { author: { fields: ['author', 'title'] } },
    })
    expect(baseline).not.toBe(withSizes)
    expect(baseline).not.toBe(withExtraField)
  })

  it('is order-independent over target keys', () => {
    const authorFirst: MongoSearchConfig = {
      targets: { author: { fields: ['author'] }, title: { fields: ['title'] } },
    }
    const titleFirst: MongoSearchConfig = {
      targets: { title: { fields: ['title'] }, author: { fields: ['author'] } },
    }
    expect(searchFieldsVersion(authorFirst)).toBe(searchFieldsVersion(titleFirst))
  })
})
34
+
35
// The derived block must carry the same stamp searchFieldsVersion computes.
describe('computeSearchFields version stamp', () => {
  it('stamps _v on the derived namespace block', () => {
    const expected = searchFieldsVersion(cfg())
    const derived = computeSearchFields({ author: 'x' }, cfg()) as any
    expect(derived._qt._v).toBe(expected)
  })
})
package/src/version.ts ADDED
@@ -0,0 +1,41 @@
1
+ import type { MongoSearchConfig } from './config'
2
+ import { DEFAULT_NAMESPACE, DEFAULT_NGRAM_SIZES } from './config'
3
+
4
/**
 * Code-level version of the derived search fields.
 *
 * Bump whenever the DERIVED OUTPUT changes for the same input — i.e. any change
 * to normalizeText, toNgrams, buildCorpus, or computeSearchFields's shape.
 *
 * History:
 * - 1 = initial
 * - 2 = precomposed-letter folding (ł, ø, ß→ss, …)
 */
export const SEARCH_FIELDS_VERSION = 2
8
+
9
/**
 * Order-independent JSON so two equal configs hash the same regardless of key
 * order.
 *
 * Object keys are emitted sorted. Properties whose value is `undefined` are
 * omitted, as JSON.stringify does, so `{ a: undefined }` and `{}` serialize
 * identically and are not confused with `{ a: null }`. Array slots keep their
 * position; an `undefined` slot becomes `null`.
 *
 * @param v - Value to serialize.
 * @returns The canonical JSON text.
 */
function stableStringify(v: unknown): string {
  // Primitives and null defer to JSON.stringify; values it cannot represent
  // (undefined, functions, symbols) come back undefined and become 'null'.
  if (v === null || typeof v !== 'object') return JSON.stringify(v) ?? 'null'
  if (Array.isArray(v)) return '[' + v.map((item) => stableStringify(item)).join(',') + ']'
  const obj = v as Record<string, unknown>
  const members = Object.keys(obj)
    .filter((k) => obj[k] !== undefined)
    .sort()
    .map((k) => JSON.stringify(k) + ':' + stableStringify(obj[k]))
  return '{' + members.join(',') + '}'
}
16
+
17
/**
 * Compact, dependency-free non-cryptographic hash (cyrb53).
 *
 * Walks the string by UTF-16 code unit, mixing each unit into two 32-bit
 * lanes, then combines the low 21 bits of one lane with the full 32 bits of
 * the other and renders the result in base 36.
 *
 * @param str - Text to hash.
 * @returns The hash as a base-36 string.
 */
function cyrb53(str: string): string {
  let lo = 0xdeadbeef
  let hi = 0x41c6ce57
  let idx = 0
  while (idx < str.length) {
    const unit = str.charCodeAt(idx)
    lo = Math.imul(lo ^ unit, 2654435761)
    hi = Math.imul(hi ^ unit, 1597334677)
    idx += 1
  }
  // Final avalanche; the second lane is mixed with the already-updated first.
  const mixedLo = Math.imul(lo ^ (lo >>> 16), 2246822507) ^ Math.imul(hi ^ (hi >>> 13), 3266489909)
  const mixedHi = Math.imul(hi ^ (hi >>> 16), 2246822507) ^ Math.imul(mixedLo ^ (mixedLo >>> 13), 3266489909)
  const top21 = 2097151 & mixedHi
  const combined = 4294967296 * top21 + (mixedLo >>> 0)
  return combined.toString(36)
}
30
+
31
/**
 * Effective version stamped on each document's derived block: the code version
 * plus a fingerprint of the derivation-affecting config (namespace, n-gram
 * sizes, targets). A library upgrade (code version) OR a config change yields
 * a different stamp, which triggers a re-derive.
 *
 * @param config - Search configuration to fingerprint.
 * @returns `<SEARCH_FIELDS_VERSION>:<hash>`.
 */
export function searchFieldsVersion(config: MongoSearchConfig): string {
  // Defaults are applied before hashing, so an omitted option and its explicit
  // default produce the same fingerprint.
  const fingerprintInput = {
    targets: config.targets,
    sizes: config.ngramSizes ?? DEFAULT_NGRAM_SIZES,
    ns: config.namespace ?? DEFAULT_NAMESPACE,
  }
  const fingerprint = cyrb53(stableStringify(fingerprintInput))
  return [SEARCH_FIELDS_VERSION, fingerprint].join(':')
}