@quaesitor-textus/mongo 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +180 -0
- package/README.md +417 -0
- package/dist/adapters/express.cjs +110 -0
- package/dist/adapters/express.d.cts +20 -0
- package/dist/adapters/express.d.ts +20 -0
- package/dist/adapters/express.js +7 -0
- package/dist/adapters/fastify.cjs +113 -0
- package/dist/adapters/fastify.d.cts +10 -0
- package/dist/adapters/fastify.d.ts +10 -0
- package/dist/adapters/fastify.js +16 -0
- package/dist/adapters/next-app.cjs +120 -0
- package/dist/adapters/next-app.d.cts +9 -0
- package/dist/adapters/next-app.d.ts +9 -0
- package/dist/adapters/next-app.js +23 -0
- package/dist/adapters/next-pages.cjs +110 -0
- package/dist/adapters/next-pages.d.cts +5 -0
- package/dist/adapters/next-pages.d.ts +5 -0
- package/dist/adapters/next-pages.js +7 -0
- package/dist/chunk-AUIK33V2.js +55 -0
- package/dist/chunk-RXTFVXXU.js +42 -0
- package/dist/index.cjs +288 -0
- package/dist/index.d.cts +51 -0
- package/dist/index.d.ts +51 -0
- package/dist/index.js +203 -0
- package/dist/startSearchSync-Bk7Na8Do.d.cts +39 -0
- package/dist/startSearchSync-Bk7Na8Do.d.ts +39 -0
- package/package.json +88 -0
- package/src/adapters/express.ts +8 -0
- package/src/adapters/fastify.ts +19 -0
- package/src/adapters/next-app.ts +27 -0
- package/src/adapters/next-pages.ts +11 -0
- package/src/adapters/shared.ts +61 -0
- package/src/buildTextSearchFilter.test.ts +30 -0
- package/src/buildTextSearchFilter.ts +34 -0
- package/src/computeSearchFields.test.ts +23 -0
- package/src/computeSearchFields.ts +31 -0
- package/src/config.ts +14 -0
- package/src/createLiveSearch.test.ts +48 -0
- package/src/createLiveSearch.ts +57 -0
- package/src/index.ts +12 -0
- package/src/modes.test.ts +20 -0
- package/src/modes.ts +24 -0
- package/src/parity.test.ts +60 -0
- package/src/searchIndexes.test.ts +12 -0
- package/src/searchIndexes.ts +22 -0
- package/src/sse.test.ts +11 -0
- package/src/sse.ts +7 -0
- package/src/startSearchSync.test.ts +42 -0
- package/src/startSearchSync.ts +91 -0
- package/src/version.test.ts +40 -0
- package/src/version.ts +41 -0
package/src/sse.test.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { formatSse, sseComment } from './sse'
|
|
3
|
+
|
|
4
|
+
// Wire-format checks for the server-sent-events helpers.
describe('sse', () => {
  it('formats a data event with trailing blank line', () => {
    // One `data:` line holding the JSON payload, terminated by a blank line.
    const expected = 'data: {"type":"match","item":{"_id":"x"}}\n\n'
    const actual = formatSse({ type: 'match', item: { _id: 'x' } })
    expect(actual).toBe(expected)
  })

  it('formats a heartbeat comment', () => {
    // A line starting with ':' is the comment form used here as a keep-alive.
    const heartbeat = sseComment()
    expect(heartbeat).toBe(': ping\n\n')
  })
})
|
package/src/startSearchSync.test.ts
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { describe, it, expect, beforeAll, afterAll } from 'vitest'
|
|
2
|
+
import { MongoClient } from 'mongodb'
|
|
3
|
+
import { startSearchSync } from './startSearchSync'
|
|
4
|
+
import type { MongoSearchConfig } from './config'
|
|
5
|
+
|
|
6
|
+
// Connection string for the integration tests; override with MONGO_URL.
const MONGO_URL = process.env.MONGO_URL ?? 'mongodb://localhost:27018/?directConnection=true'

// Minimal search config: a single `name` target built from the `name` field.
const config: MongoSearchConfig = { targets: { name: { fields: ['name'] } } }

let client: MongoClient
// Flipped to false when the server cannot be reached, so the tests below
// return early instead of failing.
let available = true

beforeAll(async () => {
  try {
    // Short server-selection timeout so an absent server is detected quickly.
    client = await MongoClient.connect(MONGO_URL, { serverSelectionTimeoutMS: 1500 })
  } catch {
    available = false
  }
})

afterAll(async () => {
  // `client` stays unassigned when the connection attempt failed.
  await client?.close()
})
|
|
16
|
+
|
|
17
|
+
// Integration tests for the `backfill: true` sweep. Each test returns early
// when no MongoDB server was reachable in `beforeAll`.
describe('startSearchSync backfill', () => {
  it('derives a pre-existing doc written before the watcher starts', async () => {
    if (!available) return
    const col = client.db('qt_backfill_test').collection('docs')
    await col.deleteMany({})
    await col.insertOne({ _id: 'pre', name: 'Émile Zola' } as never) // raw, before watcher
    const sync = startSearchSync(col, config, { backfill: true })
    try {
      // Give the backfill sweep time to find and rewrite the document.
      await new Promise(r => setTimeout(r, 1200))
      const doc = await col.findOne({ _id: 'pre' as never }) as any
      expect(doc?._qt?.name?.norm).toBe('emile zola')
    } finally {
      // Always close the change stream, even when an assertion above throws.
      await sync.stop()
    }
  })

  it('re-derives a doc whose stored version is stale', async () => {
    if (!available) return
    const col = client.db('qt_backfill_test').collection('docs')
    await col.deleteMany({})
    // Doc that already has derived fields but stamped with an obsolete version.
    await col.insertOne({ _id: 'stale', name: 'Wisława', _qt: { name: { norm: 'WRONG', ngrams: [] }, _v: 'old:0' } } as never)
    const sync = startSearchSync(col, config, { backfill: true })
    try {
      await new Promise(r => setTimeout(r, 1200))
      const doc = await col.findOne({ _id: 'stale' as never }) as any
      expect(doc?._qt?.name?.norm).toBe('wislawa') // re-derived with current folding
    } finally {
      // Always close the change stream, even when an assertion above throws.
      await sync.stop()
    }
  })
})
|
|
package/src/startSearchSync.ts
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import type { ChangeStream, Collection } from 'mongodb'
|
|
2
|
+
import type { MongoSearchConfig } from './config'
|
|
3
|
+
import { DEFAULT_NAMESPACE } from './config'
|
|
4
|
+
import { computeSearchFields } from './computeSearchFields'
|
|
5
|
+
import { searchFieldsVersion } from './version'
|
|
6
|
+
|
|
7
|
+
/** Notifications delivered to listeners registered on a {@link SearchSync}. */
export type SearchSyncEvent =
  // A burst of derive writes (or a backfill sweep) has begun.
  | { type: 'indexing-started' }
  // The burst or sweep has ended; `count` documents were processed in `durationMs`.
  | { type: 'indexing-finished'; count: number; durationMs: number }
  // The derive write for the document with this `_id` has resolved.
  | { type: 'indexed'; id: unknown }

/** Callback invoked with every {@link SearchSyncEvent}. */
export type SearchSyncListener = (event: SearchSyncEvent) => void

/** Handle returned by `startSearchSync`. */
export interface SearchSync {
  /** Registers a listener. */
  on(listener: SearchSyncListener): void
  /** Unregisters a previously registered listener. */
  off(listener: SearchSyncListener): void
  /** Stops the sync; resolves once the underlying change stream is closed. */
  stop(): Promise<void>
}

/** Tuning knobs accepted by `startSearchSync`. */
export interface StartSearchSyncOptions {
  /** Quiet period in milliseconds after which a burst counts as finished. */
  idleMs?: number
  /** When true, sweep pre-existing documents once on start. */
  backfill?: boolean
}
|
|
18
|
+
|
|
19
|
+
/**
 * Tails the collection change stream, derives search fields, and notifies
 * listeners. Requires a replica set.
 *
 * Events:
 * - `indexing-started` / `indexing-finished` bracket a debounced burst of
 *   derive writes (for logging); a burst ends after `idleMs` without a new one.
 * - `indexed` fires per document AFTER the derive write resolves, so filters
 *   on the derived fields will match.
 *
 * With `backfill: true`, a one-time sweep also derives pre-existing documents
 * whose namespace block is missing or whose `_v` stamp differs from
 * `searchFieldsVersion(config)`. Change streams are forward-only, so this
 * catches docs written before the watcher ran or during downtime (e.g. by an
 * external Python writer), as well as docs derived under an older version.
 * The sweep emits its own `indexing-started` / `indexing-finished` pair and is
 * best-effort: failures are ignored, like individual write failures.
 *
 * NOTE(review): no 'error' listener is attached to the change stream here —
 * confirm how stream failures are expected to surface to callers.
 *
 * @param collection Collection to watch and write derived fields into.
 * @param config Search configuration passed to `computeSearchFields`.
 * @param options `idleMs` (default 750) and `backfill` (default false).
 * @returns Handle for (un)registering listeners and stopping the sync.
 */
export function startSearchSync(
  collection: Collection,
  config: MongoSearchConfig,
  options: StartSearchSyncOptions = {},
): SearchSync {
  const ns = config.namespace ?? DEFAULT_NAMESPACE
  const { idleMs = 750, backfill = false } = options
  const stream: ChangeStream = collection.watch([], { fullDocument: 'updateLookup' })
  const listeners = new Set<SearchSyncListener>()
  const emit = (e: SearchSyncEvent) => { for (const l of listeners) l(e) }

  // Set by stop(); halts the backfill sweep and ignores late change events.
  let stopped = false
  let active = false
  let count = 0
  let startedAt = 0
  let idleTimer: ReturnType<typeof setTimeout> | undefined

  stream.on('change', (change: any) => {
    if (stopped) return
    if (!['insert', 'update', 'replace'].includes(change.operationType)) return
    const doc = change.fullDocument
    if (!doc) return
    const derived = computeSearchFields(doc, config) as Record<string, unknown>
    // Loop guard: our own echo writes already match -> skip (and don't count).
    if (JSON.stringify(doc[ns]) === JSON.stringify(derived[ns])) return

    if (!active) { active = true; count = 0; startedAt = Date.now(); emit({ type: 'indexing-started' }) }
    count += 1
    // Emit `indexed` only AFTER the derive write lands, so live match-tests see
    // the derived fields.
    void collection.updateOne({ _id: doc._id }, { $set: { [ns]: derived[ns] } })
      .then(() => emit({ type: 'indexed', id: doc._id }))
      .catch(() => { /* ignore individual write failures */ })

    // Debounce: the burst is finished once `idleMs` pass without another derive.
    if (idleTimer) clearTimeout(idleTimer)
    idleTimer = setTimeout(() => {
      active = false
      emit({ type: 'indexing-finished', count, durationMs: Date.now() - startedAt })
    }, idleMs)
  })

  // One-time sweep over documents whose search fields are missing OR were
  // derived under a different version (library upgrade or config change).
  async function runBackfill(): Promise<void> {
    // Yield one microtask first: listeners attached synchronously right after
    // startSearchSync() returns would otherwise miss `indexing-started`,
    // because nobody can hold the returned handle while this call is running.
    await Promise.resolve()
    if (stopped) return

    const sweepStartedAt = Date.now()
    let n = 0
    emit({ type: 'indexing-started' })
    try {
      const version = searchFieldsVersion(config)
      const cursor = collection.find({
        $or: [{ [ns]: { $exists: false } }, { [`${ns}._v`]: { $ne: version } }],
      })
      for await (const doc of cursor) {
        // Leaving the loop also releases the cursor.
        if (stopped) break
        const derived = computeSearchFields(doc, config) as Record<string, unknown>
        await collection.updateOne({ _id: doc._id }, { $set: { [ns]: derived[ns] } }).catch(() => {})
        n += 1
      }
    } catch {
      // Best effort: a failed sweep (e.g. the connection went away) must not
      // surface as an unhandled promise rejection in the host process.
    } finally {
      emit({ type: 'indexing-finished', count: n, durationMs: Date.now() - sweepStartedAt })
    }
  }

  // Optional one-time backfill. The stream is already open, so writes arriving
  // during the sweep are handled normally; the loop-guard dedups the overlap.
  if (backfill) void runBackfill()

  return {
    on: (l) => { listeners.add(l) },
    off: (l) => { listeners.delete(l) },
    stop: async () => {
      stopped = true
      if (idleTimer) clearTimeout(idleTimer)
      listeners.clear()
      await stream.close()
    },
  }
}
|
|
package/src/version.test.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { SEARCH_FIELDS_VERSION, searchFieldsVersion } from './version'
|
|
3
|
+
import { computeSearchFields } from './computeSearchFields'
|
|
4
|
+
import type { MongoSearchConfig } from './config'
|
|
5
|
+
|
|
6
|
+
// Builds a config with a single `author` target; `extra` overrides or adds
// top-level config keys.
const cfg = (extra?: Partial<MongoSearchConfig>): MongoSearchConfig => {
  const base: MongoSearchConfig = { targets: { author: { fields: ['author'] } } }
  return { ...base, ...extra }
}
|
|
10
|
+
|
|
11
|
+
// Properties of the version stamp: prefix, determinism, sensitivity to config
// changes, and insensitivity to key order.
describe('searchFieldsVersion', () => {
  it('starts with the code version prefix', () => {
    const prefix = `${SEARCH_FIELDS_VERSION}:`
    const version = searchFieldsVersion(cfg())
    expect(version.startsWith(prefix)).toBe(true)
  })

  it('is stable for the same config', () => {
    const first = searchFieldsVersion(cfg())
    const second = searchFieldsVersion(cfg())
    expect(first).toBe(second)
  })

  it('changes when the config changes', () => {
    const baseline = searchFieldsVersion(cfg())
    const withNgramSizes = searchFieldsVersion(cfg({ ngramSizes: [2, 3, 4] }))
    const withExtraField = searchFieldsVersion({ targets: { author: { fields: ['author', 'title'] } } })
    expect(baseline).not.toBe(withNgramSizes)
    expect(baseline).not.toBe(withExtraField)
  })

  it('is order-independent over target keys', () => {
    const authorFirst: MongoSearchConfig = { targets: { author: { fields: ['author'] }, title: { fields: ['title'] } } }
    const titleFirst: MongoSearchConfig = { targets: { title: { fields: ['title'] }, author: { fields: ['author'] } } }
    expect(searchFieldsVersion(authorFirst)).toBe(searchFieldsVersion(titleFirst))
  })
})
|
|
34
|
+
|
|
35
|
+
// The derived namespace block must carry the same stamp searchFieldsVersion
// computes for the config.
describe('computeSearchFields version stamp', () => {
  it('stamps _v on the derived namespace block', () => {
    const derived = computeSearchFields({ author: 'x' }, cfg()) as any
    const expectedStamp = searchFieldsVersion(cfg())
    expect(derived._qt._v).toBe(expectedStamp)
  })
})
|
package/src/version.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { MongoSearchConfig } from './config'
|
|
2
|
+
import { DEFAULT_NAMESPACE, DEFAULT_NGRAM_SIZES } from './config'
|
|
3
|
+
|
|
4
|
+
/**
 * Code-level version of the derived search fields.
 *
 * Bump it whenever the DERIVED OUTPUT changes for the same input, i.e. on any
 * change to normalizeText, toNgrams, buildCorpus, or the shape produced by
 * computeSearchFields.
 *
 * History:
 * - 1 = initial
 * - 2 = precomposed-letter folding (ł, ø, ß→ss, …)
 */
export const SEARCH_FIELDS_VERSION = 2
|
|
8
|
+
|
|
9
|
+
// JSON serialisation with object keys emitted in sorted order, so two equal
// configs produce the same string regardless of key order. Array order is
// preserved. Values JSON.stringify cannot represent (e.g. undefined) are
// written as `null`.
function stableStringify(v: unknown): string {
  if (Array.isArray(v)) {
    const items = v.map((item) => stableStringify(item))
    return `[${items.join(',')}]`
  }

  if (typeof v === 'object' && v !== null) {
    const record = v as Record<string, unknown>
    const members: string[] = []
    for (const key of Object.keys(record).sort()) {
      members.push(`${JSON.stringify(key)}:${stableStringify(record[key])}`)
    }
    return `{${members.join(',')}}`
  }

  // Primitives: JSON.stringify returns undefined for values it cannot encode.
  return JSON.stringify(v) ?? 'null'
}
|
|
16
|
+
|
|
17
|
+
// Compact, dependency-free non-cryptographic hash (cyrb53). Mixes the string's
// UTF-16 code units into two 32-bit lanes and returns a value of up to 53 bits
// in base 36.
function cyrb53(str: string): string {
  let lo = 0xdeadbeef
  let hi = 0x41c6ce57

  for (let pos = 0; pos < str.length; pos += 1) {
    const unit = str.charCodeAt(pos)
    lo = Math.imul(lo ^ unit, 2654435761)
    hi = Math.imul(hi ^ unit, 1597334677)
  }

  // Final avalanche; the second lane is mixed with the already-updated first.
  lo = Math.imul(lo ^ (lo >>> 16), 2246822507)
  lo ^= Math.imul(hi ^ (hi >>> 13), 3266489909)
  hi = Math.imul(hi ^ (hi >>> 16), 2246822507)
  hi ^= Math.imul(lo ^ (lo >>> 13), 3266489909)

  // Low 21 bits of `hi` form the upper part, all 32 bits of `lo` the lower.
  const upper = 2097151 & hi
  const lower = lo >>> 0
  return (4294967296 * upper + lower).toString(36)
}
|
|
30
|
+
|
|
31
|
+
/**
 * Effective version stamped on each document's derived block: the code version
 * plus a fingerprint of the derivation-affecting config (namespace, n-gram
 * sizes, targets). A library upgrade (code version) OR a config change yields
 * a different stamp and so triggers a re-derive.
 *
 * @param config Search configuration to fingerprint.
 * @returns A string of the form `<code version>:<config hash>`.
 */
export function searchFieldsVersion(config: MongoSearchConfig): string {
  const namespace = config.namespace ?? DEFAULT_NAMESPACE
  const ngramSizes = config.ngramSizes ?? DEFAULT_NGRAM_SIZES
  const signature = stableStringify({ ns: namespace, sizes: ngramSizes, targets: config.targets })
  const fingerprint = cyrb53(signature)
  return `${SEARCH_FIELDS_VERSION}:${fingerprint}`
}
|