@quaesitor-textus/mongo 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +180 -0
- package/README.md +417 -0
- package/dist/adapters/express.cjs +110 -0
- package/dist/adapters/express.d.cts +20 -0
- package/dist/adapters/express.d.ts +20 -0
- package/dist/adapters/express.js +7 -0
- package/dist/adapters/fastify.cjs +113 -0
- package/dist/adapters/fastify.d.cts +10 -0
- package/dist/adapters/fastify.d.ts +10 -0
- package/dist/adapters/fastify.js +16 -0
- package/dist/adapters/next-app.cjs +120 -0
- package/dist/adapters/next-app.d.cts +9 -0
- package/dist/adapters/next-app.d.ts +9 -0
- package/dist/adapters/next-app.js +23 -0
- package/dist/adapters/next-pages.cjs +110 -0
- package/dist/adapters/next-pages.d.cts +5 -0
- package/dist/adapters/next-pages.d.ts +5 -0
- package/dist/adapters/next-pages.js +7 -0
- package/dist/chunk-AUIK33V2.js +55 -0
- package/dist/chunk-RXTFVXXU.js +42 -0
- package/dist/index.cjs +288 -0
- package/dist/index.d.cts +51 -0
- package/dist/index.d.ts +51 -0
- package/dist/index.js +203 -0
- package/dist/startSearchSync-Bk7Na8Do.d.cts +39 -0
- package/dist/startSearchSync-Bk7Na8Do.d.ts +39 -0
- package/package.json +88 -0
- package/src/adapters/express.ts +8 -0
- package/src/adapters/fastify.ts +19 -0
- package/src/adapters/next-app.ts +27 -0
- package/src/adapters/next-pages.ts +11 -0
- package/src/adapters/shared.ts +61 -0
- package/src/buildTextSearchFilter.test.ts +30 -0
- package/src/buildTextSearchFilter.ts +34 -0
- package/src/computeSearchFields.test.ts +23 -0
- package/src/computeSearchFields.ts +31 -0
- package/src/config.ts +14 -0
- package/src/createLiveSearch.test.ts +48 -0
- package/src/createLiveSearch.ts +57 -0
- package/src/index.ts +12 -0
- package/src/modes.test.ts +20 -0
- package/src/modes.ts +24 -0
- package/src/parity.test.ts +60 -0
- package/src/searchIndexes.test.ts +12 -0
- package/src/searchIndexes.ts +22 -0
- package/src/sse.test.ts +11 -0
- package/src/sse.ts +7 -0
- package/src/startSearchSync.test.ts +42 -0
- package/src/startSearchSync.ts +91 -0
- package/src/version.test.ts +40 -0
- package/src/version.ts +41 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { Collection } from 'mongodb';
|
|
2
|
+
import { SearchOptions } from '@quaesitor-textus/core';
|
|
3
|
+
|
|
4
|
+
/** A named search target: the document fields that feed it and the query modes it supports. */
interface MongoSearchTarget {
    /** Document fields whose text makes up this target's corpus. */
    fields: string[];
    /** Base/default query mode for the target. */
    options?: SearchOptions;
    /** Additional query modes that can be selected at query time. */
    queryModes?: SearchOptions[];
}

/** Search configuration shared by the indexing and querying helpers. */
interface MongoSearchConfig {
    /** Document field under which derived search data is stored; falls back to `DEFAULT_NAMESPACE`. */
    namespace?: string;
    /** n-gram lengths to index; falls back to `DEFAULT_NGRAM_SIZES`. */
    ngramSizes?: number[];
    /** Search targets, keyed by target name. */
    targets: Record<string, MongoSearchTarget>;
}

/** Namespace used when `MongoSearchConfig.namespace` is not set. */
declare const DEFAULT_NAMESPACE = "_qt";
/** n-gram sizes used when `MongoSearchConfig.ngramSizes` is not set. */
declare const DEFAULT_NGRAM_SIZES: number[];

/**
 * Events delivered to sync listeners, discriminated by `type`.
 * NOTE(review): the emitting code (startSearchSync) is not part of this
 * declaration file; the per-variant meaning below is inferred from the names.
 */
type SearchSyncEvent =
    | { type: 'indexing-started' }
    | {
          type: 'indexing-finished';
          /** Presumably the number of documents processed — confirm against the implementation. */
          count: number;
          /** Presumably the elapsed time in milliseconds — confirm against the implementation. */
          durationMs: number;
      }
    | {
          type: 'indexed';
          /** Identifier of the affected document (`_id`-like; left untyped). */
          id: unknown;
      };

/** Callback signature accepted by `SearchSync.on` / `SearchSync.off`. */
type SearchSyncListener = (event: SearchSyncEvent) => void;

/** Handle returned by `startSearchSync`. */
interface SearchSync {
    /** Registers a listener for sync events. */
    on(listener: SearchSyncListener): void;
    /** Removes a previously registered listener. */
    off(listener: SearchSyncListener): void;
    /** Stops the sync; the returned promise settles when shutdown is done. */
    stop(): Promise<void>;
}

/**
 * Tuning options for `startSearchSync`.
 * NOTE(review): defaults and exact semantics live in the implementation,
 * which is not visible from this declaration file.
 */
interface StartSearchSyncOptions {
    idleMs?: number;
    backfill?: boolean;
}

/** Starts keeping the derived search fields of `collection` in sync according to `config`. */
declare function startSearchSync(collection: Collection, config: MongoSearchConfig, options?: StartSearchSyncOptions): SearchSync;

export { DEFAULT_NAMESPACE as D, type MongoSearchTarget as M, type SearchSync as S, type MongoSearchConfig as a, DEFAULT_NGRAM_SIZES as b, type SearchSyncEvent as c, type SearchSyncListener as d, type StartSearchSyncOptions as e, startSearchSync as s };
|
package/package.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@quaesitor-textus/mongo",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Server-side MongoDB companion for quaesitor-textus: n-gram-indexed, diacritic/case-insensitive substring search reproducing the client-side matching, a change-stream sync watcher, and live SSE search (Fastify/Express/Next adapters).",
|
|
6
|
+
"license": "Apache-2.0",
|
|
7
|
+
"keywords": [
|
|
8
|
+
"mongodb",
|
|
9
|
+
"full-text-search",
|
|
10
|
+
"search",
|
|
11
|
+
"ngram",
|
|
12
|
+
"diacritics",
|
|
13
|
+
"sse",
|
|
14
|
+
"change-streams",
|
|
15
|
+
"quaesitor-textus"
|
|
16
|
+
],
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "https://github.com/csillag/quaesitor-textus.git",
|
|
20
|
+
"directory": "packages/mongo"
|
|
21
|
+
},
|
|
22
|
+
"homepage": "https://github.com/csillag/quaesitor-textus/tree/main/packages/mongo#readme",
|
|
23
|
+
"files": [
|
|
24
|
+
"dist",
|
|
25
|
+
"src"
|
|
26
|
+
],
|
|
27
|
+
"main": "./dist/index.cjs",
|
|
28
|
+
"module": "./dist/index.js",
|
|
29
|
+
"types": "./dist/index.d.ts",
|
|
30
|
+
"exports": {
|
|
31
|
+
".": {
|
|
32
|
+
"types": "./dist/index.d.ts",
|
|
33
|
+
"development": "./src/index.ts",
|
|
34
|
+
"require": "./dist/index.cjs",
|
|
35
|
+
"import": "./dist/index.js"
|
|
36
|
+
},
|
|
37
|
+
"./fastify": {
|
|
38
|
+
"types": "./dist/adapters/fastify.d.ts",
|
|
39
|
+
"development": "./src/adapters/fastify.ts",
|
|
40
|
+
"require": "./dist/adapters/fastify.cjs",
|
|
41
|
+
"import": "./dist/adapters/fastify.js"
|
|
42
|
+
},
|
|
43
|
+
"./express": {
|
|
44
|
+
"types": "./dist/adapters/express.d.ts",
|
|
45
|
+
"development": "./src/adapters/express.ts",
|
|
46
|
+
"require": "./dist/adapters/express.cjs",
|
|
47
|
+
"import": "./dist/adapters/express.js"
|
|
48
|
+
},
|
|
49
|
+
"./next/app": {
|
|
50
|
+
"types": "./dist/adapters/next-app.d.ts",
|
|
51
|
+
"development": "./src/adapters/next-app.ts",
|
|
52
|
+
"require": "./dist/adapters/next-app.cjs",
|
|
53
|
+
"import": "./dist/adapters/next-app.js"
|
|
54
|
+
},
|
|
55
|
+
"./next/pages": {
|
|
56
|
+
"types": "./dist/adapters/next-pages.d.ts",
|
|
57
|
+
"development": "./src/adapters/next-pages.ts",
|
|
58
|
+
"require": "./dist/adapters/next-pages.cjs",
|
|
59
|
+
"import": "./dist/adapters/next-pages.js"
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
"publishConfig": {
|
|
63
|
+
"access": "public"
|
|
64
|
+
},
|
|
65
|
+
"dependencies": {
|
|
66
|
+
"@quaesitor-textus/core": "0.2.0"
|
|
67
|
+
},
|
|
68
|
+
"peerDependencies": {
|
|
69
|
+
"mongodb": ">=5",
|
|
70
|
+
"fastify": "^4 || ^5"
|
|
71
|
+
},
|
|
72
|
+
"peerDependenciesMeta": {
|
|
73
|
+
"fastify": {
|
|
74
|
+
"optional": true
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
"devDependencies": {
|
|
78
|
+
"fastify": "^4.0.0",
|
|
79
|
+
"mongodb": "^6.0.0",
|
|
80
|
+
"tsup": "^8.0.0",
|
|
81
|
+
"typescript": "^5.4.0",
|
|
82
|
+
"vitest": "^2.0.0"
|
|
83
|
+
},
|
|
84
|
+
"scripts": {
|
|
85
|
+
"build": "tsup",
|
|
86
|
+
"test": "vitest run"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// Express adapter. Express's Request/Response are subtypes of Node's
|
|
2
|
+
// IncomingMessage/ServerResponse, so the shared Node-response adapter accepts
|
|
3
|
+
// them directly — no dependency on express itself.
|
|
4
|
+
//
|
|
5
|
+
// import { streamLiveSearch } from '@quaesitor-textus/mongo/express'
|
|
6
|
+
// app.get('/api/live', (req, res) => streamLiveSearch(req, res, { sync, collection, config, filter }))
|
|
7
|
+
export { streamToNodeResponse as streamLiveSearch } from './shared'
|
|
8
|
+
export type { StreamLiveSearchOptions } from './shared'
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { FastifyReply, FastifyRequest } from 'fastify'
|
|
2
|
+
import { runLiveSearch, SSE_HEADERS } from './shared'
|
|
3
|
+
import type { StreamLiveSearchOptions } from './shared'
|
|
4
|
+
|
|
5
|
+
export type { StreamLiveSearchOptions }
|
|
6
|
+
|
|
7
|
+
/**
 * Streams a live search to a Fastify reply as Server-Sent Events.
 *
 * The SSE headers are written straight to the raw Node response and the reply
 * is hijacked so Fastify does not try to send a response of its own; the rest
 * is the wiring shared by all adapters.
 *
 * @param request Fastify request; its raw `close` event stops the search.
 * @param reply Fastify reply whose raw response receives the SSE frames.
 * @param opts Live-search options forwarded to `runLiveSearch`.
 */
export function streamLiveSearch(
  request: FastifyRequest,
  reply: FastifyReply,
  opts: StreamLiveSearchOptions,
): void {
  const { raw: res } = reply
  res.writeHead(200, SSE_HEADERS)
  reply.hijack()

  const search = runLiveSearch(opts, (chunk) => res.write(chunk))
  // Client disconnect: stop the heartbeat and detach from the sync watcher.
  request.raw.on('close', search.stop)
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { runLiveSearch, SSE_HEADERS } from './shared'
|
|
2
|
+
import type { StreamLiveSearchOptions } from './shared'
|
|
3
|
+
|
|
4
|
+
export type { StreamLiveSearchOptions }
|
|
5
|
+
|
|
6
|
+
/**
 * Next.js App Router adapter (also usable in any Web/Fetch runtime, e.g. edge):
 * returns a streaming `Response` backed by a `ReadableStream`. Only Web
 * standard APIs are used, so there is no dependency on `next`.
 *
 * Once the consumer cancels the stream (or an enqueue fails because the stream
 * is no longer writable), the search is stopped and any chunk produced later is
 * dropped. Without that guard a late chunk would make `controller.enqueue`
 * throw inside the search's promise callbacks.
 *
 * @example
 * import { liveSearchResponse } from '@quaesitor-textus/mongo/next/app'
 * export async function GET(request: Request) {
 *   // parse the filter from request.url, build the Mongo filter, then:
 *   return liveSearchResponse({ sync, collection, config, filter })
 * }
 *
 * @param opts Live-search options forwarded to `runLiveSearch`.
 * @returns A `Response` carrying the SSE headers and the event stream as body.
 */
export function liveSearchResponse(opts: StreamLiveSearchOptions): Response {
  const encoder = new TextEncoder()
  let handle: { stop: () => void } | undefined
  let closed = false

  // Idempotent teardown shared by cancel() and the enqueue-failure path.
  const shutdown = (): void => {
    if (closed) return
    closed = true
    handle?.stop()
  }

  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      const search = runLiveSearch(opts, (chunk) => {
        if (closed) return
        try {
          controller.enqueue(encoder.encode(chunk))
        } catch {
          // enqueue throws once the stream is closed or errored; stop producing.
          shutdown()
        }
      })
      handle = search
      // The sink may already have failed while runLiveSearch was still running,
      // i.e. before `handle` was assigned; make sure the search is stopped then.
      if (closed) search.stop()
    },
    cancel() {
      shutdown()
    },
  })

  return new Response(stream, { headers: SSE_HEADERS })
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// Next.js Pages Router adapter. NextApiRequest/NextApiResponse extend Node's
|
|
2
|
+
// http types, so the shared Node-response adapter accepts them directly — no
|
|
3
|
+
// dependency on next itself.
|
|
4
|
+
//
|
|
5
|
+
// import { streamLiveSearch } from '@quaesitor-textus/mongo/next/pages'
|
|
6
|
+
// export default function handler(req, res) {
|
|
7
|
+
// streamLiveSearch(req, res, { sync, collection, config, filter })
|
|
8
|
+
// }
|
|
9
|
+
// export const config = { api: { responseLimit: false } } // allow long-lived SSE
|
|
10
|
+
export { streamToNodeResponse as streamLiveSearch } from './shared'
|
|
11
|
+
export type { StreamLiveSearchOptions } from './shared'
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import type { Collection, Document, Filter } from 'mongodb'
|
|
2
|
+
import type { IncomingMessage, ServerResponse } from 'http'
|
|
3
|
+
import type { MongoSearchConfig } from '../config'
|
|
4
|
+
import type { SearchSync } from '../startSearchSync'
|
|
5
|
+
import { createLiveSearch } from '../createLiveSearch'
|
|
6
|
+
import { formatSse, sseComment } from '../sse'
|
|
7
|
+
|
|
8
|
+
/**
 * Response headers every adapter sends before streaming Server-Sent Events:
 * the SSE media type, caching disabled, and a kept-alive connection.
 */
export const SSE_HEADERS = {
  'Content-Type': 'text/event-stream',
  'Cache-Control': 'no-cache',
  'Connection': 'keep-alive',
} as const
|
|
13
|
+
|
|
14
|
+
/** Options accepted by every live-search adapter. */
export interface StreamLiveSearchOptions {
  /** Running sync watcher whose events drive the live updates. */
  sync: SearchSync
  /** Collection the search queries. */
  collection: Collection
  /** Search configuration; forwarded to `createLiveSearch`. */
  config: MongoSearchConfig
  /** MongoDB filter that selects the matching documents. */
  filter: Filter<Document>
  /** Optional sort applied to the initial snapshot query. */
  sort?: { field: string; dir: 1 | -1 }
  /** Maximum number of items to deliver; `createLiveSearch` falls back to 500. */
  cap?: number
  /** Interval between heartbeat comments in milliseconds; `runLiveSearch` falls back to 25000. */
  heartbeatMs?: number
}
|
|
23
|
+
|
|
24
|
+
/**
 * Core wiring shared by every adapter: pipes a live search into a `write()`
 * sink as SSE frames and runs a heartbeat next to it. All search behavior
 * lives in `createLiveSearch`; adapters only own their framework's headers and
 * disconnect signal.
 *
 * After `stop()` nothing is written any more. The underlying search may still
 * have queries in flight at that point, and their results would otherwise be
 * pushed into a sink whose transport is already closed.
 *
 * @param opts Search options; `heartbeatMs` falls back to 25000.
 * @param write Sink that receives each SSE-formatted chunk.
 * @returns A handle whose `stop()` clears the heartbeat and detaches the
 *   search from the sync watcher. Calling it more than once is harmless.
 */
export function runLiveSearch(
  opts: StreamLiveSearchOptions,
  write: (chunk: string) => void,
): { stop: () => void } {
  let stopped = false

  // Single choke point for output so that stop() silences events and heartbeat alike.
  const emit = (chunk: string): void => {
    if (!stopped) write(chunk)
  }

  const live = createLiveSearch({
    sync: opts.sync,
    collection: opts.collection,
    config: opts.config,
    filter: opts.filter,
    sort: opts.sort,
    cap: opts.cap,
    sendEvent: (e: unknown) => emit(formatSse(e)),
  })
  const hb = setInterval(() => emit(sseComment()), opts.heartbeatMs ?? 25000)

  return {
    stop: () => {
      if (stopped) return
      stopped = true
      clearInterval(hb)
      live.stop()
    },
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Adapter for any Node http-style response. Express and the Next.js Pages
 * Router pass objects that are subtypes of `IncomingMessage`/`ServerResponse`,
 * so they can use this function directly.
 *
 * @param req Incoming request; its `close` event stops the search.
 * @param res Response that receives the SSE headers and frames.
 * @param opts Live-search options forwarded to `runLiveSearch`.
 */
export function streamToNodeResponse(
  req: IncomingMessage,
  res: ServerResponse,
  opts: StreamLiveSearchOptions,
): void {
  res.writeHead(200, SSE_HEADERS)

  const forward = (chunk: string) => res.write(chunk)
  const search = runLiveSearch(opts, forward)
  // Client disconnect: stop the heartbeat and detach from the sync watcher.
  req.on('close', search.stop)
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { buildTextSearchFilter } from './buildTextSearchFilter'
|
|
3
|
+
import type { MongoSearchConfig } from './config'
|
|
4
|
+
|
|
5
|
+
// One target with an extra case-sensitive query mode, shared by every case below.
const config: MongoSearchConfig = {
  targets: {
    author: {
      fields: ['author'],
      queryModes: [{ caseSensitive: true }],
    },
  },
}

describe('buildTextSearchFilter', () => {
  // The result is typed as a Mongo filter; widen it so the tests can walk the
  // $and clauses directly.
  const build = (patterns: string[], options?: { caseSensitive?: boolean }): any =>
    buildTextSearchFilter('author', patterns, config, options)

  it('empty patterns match everything', () => {
    expect(build([])).toEqual({})
  })

  it('builds ngram $all + per-pattern verify regex (base mode)', () => {
    const [ngramClause, verifyClause] = build(['café']).$and
    expect(ngramClause['_qt.author.ngrams'].$all).toContain('ca') // fully folded ngrams
    expect(verifyClause['_qt.author.norm'].$regex).toBe('cafe')
  })

  it('selects the case-sensitive verify field + folding', () => {
    const filter = build(['Café'], { caseSensitive: true })
    expect(filter.$and[1]['_qt.author.norm_cs'].$regex).toBe('Cafe')
  })

  it('escapes regex metacharacters in the verify pattern', () => {
    const filter = build(['a.b'])
    expect(filter.$and[1]['_qt.author.norm'].$regex).toBe('a\\.b')
  })

  it('throws on unknown target', () => {
    const call = () => buildTextSearchFilter('nope', ['x'], config)
    expect(call).toThrow(/Unknown search target/)
  })
})
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { normalizeText, toNgrams } from '@quaesitor-textus/core'
|
|
2
|
+
import type { SearchOptions } from '@quaesitor-textus/core'
|
|
3
|
+
import type { Document, Filter } from 'mongodb'
|
|
4
|
+
import type { MongoSearchConfig } from './config'
|
|
5
|
+
import { DEFAULT_NAMESPACE, DEFAULT_NGRAM_SIZES } from './config'
|
|
6
|
+
import { modeKey, escapeRegex } from './modes'
|
|
7
|
+
|
|
8
|
+
/**
 * Builds the MongoDB filter for a text search against one configured target.
 *
 * The filter has two parts, combined with `$and`:
 *  1. an index-backed pre-filter requiring every fully-folded n-gram of every
 *     pattern (a superset of the real matches), and
 *  2. one verify regex per pattern against the mode-folded verify string, so
 *     every pattern must occur as a substring.
 *
 * When the patterns produce no n-grams at all (for example a pattern shorter
 * than every configured n-gram size), the pre-filter is left out: MongoDB's
 * `$all: []` matches no documents, which would turn a valid short query into
 * an empty result. The verify regexes alone still give the correct answer.
 *
 * @param target Name of a target in `config.targets`.
 * @param patterns Search patterns; an empty list yields `{}` (match everything).
 * @param config Search configuration.
 * @param options Query mode; falls back to the target's `options`, then `{}`.
 * @returns The filter to pass to `find`.
 * @throws Error when `target` is not one of the configured targets.
 */
export function buildTextSearchFilter(
  target: string,
  patterns: string[],
  config: MongoSearchConfig,
  options?: SearchOptions,
): Filter<Document> {
  if (patterns.length === 0) return {}
  const ns = config.namespace ?? DEFAULT_NAMESPACE
  const sizes = config.ngramSizes ?? DEFAULT_NGRAM_SIZES
  // Own-property lookup only, so names such as "constructor" are not mistaken
  // for configured targets.
  const t = Object.prototype.hasOwnProperty.call(config.targets, target)
    ? config.targets[target]
    : undefined
  if (!t) throw new Error(`Unknown search target: ${target}`)
  const mode = options ?? t.options ?? {}

  const ngramField = `${ns}.${target}.ngrams`
  const verifyField = `${ns}.${target}.${modeKey(mode)}`

  // Index-backed superset pre-filter: all fully-folded n-grams of all patterns.
  const ngramTerms = [
    ...new Set(patterns.flatMap(p => toNgrams(normalizeText(p, {}), sizes))),
  ]
  // Verify: every pattern must be a substring of the mode-folded verify string (AND).
  const verifyConditions = patterns.map(p => ({
    [verifyField]: { $regex: escapeRegex(normalizeText(p, mode)) },
  }))

  const clauses: Array<Record<string, unknown>> = []
  if (ngramTerms.length > 0) clauses.push({ [ngramField]: { $all: ngramTerms } })
  clauses.push(...verifyConditions)

  return { $and: clauses } as Filter<Document>
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { computeSearchFields } from './computeSearchFields'
|
|
3
|
+
import type { MongoSearchConfig } from './config'
|
|
4
|
+
|
|
5
|
+
// Single target with an extra case-sensitive query mode.
const config: MongoSearchConfig = {
  targets: {
    author: {
      fields: ['author'],
      queryModes: [{ caseSensitive: true }],
    },
  },
}

describe('computeSearchFields', () => {
  it('stores fully-folded ngrams and per-mode verify strings', () => {
    const { _qt } = computeSearchFields({ author: 'Café' }, config) as any
    const { norm, norm_cs, ngrams } = _qt.author
    expect(norm).toBe('cafe') // folded: diacritics stripped + lowercased
    expect(norm_cs).toBe('Cafe') // case-sensitive: diacritics stripped, case kept
    expect(ngrams).toContain('ca')
    expect(ngrams).toContain('caf')
  })

  it('respects a custom namespace', () => {
    const custom: MongoSearchConfig = { namespace: 'qt', targets: config.targets }
    const out = computeSearchFields({ author: 'x' }, custom) as any
    expect(out.qt.author).toBeDefined()
  })
})
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { buildCorpus, normalizeText, toNgrams } from '@quaesitor-textus/core'
|
|
2
|
+
import type { MongoSearchConfig } from './config'
|
|
3
|
+
import { DEFAULT_NAMESPACE, DEFAULT_NGRAM_SIZES } from './config'
|
|
4
|
+
import { modeKey, targetModes } from './modes'
|
|
5
|
+
import { searchFieldsVersion } from './version'
|
|
6
|
+
|
|
7
|
+
/**
 * Derives the search fields to store alongside a document.
 *
 * For every configured target the result holds the n-grams of the fully-folded
 * corpus plus one verify string per query mode, and a `_v` stamp identifying
 * the derivation version.
 *
 * @param doc Source document the target fields are read from.
 * @param config Search configuration.
 * @returns An object with a single key (the namespace) holding the derived data.
 */
export function computeSearchFields(
  doc: unknown,
  config: MongoSearchConfig,
): Record<string, unknown> {
  const namespace = config.namespace ?? DEFAULT_NAMESPACE
  const ngramSizes = config.ngramSizes ?? DEFAULT_NGRAM_SIZES
  const derived: Record<string, unknown> = {}

  Object.entries(config.targets).forEach(([targetName, target]) => {
    const corpus = buildCorpus(doc, target.fields)
    // n-grams come from the fully-folded corpus (the coarsest fold), which makes
    // the index a superset filter that stays valid for every query mode.
    const ngrams = toNgrams(normalizeText(corpus, {}), ngramSizes)
    const verifyStrings = targetModes(target).map(
      (mode) => [modeKey(mode), normalizeText(corpus, mode)] as const,
    )
    derived[targetName] = { ngrams, ...Object.fromEntries(verifyStrings) }
  })

  // Derivation version (code + config fingerprint): lets a stale-aware backfill
  // detect and re-derive documents after an upgrade or a config change.
  derived._v = searchFieldsVersion(config)
  return { [namespace]: derived }
}
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { SearchOptions } from '@quaesitor-textus/core'
|
|
2
|
+
|
|
3
|
+
/** A named search target: the document fields that feed it and the query modes it supports. */
export interface MongoSearchTarget {
  /** Document fields whose text makes up this target's corpus. */
  fields: string[]
  /** Base/default query mode; defaults to `{}` (fully folded). */
  options?: SearchOptions
  /** Additional runtime-selectable modes. */
  queryModes?: SearchOptions[]
}
|
|
8
|
+
/** Search configuration shared by the indexing and querying helpers. */
export interface MongoSearchConfig {
  /** Document field that holds the derived search data; default "_qt". */
  namespace?: string
  /** n-gram lengths to index; default [2, 3]. */
  ngramSizes?: number[]
  /** Search targets, keyed by target name. */
  targets: Record<string, MongoSearchTarget>
}
|
|
13
|
+
/** Namespace used when `MongoSearchConfig.namespace` is not set. */
export const DEFAULT_NAMESPACE = '_qt'

/** n-gram sizes used when `MongoSearchConfig.ngramSizes` is not set. */
export const DEFAULT_NGRAM_SIZES: number[] = [2, 3]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { describe, it, expect, beforeAll, afterAll } from 'vitest'
|
|
2
|
+
import { MongoClient } from 'mongodb'
|
|
3
|
+
import { computeSearchFields, createSearchIndexes, buildTextSearchFilter, startSearchSync, createLiveSearch } from './index'
|
|
4
|
+
import type { MongoSearchConfig } from './config'
|
|
5
|
+
import type { LiveEvent } from './createLiveSearch'
|
|
6
|
+
|
|
7
|
+
const URL = process.env.MONGO_URL ?? 'mongodb://localhost:27018/?directConnection=true'
const config: MongoSearchConfig = { targets: { name: { fields: ['name'] } } }
let client: MongoClient
// Set to false when no MongoDB can be reached; the tests then report as skipped.
let available = true

beforeAll(async () => {
  try {
    client = await MongoClient.connect(URL, { serverSelectionTimeoutMS: 1500 })
    const col = client.db('qt_live_test').collection('docs')
    await col.deleteMany({})
    await col.insertMany([{ _id: 'a', name: 'Émile Zola', ...computeSearchFields({ name: 'Émile Zola' }, config) }] as never[])
    await createSearchIndexes(col, config)
  } catch {
    available = false
  }
})
afterAll(async () => { await client?.close() })

describe('createLiveSearch', () => {
  it('emits a snapshot of current matches', async (ctx) => {
    // Report "skipped" rather than a false pass when MongoDB is unavailable.
    if (!available) return ctx.skip()
    const col = client.db('qt_live_test').collection('docs')
    const sync = startSearchSync(col, config)
    const events: LiveEvent[] = []
    const live = createLiveSearch({ sync, collection: col, config, filter: buildTextSearchFilter('name', ['zola'], config), sendEvent: e => events.push(e) })
    try {
      await new Promise(r => setTimeout(r, 300))
      expect(events[0]?.type).toBe('snapshot')
      expect((events[0] as any).items.map((d: any) => d._id)).toContain('a')
    } finally {
      // Always release the listener and the watcher, even when an assertion fails.
      live.stop()
      await sync.stop()
    }
  })

  it('pushes a match for a newly-inserted matching doc', async (ctx) => {
    if (!available) return ctx.skip()
    const col = client.db('qt_live_test').collection('docs')
    const sync = startSearchSync(col, config)
    const events: LiveEvent[] = []
    const live = createLiveSearch({ sync, collection: col, config, filter: buildTextSearchFilter('name', ['borges'], config), sendEvent: e => events.push(e) })
    try {
      await new Promise(r => setTimeout(r, 200))
      await col.insertOne({ _id: 'b', name: 'Jorge Luis Borges' } as never) // raw; watcher derives
      await new Promise(r => setTimeout(r, 1500))
      expect(events.some(e => e.type === 'match' && (e as any).item._id === 'b')).toBe(true)
    } finally {
      live.stop()
      await sync.stop()
    }
  })
})
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { Collection, Document, Filter } from 'mongodb'
|
|
2
|
+
import type { MongoSearchConfig } from './config'
|
|
3
|
+
import type { SearchSync, SearchSyncEvent } from './startSearchSync'
|
|
4
|
+
|
|
5
|
+
/** Events a live search delivers to its sink, discriminated by `type`. */
export type LiveEvent =
  /** The documents matching the filter when the search started (at most `cap`). */
  | { type: 'snapshot'; items: Document[] }
  /** A newly indexed document that matches the filter. */
  | { type: 'match'; item: Document }
  /** The cap has been reached; no further `match` events are sent. */
  | { type: 'capped' }
|
|
9
|
+
|
|
10
|
+
/** Options for `createLiveSearch`. */
export interface CreateLiveSearchOptions {
  /** Running sync watcher; its `indexed` events trigger the match tests. */
  sync: SearchSync
  /** Collection the search queries. */
  collection: Collection
  /** Search configuration. */
  config: MongoSearchConfig
  /** MongoDB filter that selects the matching documents. */
  filter: Filter<Document>
  /** Optional sort applied to the initial snapshot query. */
  sort?: { field: string; dir: 1 | -1 }
  /** Maximum number of items to deliver; defaults to 500. */
  cap?: number
  /** Sink that receives every event. */
  sendEvent: (event: LiveEvent) => void
}
|
|
19
|
+
|
|
20
|
+
/**
 * Transport-agnostic live search: emits the current matching set (capped) as a
 * `snapshot`, then one `match` per newly-indexed document that matches
 * `filter`, then `capped` once `cap` items have been delivered.
 *
 * Guarantees:
 *  - `snapshot` is always the first event. `indexed` notifications that arrive
 *    while the snapshot query is still running are queued and tested
 *    afterwards, so they can neither overtake the snapshot nor be duplicated
 *    by it.
 *  - A failed snapshot query yields an empty snapshot; a failed match test is
 *    skipped. Neither ends the stream.
 *  - If `sendEvent` itself throws, the event is dropped; it is never retried.
 *  - After `stop()` no further events are sent, including results of queries
 *    that were still in flight.
 *
 * @param opts Search options; `cap` defaults to 500.
 * @returns A handle whose `stop()` detaches the search from the sync watcher.
 */
export function createLiveSearch(opts: CreateLiveSearchOptions): { stop: () => void } {
  const { sync, collection, filter, sort, cap = 500, sendEvent } = opts
  const seen = new Set<string>()
  // Ids of documents indexed before the snapshot was delivered.
  const queuedIds: unknown[] = []
  let count = 0
  let capped = false
  let stopped = false
  let snapshotDone = false

  const idOf = (doc: Document) => String(doc._id)

  // Announce the cap as soon as the delivered count reaches it.
  const announceIfCapped = (): void => {
    if (count >= cap) {
      capped = true
      sendEvent({ type: 'capped' })
    }
  }

  // Re-read one document through `filter` and emit it if it is a new match.
  const testAndEmit = (id: unknown): void => {
    if (stopped || capped) return
    void collection.findOne({ $and: [{ _id: id as any }, filter] })
      .then((doc) => {
        if (!doc || stopped || capped) return
        const key = idOf(doc)
        if (seen.has(key)) return
        seen.add(key)
        count += 1
        sendEvent({ type: 'match', item: doc })
        announceIfCapped()
      })
      .catch(() => { /* skip a failed match-test; keep the stream alive */ })
  }

  // Initial snapshot (sorted for a nicer first paint; client re-sorts anyway).
  const cursor = collection.find(filter)
  if (sort) cursor.sort({ [sort.field]: sort.dir })
  void cursor.limit(cap).toArray()
    // A failed query degrades to an empty snapshot.
    .catch((): Document[] => [])
    .then((items) => {
      snapshotDone = true
      const backlog = queuedIds.splice(0)
      if (stopped) return
      for (const it of items) seen.add(idOf(it))
      count = items.length
      sendEvent({ type: 'snapshot', items })
      announceIfCapped()
      for (const id of backlog) testAndEmit(id)
    })
    .catch(() => { /* the sink rejected the snapshot; drop it instead of retrying */ })

  const listener = (e: SearchSyncEvent) => {
    if (e.type !== 'indexed' || capped || stopped) return
    if (!snapshotDone) {
      queuedIds.push(e.id)
      return
    }
    testAndEmit(e.id)
  }
  sync.on(listener)

  return {
    stop: () => {
      stopped = true
      queuedIds.length = 0
      sync.off(listener)
    },
  }
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export type { MongoSearchConfig, MongoSearchTarget } from './config'
|
|
2
|
+
export { DEFAULT_NAMESPACE, DEFAULT_NGRAM_SIZES } from './config'
|
|
3
|
+
export { modeKey, targetModes, escapeRegex } from './modes'
|
|
4
|
+
export { computeSearchFields } from './computeSearchFields'
|
|
5
|
+
export { SEARCH_FIELDS_VERSION, searchFieldsVersion } from './version'
|
|
6
|
+
export { searchIndexSpecs, createSearchIndexes } from './searchIndexes'
|
|
7
|
+
export { buildTextSearchFilter } from './buildTextSearchFilter'
|
|
8
|
+
export { startSearchSync } from './startSearchSync'
|
|
9
|
+
export { createLiveSearch } from './createLiveSearch'
|
|
10
|
+
export type { LiveEvent, CreateLiveSearchOptions } from './createLiveSearch'
|
|
11
|
+
export { formatSse, sseComment } from './sse'
|
|
12
|
+
export type { SearchSync, SearchSyncEvent, SearchSyncListener, StartSearchSyncOptions } from './startSearchSync'
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { modeKey, targetModes, escapeRegex } from './modes'
|
|
3
|
+
|
|
4
|
+
describe('modeKey', () => {
  it('base mode is norm', () => {
    const withoutArgument = modeKey()
    expect(withoutArgument).toBe('norm')
    const withEmptyOptions = modeKey({})
    expect(withEmptyOptions).toBe('norm')
  })

  it('case-sensitive', () => {
    const key = modeKey({ caseSensitive: true })
    expect(key).toBe('norm_cs')
  })

  it('diacritic-sensitive', () => {
    const key = modeKey({ diacriticSensitive: true })
    expect(key).toBe('norm_ds')
  })

  it('both', () => {
    const key = modeKey({ caseSensitive: true, diacriticSensitive: true })
    expect(key).toBe('norm_cs_ds')
  })
})

describe('targetModes', () => {
  it('includes base + queryModes, deduped', () => {
    // The trailing {} duplicates the implicit base mode and must be dropped.
    const target = { fields: ['a'], queryModes: [{ caseSensitive: true }, {}] }
    const keys = targetModes(target).map(modeKey)
    expect(keys).toEqual(['norm', 'norm_cs'])
  })
})

describe('escapeRegex', () => {
  it('escapes regex metacharacters', () => {
    const escaped = escapeRegex('a.*b')
    expect(escaped).toBe('a\\.\\*b')
  })
})
|
package/src/modes.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { SearchOptions } from '@quaesitor-textus/core'
|
|
2
|
+
import type { MongoSearchTarget } from './config'
|
|
3
|
+
|
|
4
|
+
/**
 * Returns the name of the verify field for a query mode: `norm`, followed by
 * `_cs` when the mode is case-sensitive and `_ds` when it is
 * diacritic-sensitive (in that order).
 *
 * @param o Query mode; defaults to the fully folded mode `{}`.
 */
export function modeKey(o: SearchOptions = {}): string {
  const suffixes = [
    o.caseSensitive ? '_cs' : '',
    o.diacriticSensitive ? '_ds' : '',
  ]
  return `norm${suffixes.join('')}`
}
|
|
10
|
+
|
|
11
|
+
/**
 * Lists the distinct query modes of a target: the base mode (`t.options`, or
 * `{}` when unset) followed by `t.queryModes`. Modes that map to the same
 * `modeKey` are collapsed; the first occurrence wins and order is preserved.
 *
 * @param t Target whose modes are collected.
 */
export function targetModes(t: MongoSearchTarget): SearchOptions[] {
  // A Map keeps insertion order, so it doubles as the ordered, deduplicated result.
  const firstByKey = new Map<string, SearchOptions>()
  for (const mode of [t.options ?? {}, ...(t.queryModes ?? [])]) {
    const key = modeKey(mode)
    if (!firstByKey.has(key)) firstByKey.set(key, mode)
  }
  return [...firstByKey.values()]
}
|
|
21
|
+
|
|
22
|
+
/**
 * Escapes every regular-expression metacharacter in `s` with a backslash so
 * the result matches `s` literally when used as a pattern.
 *
 * @param s Text to match literally.
 */
export function escapeRegex(s: string): string {
  const metacharacters = '.*+?^${}()|[]\\'
  let escaped = ''
  for (const ch of s) {
    escaped += metacharacters.includes(ch) ? `\\${ch}` : ch
  }
  return escaped
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { describe, it, expect, beforeAll, afterAll } from 'vitest'
|
|
2
|
+
import { MongoClient } from 'mongodb'
|
|
3
|
+
import { matchItem, buildCorpus } from '@quaesitor-textus/core'
|
|
4
|
+
import { computeSearchFields, createSearchIndexes, buildTextSearchFilter } from './index'
|
|
5
|
+
import type { MongoSearchConfig } from './config'
|
|
6
|
+
|
|
7
|
+
const URL = process.env.MONGO_URL ?? 'mongodb://localhost:27018/?replicaSet=rs0'
const config: MongoSearchConfig = {
  targets: { name: { fields: ['name'], queryModes: [{ caseSensitive: true }] } },
}
const DOCS = [
  { name: 'Gabriel García Márquez' },
  { name: 'GARCIA lopez' },
  { name: 'Wei Ng' },
  { name: 'Plain Author' },
  { name: 'café society' },
]

let client: MongoClient
// Set to false when no MongoDB can be reached; the tests then report as skipped.
let available = true

beforeAll(async () => {
  try {
    client = await MongoClient.connect(URL, { serverSelectionTimeoutMS: 1500 })
    const col = client.db('qt_parity_test').collection('docs')
    await col.deleteMany({})
    await col.insertMany(DOCS.map(d => ({ ...d, ...computeSearchFields(d, config) })))
    await createSearchIndexes(col, config)
  } catch {
    available = false
  }
})
afterAll(async () => { await client?.close() })

// Names matched by the server-side filter, sorted for comparison.
async function serverMatches(patterns: string[], options?: any): Promise<string[]> {
  const col = client.db('qt_parity_test').collection('docs')
  const filter = buildTextSearchFilter('name', patterns, config, options)
  const rows = await col.find(filter).toArray()
  return rows.map(r => r.name).sort()
}

// Names matched by the client-side matcher on the same documents, sorted.
function clientMatches(patterns: string[], options?: any): string[] {
  return DOCS.filter(d => matchItem(buildCorpus(d, ['name']), patterns, options))
    .map(d => d.name).sort()
}

describe('client↔server parity', () => {
  const cases: Array<{ patterns: string[]; options?: any }> = [
    { patterns: ['garcia'] }, // diacritic + case insensitive
    { patterns: ['ng'] }, // 2-char (bigram path)
    { patterns: ['cafe'] }, // diacritic fold
    { patterns: ['garcia', 'marquez'] }, // multi-pattern AND
    { patterns: ['GARCIA'], options: { caseSensitive: true } }, // case-sensitive mode
  ]
  for (const c of cases) {
    it(`parity: ${JSON.stringify(c)}`, async (ctx) => {
      // Report "skipped" rather than a false pass when MongoDB is unavailable.
      if (!available) return ctx.skip()
      expect(await serverMatches(c.patterns, c.options)).toEqual(clientMatches(c.patterns, c.options))
    })
  }
})
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { searchIndexSpecs } from './searchIndexes'
|
|
3
|
+
|
|
4
|
+
describe('searchIndexSpecs', () => {
  it('one multikey index per target', () => {
    const config = {
      targets: {
        author: { fields: ['author'] },
        title: { fields: ['title'] },
      },
    }
    const expected = [
      { key: { '_qt.author.ngrams': 1 }, name: '_qt_author_ngrams' },
      { key: { '_qt.title.ngrams': 1 }, name: '_qt_title_ngrams' },
    ]
    expect(searchIndexSpecs(config)).toEqual(expected)
  })
})
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { Collection } from 'mongodb'
|
|
2
|
+
import type { MongoSearchConfig } from './config'
|
|
3
|
+
import { DEFAULT_NAMESPACE } from './config'
|
|
4
|
+
|
|
5
|
+
/**
 * Describes the indexes the search needs: one ascending index per target on
 * its n-gram array field (`<namespace>.<target>.ngrams`), named
 * `<namespace>_<target>_ngrams`.
 *
 * @param config Search configuration; `namespace` falls back to `DEFAULT_NAMESPACE`.
 * @returns One index spec per configured target, in target order.
 */
export function searchIndexSpecs(
  config: MongoSearchConfig,
): Array<{ key: Record<string, 1>; name: string }> {
  const namespace = config.namespace ?? DEFAULT_NAMESPACE
  const specs: Array<{ key: Record<string, 1>; name: string }> = []
  for (const target of Object.keys(config.targets)) {
    specs.push({
      key: { [`${namespace}.${target}.ngrams`]: 1 },
      name: `${namespace}_${target}_ngrams`,
    })
  }
  return specs
}
|
|
14
|
+
|
|
15
|
+
/**
 * Creates every index listed by `searchIndexSpecs(config)` on `collection`,
 * one after the other.
 *
 * @param collection Collection that stores the derived search fields.
 * @param config Search configuration the index specs are derived from.
 */
export async function createSearchIndexes(
  collection: Collection,
  config: MongoSearchConfig,
): Promise<void> {
  const specs = searchIndexSpecs(config)
  for (const { key, name } of specs) {
    await collection.createIndex(key, { name })
  }
}
|