@open-mercato/search 0.4.2-canary-c02407ff85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +678 -0
- package/build.mjs +92 -0
- package/dist/di.js +157 -0
- package/dist/di.js.map +7 -0
- package/dist/fulltext/drivers/index.js +21 -0
- package/dist/fulltext/drivers/index.js.map +7 -0
- package/dist/fulltext/drivers/meilisearch/index.js +320 -0
- package/dist/fulltext/drivers/meilisearch/index.js.map +7 -0
- package/dist/fulltext/index.js +7 -0
- package/dist/fulltext/index.js.map +7 -0
- package/dist/fulltext/types.js +1 -0
- package/dist/fulltext/types.js.map +7 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +7 -0
- package/dist/indexer/index.js +8 -0
- package/dist/indexer/index.js.map +7 -0
- package/dist/indexer/search-indexer.js +848 -0
- package/dist/indexer/search-indexer.js.map +7 -0
- package/dist/indexer/subscribers/delete.js +41 -0
- package/dist/indexer/subscribers/delete.js.map +7 -0
- package/dist/lib/debug.js +34 -0
- package/dist/lib/debug.js.map +7 -0
- package/dist/lib/fallback-presenter.js +107 -0
- package/dist/lib/fallback-presenter.js.map +7 -0
- package/dist/lib/field-policy.js +75 -0
- package/dist/lib/field-policy.js.map +7 -0
- package/dist/lib/index.js +19 -0
- package/dist/lib/index.js.map +7 -0
- package/dist/lib/merger.js +93 -0
- package/dist/lib/merger.js.map +7 -0
- package/dist/lib/presenter-enricher.js +192 -0
- package/dist/lib/presenter-enricher.js.map +7 -0
- package/dist/modules/search/acl.js +14 -0
- package/dist/modules/search/acl.js.map +7 -0
- package/dist/modules/search/ai-tools.js +284 -0
- package/dist/modules/search/ai-tools.js.map +7 -0
- package/dist/modules/search/api/embeddings/reindex/cancel/route.js +65 -0
- package/dist/modules/search/api/embeddings/reindex/cancel/route.js.map +7 -0
- package/dist/modules/search/api/embeddings/reindex/route.js +165 -0
- package/dist/modules/search/api/embeddings/reindex/route.js.map +7 -0
- package/dist/modules/search/api/embeddings/route.js +246 -0
- package/dist/modules/search/api/embeddings/route.js.map +7 -0
- package/dist/modules/search/api/index/route.js +245 -0
- package/dist/modules/search/api/index/route.js.map +7 -0
- package/dist/modules/search/api/reindex/cancel/route.js +65 -0
- package/dist/modules/search/api/reindex/cancel/route.js.map +7 -0
- package/dist/modules/search/api/reindex/route.js +332 -0
- package/dist/modules/search/api/reindex/route.js.map +7 -0
- package/dist/modules/search/api/search/global/route.js +100 -0
- package/dist/modules/search/api/search/global/route.js.map +7 -0
- package/dist/modules/search/api/search/route.js +101 -0
- package/dist/modules/search/api/search/route.js.map +7 -0
- package/dist/modules/search/api/settings/fulltext/route.js +55 -0
- package/dist/modules/search/api/settings/fulltext/route.js.map +7 -0
- package/dist/modules/search/api/settings/global-search/route.js +80 -0
- package/dist/modules/search/api/settings/global-search/route.js.map +7 -0
- package/dist/modules/search/api/settings/route.js +118 -0
- package/dist/modules/search/api/settings/route.js.map +7 -0
- package/dist/modules/search/api/settings/vector-store/route.js +77 -0
- package/dist/modules/search/api/settings/vector-store/route.js.map +7 -0
- package/dist/modules/search/backend/config/search/page.js +10 -0
- package/dist/modules/search/backend/config/search/page.js.map +7 -0
- package/dist/modules/search/backend/config/search/page.meta.js +24 -0
- package/dist/modules/search/backend/config/search/page.meta.js.map +7 -0
- package/dist/modules/search/cli.js +698 -0
- package/dist/modules/search/cli.js.map +7 -0
- package/dist/modules/search/di.js +32 -0
- package/dist/modules/search/di.js.map +7 -0
- package/dist/modules/search/frontend/components/GlobalSearchDialog.js +357 -0
- package/dist/modules/search/frontend/components/GlobalSearchDialog.js.map +7 -0
- package/dist/modules/search/frontend/components/HybridSearchTable.js +343 -0
- package/dist/modules/search/frontend/components/HybridSearchTable.js.map +7 -0
- package/dist/modules/search/frontend/components/SearchSettingsPageClient.js +303 -0
- package/dist/modules/search/frontend/components/SearchSettingsPageClient.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js +360 -0
- package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js +101 -0
- package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/VectorSearchSection.js +608 -0
- package/dist/modules/search/frontend/components/sections/VectorSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/index.js +9 -0
- package/dist/modules/search/frontend/index.js.map +7 -0
- package/dist/modules/search/frontend/utils.js +41 -0
- package/dist/modules/search/frontend/utils.js.map +7 -0
- package/dist/modules/search/i18n/de.json +61 -0
- package/dist/modules/search/i18n/en.json +72 -0
- package/dist/modules/search/i18n/es.json +61 -0
- package/dist/modules/search/i18n/pl.json +61 -0
- package/dist/modules/search/index.js +11 -0
- package/dist/modules/search/index.js.map +7 -0
- package/dist/modules/search/lib/auto-indexing.js +29 -0
- package/dist/modules/search/lib/auto-indexing.js.map +7 -0
- package/dist/modules/search/lib/embedding-config.js +131 -0
- package/dist/modules/search/lib/embedding-config.js.map +7 -0
- package/dist/modules/search/lib/global-search-config.js +45 -0
- package/dist/modules/search/lib/global-search-config.js.map +7 -0
- package/dist/modules/search/lib/reindex-lock.js +99 -0
- package/dist/modules/search/lib/reindex-lock.js.map +7 -0
- package/dist/modules/search/subscribers/fulltext_upsert.js +64 -0
- package/dist/modules/search/subscribers/fulltext_upsert.js.map +7 -0
- package/dist/modules/search/subscribers/vector_delete.js +58 -0
- package/dist/modules/search/subscribers/vector_delete.js.map +7 -0
- package/dist/modules/search/subscribers/vector_purge.js +142 -0
- package/dist/modules/search/subscribers/vector_purge.js.map +7 -0
- package/dist/modules/search/subscribers/vector_upsert.js +58 -0
- package/dist/modules/search/subscribers/vector_upsert.js.map +7 -0
- package/dist/modules/search/workers/fulltext-index.worker.js +240 -0
- package/dist/modules/search/workers/fulltext-index.worker.js.map +7 -0
- package/dist/modules/search/workers/vector-index.worker.js +234 -0
- package/dist/modules/search/workers/vector-index.worker.js.map +7 -0
- package/dist/queue/fulltext-indexing.js +15 -0
- package/dist/queue/fulltext-indexing.js.map +7 -0
- package/dist/queue/index.js +3 -0
- package/dist/queue/index.js.map +7 -0
- package/dist/queue/vector-indexing.js +15 -0
- package/dist/queue/vector-indexing.js.map +7 -0
- package/dist/service.js +286 -0
- package/dist/service.js.map +7 -0
- package/dist/strategies/fulltext.strategy.js +116 -0
- package/dist/strategies/fulltext.strategy.js.map +7 -0
- package/dist/strategies/index.js +12 -0
- package/dist/strategies/index.js.map +7 -0
- package/dist/strategies/token.strategy.js +80 -0
- package/dist/strategies/token.strategy.js.map +7 -0
- package/dist/strategies/vector.strategy.js +137 -0
- package/dist/strategies/vector.strategy.js.map +7 -0
- package/dist/types.js +1 -0
- package/dist/types.js.map +7 -0
- package/dist/vector/drivers/chromadb/index.js +44 -0
- package/dist/vector/drivers/chromadb/index.js.map +7 -0
- package/dist/vector/drivers/index.js +9 -0
- package/dist/vector/drivers/index.js.map +7 -0
- package/dist/vector/drivers/pgvector/index.js +509 -0
- package/dist/vector/drivers/pgvector/index.js.map +7 -0
- package/dist/vector/drivers/qdrant/index.js +44 -0
- package/dist/vector/drivers/qdrant/index.js.map +7 -0
- package/dist/vector/index.js +4 -0
- package/dist/vector/index.js.map +7 -0
- package/dist/vector/lib/vector-logs.js +33 -0
- package/dist/vector/lib/vector-logs.js.map +7 -0
- package/dist/vector/services/checksum.js +20 -0
- package/dist/vector/services/checksum.js.map +7 -0
- package/dist/vector/services/embedding.js +222 -0
- package/dist/vector/services/embedding.js.map +7 -0
- package/dist/vector/services/index.js +4 -0
- package/dist/vector/services/index.js.map +7 -0
- package/dist/vector/services/vector-index.service.js +960 -0
- package/dist/vector/services/vector-index.service.js.map +7 -0
- package/dist/vector/types/pg.d.js +1 -0
- package/dist/vector/types/pg.d.js.map +7 -0
- package/dist/vector/types.js +75 -0
- package/dist/vector/types.js.map +7 -0
- package/jest.config.cjs +19 -0
- package/package.json +142 -0
- package/src/__tests__/queue.test.ts +148 -0
- package/src/__tests__/service.test.ts +345 -0
- package/src/__tests__/workers.test.ts +319 -0
- package/src/di.ts +291 -0
- package/src/fulltext/drivers/index.ts +41 -0
- package/src/fulltext/drivers/meilisearch/index.ts +410 -0
- package/src/fulltext/index.ts +13 -0
- package/src/fulltext/types.ts +115 -0
- package/src/index.ts +36 -0
- package/src/indexer/index.ts +13 -0
- package/src/indexer/search-indexer.ts +1141 -0
- package/src/indexer/subscribers/delete.ts +49 -0
- package/src/lib/debug.ts +46 -0
- package/src/lib/fallback-presenter.ts +106 -0
- package/src/lib/field-policy.ts +169 -0
- package/src/lib/index.ts +13 -0
- package/src/lib/merger.ts +159 -0
- package/src/lib/presenter-enricher.ts +323 -0
- package/src/modules/search/README.md +694 -0
- package/src/modules/search/acl.ts +10 -0
- package/src/modules/search/ai-tools.ts +467 -0
- package/src/modules/search/api/embeddings/reindex/cancel/route.ts +77 -0
- package/src/modules/search/api/embeddings/reindex/route.ts +197 -0
- package/src/modules/search/api/embeddings/route.ts +304 -0
- package/src/modules/search/api/index/route.ts +297 -0
- package/src/modules/search/api/reindex/cancel/route.ts +77 -0
- package/src/modules/search/api/reindex/route.ts +419 -0
- package/src/modules/search/api/search/global/route.ts +120 -0
- package/src/modules/search/api/search/route.ts +121 -0
- package/src/modules/search/api/settings/fulltext/route.ts +82 -0
- package/src/modules/search/api/settings/global-search/route.ts +91 -0
- package/src/modules/search/api/settings/route.ts +187 -0
- package/src/modules/search/api/settings/vector-store/route.ts +105 -0
- package/src/modules/search/backend/config/search/page.meta.ts +22 -0
- package/src/modules/search/backend/config/search/page.tsx +12 -0
- package/src/modules/search/cli.ts +818 -0
- package/src/modules/search/di.ts +50 -0
- package/src/modules/search/frontend/components/GlobalSearchDialog.tsx +436 -0
- package/src/modules/search/frontend/components/HybridSearchTable.tsx +418 -0
- package/src/modules/search/frontend/components/SearchSettingsPageClient.tsx +476 -0
- package/src/modules/search/frontend/components/sections/FulltextSearchSection.tsx +624 -0
- package/src/modules/search/frontend/components/sections/GlobalSearchSection.tsx +124 -0
- package/src/modules/search/frontend/components/sections/VectorSearchSection.tsx +943 -0
- package/src/modules/search/frontend/index.ts +3 -0
- package/src/modules/search/frontend/utils.ts +82 -0
- package/src/modules/search/i18n/de.json +61 -0
- package/src/modules/search/i18n/en.json +72 -0
- package/src/modules/search/i18n/es.json +61 -0
- package/src/modules/search/i18n/pl.json +61 -0
- package/src/modules/search/index.ts +9 -0
- package/src/modules/search/lib/auto-indexing.ts +35 -0
- package/src/modules/search/lib/embedding-config.ts +161 -0
- package/src/modules/search/lib/global-search-config.ts +69 -0
- package/src/modules/search/lib/reindex-lock.ts +201 -0
- package/src/modules/search/subscribers/fulltext_upsert.ts +83 -0
- package/src/modules/search/subscribers/vector_delete.ts +75 -0
- package/src/modules/search/subscribers/vector_purge.ts +161 -0
- package/src/modules/search/subscribers/vector_upsert.ts +75 -0
- package/src/modules/search/workers/fulltext-index.worker.ts +318 -0
- package/src/modules/search/workers/vector-index.worker.ts +292 -0
- package/src/queue/fulltext-indexing.ts +87 -0
- package/src/queue/index.ts +2 -0
- package/src/queue/vector-indexing.ts +66 -0
- package/src/service.ts +397 -0
- package/src/strategies/fulltext.strategy.ts +155 -0
- package/src/strategies/index.ts +17 -0
- package/src/strategies/token.strategy.ts +153 -0
- package/src/strategies/vector.strategy.ts +234 -0
- package/src/types.ts +38 -0
- package/src/vector/drivers/chromadb/index.ts +49 -0
- package/src/vector/drivers/index.ts +4 -0
- package/src/vector/drivers/pgvector/index.ts +627 -0
- package/src/vector/drivers/qdrant/index.ts +49 -0
- package/src/vector/index.ts +3 -0
- package/src/vector/lib/vector-logs.ts +46 -0
- package/src/vector/services/checksum.ts +18 -0
- package/src/vector/services/embedding.ts +275 -0
- package/src/vector/services/index.ts +3 -0
- package/src/vector/services/vector-index.service.ts +1234 -0
- package/src/vector/types/pg.d.ts +1 -0
- package/src/vector/types.ts +220 -0
- package/tsconfig.json +9 -0
- package/watch.mjs +6 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { SearchIndexer } from '../search-indexer'
|
|
2
|
+
import type { EntityId } from '@open-mercato/shared/modules/entities'
|
|
3
|
+
import type { SearchDeletePayload } from '@open-mercato/shared/modules/search'
|
|
4
|
+
import { searchDebugWarn, searchError } from '../../lib/debug'
|
|
5
|
+
|
|
6
|
+
/**
 * Event subscriber metadata: names the `search.delete_record` event and
 * sets `persistent` to `false`.
 */
export const metadata = { event: 'search.delete_record', persistent: false }
|
|
13
|
+
|
|
14
|
+
/**
 * Build the handler for `search.delete_record` events.
 *
 * The returned async handler coerces `entityId`, `recordId` and `tenantId`
 * from the payload to strings (missing or falsy values become `''`). When any
 * of the three is empty it emits a debug warning and resolves without calling
 * the indexer. Otherwise it forwards the identifiers to `indexer.deleteRecord`;
 * a failure there is reported through `searchError` and then rethrown.
 *
 * @param indexer - Indexer whose `deleteRecord` method performs the deletion
 * @returns Async handler that accepts a `SearchDeletePayload`
 */
export function createSearchDeleteSubscriber(indexer: SearchIndexer) {
  const scope = 'search.delete_record'
  const asText = (raw: unknown): string => String(raw || '')

  return async function handle(payload: SearchDeletePayload): Promise<void> {
    const entityId = asText(payload?.entityId) as EntityId
    const recordId = asText(payload?.recordId)
    const tenantId = asText(payload?.tenantId)

    const complete = Boolean(entityId && recordId && tenantId)
    if (!complete) {
      searchDebugWarn(scope, 'Missing required fields', { entityId, recordId, tenantId })
      return
    }

    try {
      await indexer.deleteRecord({ entityId, recordId, tenantId })
    } catch (error) {
      const reason = error instanceof Error ? error.message : error
      searchError(scope, 'Failed to delete record', { entityId, recordId, error: reason })
      throw error
    }
  }
}
|
|
48
|
+
|
|
49
|
+
export default createSearchDeleteSubscriber
|
package/src/lib/debug.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Debug utilities for search module.
|
|
3
|
+
*
|
|
4
|
+
* Set OM_SEARCH_DEBUG=true to enable debug logging.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Report whether search debug logging is switched on.
 *
 * Reads `OM_SEARCH_DEBUG` from the environment on every call and accepts
 * `1`, `true`, `yes` or `on`, case-insensitively. Surrounding whitespace is
 * ignored so that values such as `"true "` or `"on\r"` (easy to produce in a
 * hand-edited env file) still enable logging instead of silently disabling it.
 *
 * @returns `true` when the flag holds one of the accepted values
 */
export function isSearchDebugEnabled(): boolean {
  const raw = (process.env.OM_SEARCH_DEBUG ?? '').trim().toLowerCase()
  return raw === '1' || raw === 'true' || raw === 'yes' || raw === 'on'
}
|
|
11
|
+
|
|
12
|
+
/**
 * Write a `console.log` line tagged with `[prefix]`, but only while
 * `isSearchDebugEnabled()` returns true.
 *
 * @param prefix - Tag rendered in square brackets before the message
 * @param message - Text to log
 * @param data - Optional context object, passed as a second console argument when truthy
 */
export function searchDebug(prefix: string, message: string, data?: Record<string, unknown>): void {
  if (isSearchDebugEnabled()) {
    const line = `[${prefix}] ${message}`
    const args: unknown[] = data ? [line, data] : [line]
    console.log(...args)
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Write a `console.warn` line tagged with `[prefix]`, but only while
 * `isSearchDebugEnabled()` returns true.
 *
 * @param prefix - Tag rendered in square brackets before the message
 * @param message - Warning text
 * @param data - Optional context object, passed as a second console argument when truthy
 */
export function searchDebugWarn(prefix: string, message: string, data?: Record<string, unknown>): void {
  if (isSearchDebugEnabled()) {
    const line = `[${prefix}] ${message}`
    const args: unknown[] = data ? [line, data] : [line]
    console.warn(...args)
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Write a `console.error` line tagged with `[prefix]`.
 *
 * Unlike the debug helpers this is not gated by the debug flag: errors are
 * always emitted so they stay visible for troubleshooting.
 *
 * @param prefix - Tag rendered in square brackets before the message
 * @param message - Error text
 * @param data - Optional context object, passed as a second console argument when truthy
 */
export function searchError(prefix: string, message: string, data?: Record<string, unknown>): void {
  const line = `[${prefix}] ${message}`
  const args: unknown[] = data ? [line, data] : [line]
  console.error(...args)
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import type { SearchResultPresenter } from '@open-mercato/shared/modules/search'
|
|
2
|
+
|
|
3
|
+
// Candidate title fields, listed in priority order.
const TITLE_FIELDS = [
  // generic display names
  'display_name', 'displayName', 'name', 'title', 'label',
  // person / organisation names
  'full_name', 'fullName', 'brand_name', 'brandName', 'legal_name', 'legalName',
  'first_name', 'firstName', 'last_name', 'lastName', 'preferred_name', 'preferredName',
  // contact
  'email', 'primary_email', 'primaryEmail',
  // codes and identifiers
  'code', 'sku', 'reference', 'identifier', 'slug',
]

// Candidate subtitle fields, listed in the order they are checked.
const SUBTITLE_FIELDS = [
  // free text
  'description', 'summary', 'notes',
  // contact
  'email', 'primary_email', 'primaryEmail', 'phone', 'primary_phone', 'primaryPhone',
  // classification
  'status', 'type', 'kind', 'category',
]
|
|
25
|
+
|
|
26
|
+
/**
 * Return the first non-blank value found under `fields`, trying them in order.
 *
 * Only primitive values (string, number, boolean, bigint) are considered.
 * Objects and arrays are skipped so that a nested value never surfaces as the
 * literal text "[object Object]".
 *
 * @param doc - Record to inspect
 * @param fields - Field names to try, highest priority first
 * @returns The trimmed string form of the first usable value, or `null` when none qualifies
 */
function findFirstValue(doc: Record<string, unknown>, fields: string[]): string | null {
  for (const field of fields) {
    const value = doc[field]
    const isPrimitive =
      typeof value === 'string' ||
      typeof value === 'number' ||
      typeof value === 'boolean' ||
      typeof value === 'bigint'
    if (!isPrimitive) continue

    // Stringify and trim once; whitespace-only values count as blank.
    const text = String(value).trim()
    if (text.length > 0) return text
  }
  return null
}
|
|
35
|
+
|
|
36
|
+
/**
 * Scan `doc` in key order and return the first string value usable for display.
 *
 * Keys are ignored when they are technical fields (ids, tenant/organization
 * ids, timestamps), are listed in `excludeFields`, or start with `cf:` / `cf_`.
 * A value qualifies when it is a string that is non-blank after trimming and
 * whose untrimmed length is below 200 characters.
 *
 * @param doc - Record to inspect
 * @param excludeFields - Additional field names to ignore
 * @returns The trimmed value, or `null` when nothing qualifies
 */
function findAnyStringValue(doc: Record<string, unknown>, excludeFields: Set<string>): string | null {
  const ignored = new Set<string>(excludeFields)
  const technicalFields = [
    'id', 'tenant_id', 'tenantId', 'organization_id', 'organizationId',
    'created_at', 'createdAt', 'updated_at', 'updatedAt', 'deleted_at', 'deletedAt',
  ]
  for (const name of technicalFields) ignored.add(name)

  const isCustomField = (name: string): boolean => name.startsWith('cf:') || name.startsWith('cf_')

  for (const key of Object.keys(doc)) {
    if (ignored.has(key) || isCustomField(key)) continue

    const candidate = doc[key]
    if (typeof candidate !== 'string') continue

    const trimmed = candidate.trim()
    if (trimmed.length > 0 && candidate.length < 200) return trimmed
  }

  return null
}
|
|
53
|
+
|
|
54
|
+
/**
 * Turn an entity id into a human-readable label.
 *
 * Takes the segment after the first `:` (or the whole id when there is no
 * colon), replaces underscores with spaces and upper-cases the first character
 * of every word, e.g. `customers:person_profile` becomes `Person Profile`.
 *
 * @param entityId - Entity identifier
 * @returns The formatted label
 */
function formatEntityLabel(entityId: string): string {
  const [, entityName = entityId] = entityId.split(':')
  const spaced = entityName.replace(/_/g, ' ')
  return spaced.replace(/\b\w/g, (letter) => letter.toUpperCase())
}
|
|
60
|
+
|
|
61
|
+
/**
 * Extract a presenter from doc fields when no search.ts config exists.
 *
 * Title resolution, in order:
 * 1. the first non-blank value among `TITLE_FIELDS`;
 * 2. otherwise any displayable string field that is not a subtitle field;
 * 3. otherwise the entity label followed by the record id (truncated to
 *    8 characters plus `...` when longer).
 *
 * The subtitle joins up to three non-blank `SUBTITLE_FIELDS` values with
 * ` · ` and is capped at 120 characters. Values are compared with the title
 * after trimming, so a padded copy of the title is not repeated, and
 * non-primitive values are skipped rather than rendered as "[object Object]".
 *
 * TODO: This is a basic implementation. Future improvements could include:
 * - Entity-type specific field mappings
 * - Smarter field combination (e.g., first_name + last_name)
 * - Custom field (cf:*) inspection for user-defined display fields
 * - Configuration for default presenter fields per entity type
 *
 * @param doc - Indexed document fields
 * @param entityId - Entity identifier, also used to derive the badge
 * @param recordId - Record identifier, used only for the last-resort title
 * @returns Presenter with `title`, optional `subtitle` and `badge`
 */
export function extractFallbackPresenter(
  doc: Record<string, unknown>,
  entityId: string,
  recordId: string,
): SearchResultPresenter {
  const entityLabel = formatEntityLabel(entityId)

  // 1. Try common title fields
  let title = findFirstValue(doc, TITLE_FIELDS)

  // 2. If no title found, try any string field
  if (!title) {
    title = findAnyStringValue(doc, new Set(SUBTITLE_FIELDS))
  }

  // 3. Last resort: use entity label + truncated record ID
  if (!title) {
    const shortId = recordId.length > 8 ? recordId.slice(0, 8) + '...' : recordId
    title = `${entityLabel} ${shortId}`
  }

  // Build subtitle from multiple relevant fields to show more context
  const subtitleParts: string[] = []
  for (const field of SUBTITLE_FIELDS) {
    const value = doc[field]
    const isPrimitive =
      typeof value === 'string' ||
      typeof value === 'number' ||
      typeof value === 'boolean' ||
      typeof value === 'bigint'
    if (!isPrimitive) continue

    // Compare the trimmed text: the title itself is always trimmed.
    const text = String(value).trim()
    if (text.length === 0 || text === title) continue

    subtitleParts.push(text)
    if (subtitleParts.length >= 3) break // Limit to 3 parts
  }

  return {
    title,
    subtitle: subtitleParts.length > 0 ? subtitleParts.join(' · ').slice(0, 120) : undefined,
    badge: entityLabel,
  }
}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import type { SearchFieldPolicy } from '../types'
|
|
2
|
+
|
|
3
|
+
/**
 * One encryption map entry, in the shape it is stored in the database.
 * Mirrors the EncryptionMap structure from entities/data/entities.
 */
export type EncryptionMapEntry = {
  /** Name of the encrypted field */
  field: string
  /** Corresponding hash field, when the encrypted field has one */
  hashField?: string | null
}

/**
 * Options that control which record fields are extracted for indexing.
 */
export type FieldExtractionConfig = {
  /** Encryption map entries loaded from the database */
  encryptedFields?: EncryptionMapEntry[]
  /** Extra field policy coming from the entity search config */
  fieldPolicy?: SearchFieldPolicy
}
|
|
21
|
+
|
|
22
|
+
/**
 * Keep only the fields of a record that are safe to send to a search provider.
 *
 * A field is dropped when any of the following holds:
 * - its value is `null` or `undefined`;
 * - it appears in the encryption map (its content is encrypted);
 * - it is listed in `fieldPolicy.excluded` or `fieldPolicy.hashOnly`;
 * - `fieldPolicy.searchable` is defined and does not list it (whitelist mode).
 *
 * @param fields - All fields from the record
 * @param config - Encryption map and field policy; omit to apply only the null check
 * @returns New object holding only the safe-to-index fields
 */
export function extractSearchableFields(
  fields: Record<string, unknown>,
  config?: FieldExtractionConfig,
): Record<string, unknown> {
  const policy = config?.fieldPolicy

  // Everything that must never be indexed as plain text, gathered in one set.
  const blocked = new Set<string>()
  for (const entry of config?.encryptedFields ?? []) blocked.add(entry.field)
  for (const name of policy?.excluded ?? []) blocked.add(name)
  for (const name of policy?.hashOnly ?? []) blocked.add(name)

  const allowed = policy?.searchable ? new Set(policy.searchable) : null

  const result: Record<string, unknown> = {}
  for (const name of Object.keys(fields)) {
    const value = fields[name]
    const usable = value != null && !blocked.has(name) && (allowed === null || allowed.has(name))
    if (usable) result[name] = value
  }
  return result
}
|
|
71
|
+
|
|
72
|
+
/**
 * Extract the fields that should be searched through their hash only.
 *
 * A field qualifies when it is listed in `fieldPolicy.hashOnly` or when its
 * encryption map entry carries a `hashField`. Fields with a `null`/`undefined`
 * value are skipped. Fields listed in `fieldPolicy.excluded` are never
 * returned: an explicit exclusion takes precedence, which keeps this function
 * in agreement with `classifyFields`, where the exclusion check runs first.
 *
 * @param fields - All fields from the record
 * @param config - Extraction configuration with encryption map and field policy
 * @returns Object containing field values for hash-based search
 */
export function extractHashOnlyFields(
  fields: Record<string, unknown>,
  config?: FieldExtractionConfig,
): Record<string, unknown> {
  const policy = config?.fieldPolicy
  const excludedFromPolicy = new Set(policy?.excluded ?? [])
  const hashOnlyFromPolicy = new Set(policy?.hashOnly ?? [])

  // Fields with hashField in encryption map are also hash-searchable
  const hashFieldsFromEncryption = new Set<string>(
    config?.encryptedFields
      ?.filter((e) => e.hashField)
      .map((e) => e.field) ?? [],
  )

  const result: Record<string, unknown> = {}

  for (const [field, value] of Object.entries(fields)) {
    if (value == null) continue

    // Explicitly excluded fields must not be exposed, even to hash-based search.
    if (excludedFromPolicy.has(field)) continue

    if (hashOnlyFromPolicy.has(field) || hashFieldsFromEncryption.has(field)) {
      result[field] = value
    }
  }

  return result
}
|
|
105
|
+
|
|
106
|
+
/**
 * Sort every field name of a record into `searchable`, `hashOnly` or `excluded`.
 * Intended for debugging and for understanding how fields will be indexed.
 *
 * Rules are evaluated per field name, first match wins:
 * 1. listed in `fieldPolicy.excluded` -> excluded;
 * 2. listed in `fieldPolicy.hashOnly`, or encrypted with a `hashField` -> hashOnly;
 * 3. encrypted without a `hashField` -> excluded;
 * 4. `fieldPolicy.searchable` is defined and does not list it -> excluded;
 * 5. anything else -> searchable.
 *
 * Only field names are examined; values (including `null`) play no role here.
 *
 * @param fields - All fields from the record
 * @param config - Extraction configuration
 * @returns Field names grouped by classification, in the record's key order
 */
export function classifyFields(
  fields: Record<string, unknown>,
  config?: FieldExtractionConfig,
): {
  searchable: string[]
  hashOnly: string[]
  excluded: string[]
} {
  const policy = config?.fieldPolicy
  const encryptionEntries = config?.encryptedFields ?? []

  const encrypted = new Set<string>(encryptionEntries.map((entry) => entry.field))
  const hashBacked = new Set<string>(
    encryptionEntries.filter((entry) => entry.hashField).map((entry) => entry.field),
  )
  const policyHashOnly = new Set(policy?.hashOnly ?? [])
  const policyExcluded = new Set(policy?.excluded ?? [])
  const allowed = policy?.searchable ? new Set(policy.searchable) : null

  type Bucket = 'searchable' | 'hashOnly' | 'excluded'

  const bucketOf = (field: string): Bucket => {
    if (policyExcluded.has(field)) return 'excluded'
    if (policyHashOnly.has(field) || hashBacked.has(field)) return 'hashOnly'
    // Hash-backed fields were handled above, so any encrypted field left has no hash.
    if (encrypted.has(field)) return 'excluded'
    if (allowed && !allowed.has(field)) return 'excluded'
    return 'searchable'
  }

  const buckets: Record<Bucket, string[]> = { searchable: [], hashOnly: [], excluded: [] }
  for (const field of Object.keys(fields)) {
    buckets[bucketOf(field)].push(field)
  }
  return buckets
}
|
package/src/lib/index.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export {
|
|
2
|
+
mergeAndRankResults,
|
|
3
|
+
deduplicateResults,
|
|
4
|
+
normalizeScores,
|
|
5
|
+
} from './merger'
|
|
6
|
+
|
|
7
|
+
export {
|
|
8
|
+
extractSearchableFields,
|
|
9
|
+
extractHashOnlyFields,
|
|
10
|
+
classifyFields,
|
|
11
|
+
type EncryptionMapEntry,
|
|
12
|
+
type FieldExtractionConfig,
|
|
13
|
+
} from './field-policy'
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import type { SearchResult, ResultMergeConfig, SearchStrategyId } from '../types'
|
|
2
|
+
|
|
3
|
+
/**
 * Default RRF constant (k=60 is standard in literature).
 * Higher values reduce the influence of ranking position.
 */
const RRF_K = 60

/**
 * Copy `url` and `links` from `other` onto `kept` wherever `kept` lacks them.
 * Returns `kept` itself when there is nothing to fill in.
 */
function backfillNavigation(kept: SearchResult, other: SearchResult): SearchResult {
  const needsUrl = kept.url == null && other.url != null
  const needsLinks = kept.links == null && other.links != null
  if (!needsUrl && !needsLinks) return kept
  return {
    ...kept,
    ...(needsUrl ? { url: other.url } : {}),
    ...(needsLinks ? { links: other.links } : {}),
  }
}

/**
 * Reciprocal Rank Fusion (RRF) algorithm for combining results from multiple search strategies.
 *
 * Results are grouped by `source`; within each group the position in the input
 * array is the rank. Every occurrence of a record contributes
 * `weight / (RRF_K + rank + 1)`, and contributions for the same
 * `entityId` + `recordId` pair are summed into the final score.
 *
 * When a record is returned by several strategies, one result object is kept:
 * - a result with a presenter title is preferred over one without;
 * - otherwise the occurrence with the highest single contribution wins.
 * Whatever object is kept, missing `url` / `links` are filled in from the
 * other occurrences, so the outcome does not depend on strategy order.
 *
 * Each returned result has `score` set to its RRF score and carries
 * `metadata._sources` (strategies that returned it) and `metadata._rrfScore`.
 * The input array and its elements are not modified.
 *
 * Reference: Cormack, G.V., Clarke, C.L.A., & Buettcher, S. (2009).
 * "Reciprocal rank fusion outperforms condorcet and individual rank learning methods"
 * https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf
 *
 * @param results - Array of search results from multiple strategies
 * @param config - Merge configuration with weights and thresholds
 * @returns Merged results sorted by RRF score, highest first
 */
export function mergeAndRankResults(
  results: SearchResult[],
  config: ResultMergeConfig,
): SearchResult[] {
  if (results.length === 0) return []

  // Group results by source strategy for rank calculation
  const bySource = new Map<SearchStrategyId, SearchResult[]>()
  for (const result of results) {
    const list = bySource.get(result.source)
    if (list) {
      list.push(result)
    } else {
      bySource.set(result.source, [result])
    }
  }

  // bestContribution tracks the highest single RRF contribution for the kept result object
  type MergeEntry = {
    result: SearchResult
    rrf: number
    sources: Set<SearchStrategyId>
    bestContribution: number
  }
  const seen = new Map<string, MergeEntry>()

  for (const [source, sourceResults] of bySource) {
    const weight = config.strategyWeights?.[source] ?? 1.0

    for (const [rank, result] of sourceResults.entries()) {
      const key = `${result.entityId}:${result.recordId}`
      const rrfScore = weight / (RRF_K + rank + 1)

      const existing = seen.get(key)
      if (!existing) {
        seen.set(key, {
          result: { ...result },
          rrf: rrfScore,
          sources: new Set([source]),
          bestContribution: rrfScore,
        })
        continue
      }

      // Combine RRF scores for duplicates found in multiple strategies
      existing.rrf += rrfScore
      existing.sources.add(source)

      const hasExistingPresenter = existing.result.presenter?.title != null
      const hasNewPresenter = result.presenter?.title != null

      if (!hasExistingPresenter && hasNewPresenter) {
        // Kept result has no presenter, new one does - take the new presenter
        existing.result = {
          ...existing.result,
          presenter: result.presenter,
          url: existing.result.url ?? result.url,
          links: existing.result.links ?? result.links,
        }
        existing.bestContribution = Math.max(existing.bestContribution, rrfScore)
      } else if (hasExistingPresenter === hasNewPresenter && rrfScore > existing.bestContribution) {
        // Equal presenter status: keep the occurrence with the better RRF
        // contribution (not raw score), without losing url/links already known.
        existing.result = backfillNavigation({ ...result }, existing.result)
        existing.bestContribution = rrfScore
      } else {
        // Kept result stays; still pick up url/links it is missing.
        existing.result = backfillNavigation(existing.result, result)
      }
    }
  }

  const minScore = config.minScore

  return Array.from(seen.values(), ({ result, rrf, sources }) => ({
    ...result,
    score: rrf,
    metadata: {
      ...result.metadata,
      _sources: Array.from(sources),
      _rrfScore: rrf,
    },
  }))
    .filter((r) => minScore == null || r.score >= minScore)
    .sort((a, b) => b.score - a.score)
}
|
|
112
|
+
|
|
113
|
+
/**
 * Collapse duplicate hits without applying RRF scoring.
 *
 * For each `entityId` + `recordId` pair only the result with the strictly
 * highest `score` survives; on a tie the earlier result is kept.
 *
 * @param results - Array of search results
 * @returns A new array of unique results sorted by score, highest first
 */
export function deduplicateResults(results: SearchResult[]): SearchResult[] {
  const bestByKey = results.reduce((best, candidate) => {
    const key = `${candidate.entityId}:${candidate.recordId}`
    const current = best.get(key)
    if (!current || candidate.score > current.score) {
      best.set(key, candidate)
    }
    return best
  }, new Map<string, SearchResult>())

  return [...bestByKey.values()].sort((left, right) => right.score - left.score)
}
|
|
134
|
+
|
|
135
|
+
/**
 * Normalize scores to the 0-1 range using min-max normalization.
 * Useful when combining strategies with different score scales.
 *
 * The minimum and maximum are found with a single pass instead of
 * `Math.min(...scores)` / `Math.max(...scores)`: spreading passes every score
 * as a separate call argument and can throw a RangeError on large result sets.
 *
 * When all scores are equal every result gets a score of 1.0.
 * The input results are not modified; new objects are returned.
 *
 * @param results - Array of search results
 * @returns Results with normalized scores, in the original order
 */
export function normalizeScores(results: SearchResult[]): SearchResult[] {
  if (results.length === 0) return []

  let minScore = Infinity
  let maxScore = -Infinity
  for (const { score } of results) {
    // Math.min/Math.max keep NaN propagation identical to the spread form.
    minScore = Math.min(minScore, score)
    maxScore = Math.max(maxScore, score)
  }
  const range = maxScore - minScore

  if (range === 0) {
    // All scores are the same, normalize to 1.0
    return results.map((r) => ({ ...r, score: 1.0 }))
  }

  return results.map((r) => ({
    ...r,
    score: (r.score - minScore) / range,
  }))
}
|