@open-mercato/search 0.4.2-canary-c02407ff85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +678 -0
- package/build.mjs +92 -0
- package/dist/di.js +157 -0
- package/dist/di.js.map +7 -0
- package/dist/fulltext/drivers/index.js +21 -0
- package/dist/fulltext/drivers/index.js.map +7 -0
- package/dist/fulltext/drivers/meilisearch/index.js +320 -0
- package/dist/fulltext/drivers/meilisearch/index.js.map +7 -0
- package/dist/fulltext/index.js +7 -0
- package/dist/fulltext/index.js.map +7 -0
- package/dist/fulltext/types.js +1 -0
- package/dist/fulltext/types.js.map +7 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +7 -0
- package/dist/indexer/index.js +8 -0
- package/dist/indexer/index.js.map +7 -0
- package/dist/indexer/search-indexer.js +848 -0
- package/dist/indexer/search-indexer.js.map +7 -0
- package/dist/indexer/subscribers/delete.js +41 -0
- package/dist/indexer/subscribers/delete.js.map +7 -0
- package/dist/lib/debug.js +34 -0
- package/dist/lib/debug.js.map +7 -0
- package/dist/lib/fallback-presenter.js +107 -0
- package/dist/lib/fallback-presenter.js.map +7 -0
- package/dist/lib/field-policy.js +75 -0
- package/dist/lib/field-policy.js.map +7 -0
- package/dist/lib/index.js +19 -0
- package/dist/lib/index.js.map +7 -0
- package/dist/lib/merger.js +93 -0
- package/dist/lib/merger.js.map +7 -0
- package/dist/lib/presenter-enricher.js +192 -0
- package/dist/lib/presenter-enricher.js.map +7 -0
- package/dist/modules/search/acl.js +14 -0
- package/dist/modules/search/acl.js.map +7 -0
- package/dist/modules/search/ai-tools.js +284 -0
- package/dist/modules/search/ai-tools.js.map +7 -0
- package/dist/modules/search/api/embeddings/reindex/cancel/route.js +65 -0
- package/dist/modules/search/api/embeddings/reindex/cancel/route.js.map +7 -0
- package/dist/modules/search/api/embeddings/reindex/route.js +165 -0
- package/dist/modules/search/api/embeddings/reindex/route.js.map +7 -0
- package/dist/modules/search/api/embeddings/route.js +246 -0
- package/dist/modules/search/api/embeddings/route.js.map +7 -0
- package/dist/modules/search/api/index/route.js +245 -0
- package/dist/modules/search/api/index/route.js.map +7 -0
- package/dist/modules/search/api/reindex/cancel/route.js +65 -0
- package/dist/modules/search/api/reindex/cancel/route.js.map +7 -0
- package/dist/modules/search/api/reindex/route.js +332 -0
- package/dist/modules/search/api/reindex/route.js.map +7 -0
- package/dist/modules/search/api/search/global/route.js +100 -0
- package/dist/modules/search/api/search/global/route.js.map +7 -0
- package/dist/modules/search/api/search/route.js +101 -0
- package/dist/modules/search/api/search/route.js.map +7 -0
- package/dist/modules/search/api/settings/fulltext/route.js +55 -0
- package/dist/modules/search/api/settings/fulltext/route.js.map +7 -0
- package/dist/modules/search/api/settings/global-search/route.js +80 -0
- package/dist/modules/search/api/settings/global-search/route.js.map +7 -0
- package/dist/modules/search/api/settings/route.js +118 -0
- package/dist/modules/search/api/settings/route.js.map +7 -0
- package/dist/modules/search/api/settings/vector-store/route.js +77 -0
- package/dist/modules/search/api/settings/vector-store/route.js.map +7 -0
- package/dist/modules/search/backend/config/search/page.js +10 -0
- package/dist/modules/search/backend/config/search/page.js.map +7 -0
- package/dist/modules/search/backend/config/search/page.meta.js +24 -0
- package/dist/modules/search/backend/config/search/page.meta.js.map +7 -0
- package/dist/modules/search/cli.js +698 -0
- package/dist/modules/search/cli.js.map +7 -0
- package/dist/modules/search/di.js +32 -0
- package/dist/modules/search/di.js.map +7 -0
- package/dist/modules/search/frontend/components/GlobalSearchDialog.js +357 -0
- package/dist/modules/search/frontend/components/GlobalSearchDialog.js.map +7 -0
- package/dist/modules/search/frontend/components/HybridSearchTable.js +343 -0
- package/dist/modules/search/frontend/components/HybridSearchTable.js.map +7 -0
- package/dist/modules/search/frontend/components/SearchSettingsPageClient.js +303 -0
- package/dist/modules/search/frontend/components/SearchSettingsPageClient.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js +360 -0
- package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js +101 -0
- package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/VectorSearchSection.js +608 -0
- package/dist/modules/search/frontend/components/sections/VectorSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/index.js +9 -0
- package/dist/modules/search/frontend/index.js.map +7 -0
- package/dist/modules/search/frontend/utils.js +41 -0
- package/dist/modules/search/frontend/utils.js.map +7 -0
- package/dist/modules/search/i18n/de.json +61 -0
- package/dist/modules/search/i18n/en.json +72 -0
- package/dist/modules/search/i18n/es.json +61 -0
- package/dist/modules/search/i18n/pl.json +61 -0
- package/dist/modules/search/index.js +11 -0
- package/dist/modules/search/index.js.map +7 -0
- package/dist/modules/search/lib/auto-indexing.js +29 -0
- package/dist/modules/search/lib/auto-indexing.js.map +7 -0
- package/dist/modules/search/lib/embedding-config.js +131 -0
- package/dist/modules/search/lib/embedding-config.js.map +7 -0
- package/dist/modules/search/lib/global-search-config.js +45 -0
- package/dist/modules/search/lib/global-search-config.js.map +7 -0
- package/dist/modules/search/lib/reindex-lock.js +99 -0
- package/dist/modules/search/lib/reindex-lock.js.map +7 -0
- package/dist/modules/search/subscribers/fulltext_upsert.js +64 -0
- package/dist/modules/search/subscribers/fulltext_upsert.js.map +7 -0
- package/dist/modules/search/subscribers/vector_delete.js +58 -0
- package/dist/modules/search/subscribers/vector_delete.js.map +7 -0
- package/dist/modules/search/subscribers/vector_purge.js +142 -0
- package/dist/modules/search/subscribers/vector_purge.js.map +7 -0
- package/dist/modules/search/subscribers/vector_upsert.js +58 -0
- package/dist/modules/search/subscribers/vector_upsert.js.map +7 -0
- package/dist/modules/search/workers/fulltext-index.worker.js +240 -0
- package/dist/modules/search/workers/fulltext-index.worker.js.map +7 -0
- package/dist/modules/search/workers/vector-index.worker.js +234 -0
- package/dist/modules/search/workers/vector-index.worker.js.map +7 -0
- package/dist/queue/fulltext-indexing.js +15 -0
- package/dist/queue/fulltext-indexing.js.map +7 -0
- package/dist/queue/index.js +3 -0
- package/dist/queue/index.js.map +7 -0
- package/dist/queue/vector-indexing.js +15 -0
- package/dist/queue/vector-indexing.js.map +7 -0
- package/dist/service.js +286 -0
- package/dist/service.js.map +7 -0
- package/dist/strategies/fulltext.strategy.js +116 -0
- package/dist/strategies/fulltext.strategy.js.map +7 -0
- package/dist/strategies/index.js +12 -0
- package/dist/strategies/index.js.map +7 -0
- package/dist/strategies/token.strategy.js +80 -0
- package/dist/strategies/token.strategy.js.map +7 -0
- package/dist/strategies/vector.strategy.js +137 -0
- package/dist/strategies/vector.strategy.js.map +7 -0
- package/dist/types.js +1 -0
- package/dist/types.js.map +7 -0
- package/dist/vector/drivers/chromadb/index.js +44 -0
- package/dist/vector/drivers/chromadb/index.js.map +7 -0
- package/dist/vector/drivers/index.js +9 -0
- package/dist/vector/drivers/index.js.map +7 -0
- package/dist/vector/drivers/pgvector/index.js +509 -0
- package/dist/vector/drivers/pgvector/index.js.map +7 -0
- package/dist/vector/drivers/qdrant/index.js +44 -0
- package/dist/vector/drivers/qdrant/index.js.map +7 -0
- package/dist/vector/index.js +4 -0
- package/dist/vector/index.js.map +7 -0
- package/dist/vector/lib/vector-logs.js +33 -0
- package/dist/vector/lib/vector-logs.js.map +7 -0
- package/dist/vector/services/checksum.js +20 -0
- package/dist/vector/services/checksum.js.map +7 -0
- package/dist/vector/services/embedding.js +222 -0
- package/dist/vector/services/embedding.js.map +7 -0
- package/dist/vector/services/index.js +4 -0
- package/dist/vector/services/index.js.map +7 -0
- package/dist/vector/services/vector-index.service.js +960 -0
- package/dist/vector/services/vector-index.service.js.map +7 -0
- package/dist/vector/types/pg.d.js +1 -0
- package/dist/vector/types/pg.d.js.map +7 -0
- package/dist/vector/types.js +75 -0
- package/dist/vector/types.js.map +7 -0
- package/jest.config.cjs +19 -0
- package/package.json +142 -0
- package/src/__tests__/queue.test.ts +148 -0
- package/src/__tests__/service.test.ts +345 -0
- package/src/__tests__/workers.test.ts +319 -0
- package/src/di.ts +291 -0
- package/src/fulltext/drivers/index.ts +41 -0
- package/src/fulltext/drivers/meilisearch/index.ts +410 -0
- package/src/fulltext/index.ts +13 -0
- package/src/fulltext/types.ts +115 -0
- package/src/index.ts +36 -0
- package/src/indexer/index.ts +13 -0
- package/src/indexer/search-indexer.ts +1141 -0
- package/src/indexer/subscribers/delete.ts +49 -0
- package/src/lib/debug.ts +46 -0
- package/src/lib/fallback-presenter.ts +106 -0
- package/src/lib/field-policy.ts +169 -0
- package/src/lib/index.ts +13 -0
- package/src/lib/merger.ts +159 -0
- package/src/lib/presenter-enricher.ts +323 -0
- package/src/modules/search/README.md +694 -0
- package/src/modules/search/acl.ts +10 -0
- package/src/modules/search/ai-tools.ts +467 -0
- package/src/modules/search/api/embeddings/reindex/cancel/route.ts +77 -0
- package/src/modules/search/api/embeddings/reindex/route.ts +197 -0
- package/src/modules/search/api/embeddings/route.ts +304 -0
- package/src/modules/search/api/index/route.ts +297 -0
- package/src/modules/search/api/reindex/cancel/route.ts +77 -0
- package/src/modules/search/api/reindex/route.ts +419 -0
- package/src/modules/search/api/search/global/route.ts +120 -0
- package/src/modules/search/api/search/route.ts +121 -0
- package/src/modules/search/api/settings/fulltext/route.ts +82 -0
- package/src/modules/search/api/settings/global-search/route.ts +91 -0
- package/src/modules/search/api/settings/route.ts +187 -0
- package/src/modules/search/api/settings/vector-store/route.ts +105 -0
- package/src/modules/search/backend/config/search/page.meta.ts +22 -0
- package/src/modules/search/backend/config/search/page.tsx +12 -0
- package/src/modules/search/cli.ts +818 -0
- package/src/modules/search/di.ts +50 -0
- package/src/modules/search/frontend/components/GlobalSearchDialog.tsx +436 -0
- package/src/modules/search/frontend/components/HybridSearchTable.tsx +418 -0
- package/src/modules/search/frontend/components/SearchSettingsPageClient.tsx +476 -0
- package/src/modules/search/frontend/components/sections/FulltextSearchSection.tsx +624 -0
- package/src/modules/search/frontend/components/sections/GlobalSearchSection.tsx +124 -0
- package/src/modules/search/frontend/components/sections/VectorSearchSection.tsx +943 -0
- package/src/modules/search/frontend/index.ts +3 -0
- package/src/modules/search/frontend/utils.ts +82 -0
- package/src/modules/search/i18n/de.json +61 -0
- package/src/modules/search/i18n/en.json +72 -0
- package/src/modules/search/i18n/es.json +61 -0
- package/src/modules/search/i18n/pl.json +61 -0
- package/src/modules/search/index.ts +9 -0
- package/src/modules/search/lib/auto-indexing.ts +35 -0
- package/src/modules/search/lib/embedding-config.ts +161 -0
- package/src/modules/search/lib/global-search-config.ts +69 -0
- package/src/modules/search/lib/reindex-lock.ts +201 -0
- package/src/modules/search/subscribers/fulltext_upsert.ts +83 -0
- package/src/modules/search/subscribers/vector_delete.ts +75 -0
- package/src/modules/search/subscribers/vector_purge.ts +161 -0
- package/src/modules/search/subscribers/vector_upsert.ts +75 -0
- package/src/modules/search/workers/fulltext-index.worker.ts +318 -0
- package/src/modules/search/workers/vector-index.worker.ts +292 -0
- package/src/queue/fulltext-indexing.ts +87 -0
- package/src/queue/index.ts +2 -0
- package/src/queue/vector-indexing.ts +66 -0
- package/src/service.ts +397 -0
- package/src/strategies/fulltext.strategy.ts +155 -0
- package/src/strategies/index.ts +17 -0
- package/src/strategies/token.strategy.ts +153 -0
- package/src/strategies/vector.strategy.ts +234 -0
- package/src/types.ts +38 -0
- package/src/vector/drivers/chromadb/index.ts +49 -0
- package/src/vector/drivers/index.ts +4 -0
- package/src/vector/drivers/pgvector/index.ts +627 -0
- package/src/vector/drivers/qdrant/index.ts +49 -0
- package/src/vector/index.ts +3 -0
- package/src/vector/lib/vector-logs.ts +46 -0
- package/src/vector/services/checksum.ts +18 -0
- package/src/vector/services/embedding.ts +275 -0
- package/src/vector/services/index.ts +3 -0
- package/src/vector/services/vector-index.service.ts +1234 -0
- package/src/vector/types/pg.d.ts +1 -0
- package/src/vector/types.ts +220 -0
- package/tsconfig.json +9 -0
- package/watch.mjs +6 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import type { Knex } from 'knex'
|
|
2
|
+
import type {
|
|
3
|
+
SearchStrategy,
|
|
4
|
+
SearchStrategyId,
|
|
5
|
+
SearchOptions,
|
|
6
|
+
SearchResult,
|
|
7
|
+
IndexableRecord,
|
|
8
|
+
} from '../types'
|
|
9
|
+
import type { EntityId } from '@open-mercato/shared/modules/entities'
|
|
10
|
+
|
|
11
|
+
/**
 * Optional settings accepted by the TokenSearchStrategy constructor.
 */
export type TokenStrategyConfig = {
  /**
   * Share of the query tokens that must match a record, expressed as a ratio
   * between 0 and 1. The strategy falls back to 0.5 when this is omitted.
   */
  minMatchRatio?: number

  /**
   * Maximum number of results returned when a search call does not pass its own
   * limit. The strategy falls back to 50 when this is omitted.
   */
  defaultLimit?: number
}
|
|
20
|
+
|
|
21
|
+
/**
 * TokenSearchStrategy provides hash-based search using the existing `search_tokens` table.
 * `isAvailable()` always resolves to `true`, so it can serve as a fallback strategy.
 *
 * It tokenizes queries into hashes and matches against pre-indexed token hashes,
 * enabling search on encrypted fields without exposing plaintext to external services.
 */
export class TokenSearchStrategy implements SearchStrategy {
  readonly id: SearchStrategyId = 'tokens'
  readonly name = 'Token Search'
  readonly priority = 10 // Lowest priority, always available as fallback

  private readonly minMatchRatio: number
  private readonly defaultLimit: number

  /**
   * @param knex Knex instance used for every `search_tokens` query.
   * @param config Optional overrides; `minMatchRatio` defaults to 0.5 and `defaultLimit` to 50.
   */
  constructor(
    private readonly knex: Knex,
    config?: TokenStrategyConfig,
  ) {
    this.minMatchRatio = config?.minMatchRatio ?? 0.5
    this.defaultLimit = config?.defaultLimit ?? 50
  }

  /** Always resolves to `true`. */
  async isAvailable(): Promise<boolean> {
    return true // Always available
  }

  /** No-op: this strategy needs no initialization. */
  async ensureReady(): Promise<void> {
    // No initialization needed
  }

  /**
   * Finds records whose indexed token hashes overlap with the hashes of `query`.
   *
   * A record qualifies when the number of DISTINCT query hashes it contains is at least
   * `ceil(uniqueHashes * minMatchRatio)` (never less than 1). The returned score is
   * `distinct matches / unique query hashes`, so it lies in (0, 1].
   *
   * @param query Free-text query to tokenize.
   * @param options Tenant scope (required) plus optional organization, entity types and limit.
   * @returns Matches ordered by number of distinct matching hashes, best first. Empty when
   *          search is disabled in the resolved config or the query yields no hashes.
   */
  async search(query: string, options: SearchOptions): Promise<SearchResult[]> {
    // Dynamically import tokenization to avoid circular dependencies
    const { tokenizeText } = await import('@open-mercato/shared/lib/search/tokenize')
    const { resolveSearchConfig } = await import('@open-mercato/shared/lib/search/config')

    const config = resolveSearchConfig()
    if (!config.enabled) return []

    const { hashes } = tokenizeText(query, config)
    // De-duplicate so a repeated query token cannot inflate the match threshold or the
    // score denominator beyond what COUNT(DISTINCT token_hash) can ever reach.
    const uniqueHashes = Array.from(new Set(hashes))
    if (uniqueHashes.length === 0) return []

    const minMatches = Math.max(1, Math.ceil(uniqueHashes.length * this.minMatchRatio))
    const limit = options.limit ?? this.defaultLimit

    let queryBuilder = this.knex('search_tokens')
      .select('entity_type', 'entity_id')
      // Count DISTINCT hashes so match_count agrees with the HAVING / ORDER BY clauses below;
      // COUNT(*) would count the same hash once per row that stores it.
      .countDistinct('token_hash as match_count')
      .whereIn('token_hash', uniqueHashes)
      .where('tenant_id', options.tenantId)
      .groupBy('entity_type', 'entity_id')
      .havingRaw('COUNT(DISTINCT token_hash) >= ?', [minMatches])
      .orderByRaw('COUNT(DISTINCT token_hash) DESC')
      .limit(limit)

    if (options.organizationId) {
      queryBuilder = queryBuilder.where('organization_id', options.organizationId)
    }

    if (options.entityTypes?.length) {
      queryBuilder = queryBuilder.whereIn('entity_type', options.entityTypes)
    }

    const rows = await queryBuilder as Array<{ entity_type: string; entity_id: string; match_count: string | number }>

    return rows.map((row) => {
      // Some drivers return aggregate counts as strings.
      const matchCount = typeof row.match_count === 'string'
        ? parseInt(row.match_count, 10)
        : row.match_count
      // Calculate score based on match ratio
      const score = matchCount / uniqueHashes.length

      return {
        entityId: row.entity_type as EntityId,
        recordId: row.entity_id,
        score,
        source: this.id,
      }
    })
  }

  /**
   * Replaces the stored search tokens of a single record with tokens derived from `record.fields`.
   */
  async index(record: IndexableRecord): Promise<void> {
    // Dynamically import to avoid circular dependencies
    const { replaceSearchTokensForRecord } = await import(
      '@open-mercato/core/modules/query_index/lib/search-tokens'
    )

    await replaceSearchTokensForRecord(this.knex, {
      entityType: record.entityId,
      recordId: record.recordId,
      tenantId: record.tenantId,
      organizationId: record.organizationId,
      doc: record.fields,
    })
  }

  /**
   * Removes the stored search tokens of a single record.
   */
  async delete(entityId: EntityId, recordId: string, tenantId: string): Promise<void> {
    // Dynamically import to avoid circular dependencies
    const { deleteSearchTokensForRecord } = await import(
      '@open-mercato/core/modules/query_index/lib/search-tokens'
    )

    await deleteSearchTokensForRecord(this.knex, {
      entityType: entityId,
      recordId,
      tenantId,
    })
  }

  /**
   * Replaces the stored search tokens for a batch of records. Returns immediately,
   * without importing the helper, when `records` is empty.
   */
  async bulkIndex(records: IndexableRecord[]): Promise<void> {
    if (records.length === 0) return

    const { replaceSearchTokensForBatch } = await import(
      '@open-mercato/core/modules/query_index/lib/search-tokens'
    )

    const payloads = records.map((record) => ({
      entityType: record.entityId,
      recordId: record.recordId,
      tenantId: record.tenantId,
      organizationId: record.organizationId,
      doc: record.fields as Record<string, unknown>,
    }))

    await replaceSearchTokensForBatch(this.knex, payloads)
  }

  /**
   * Deletes every `search_tokens` row of the given entity type within the tenant.
   */
  async purge(entityId: EntityId, tenantId: string): Promise<void> {
    await this.knex('search_tokens')
      .where({ entity_type: entityId, tenant_id: tenantId })
      .del()
  }
}
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import { createHash } from 'crypto'
|
|
2
|
+
import type {
|
|
3
|
+
SearchStrategy,
|
|
4
|
+
SearchStrategyId,
|
|
5
|
+
SearchOptions,
|
|
6
|
+
SearchResult,
|
|
7
|
+
IndexableRecord,
|
|
8
|
+
} from '../types'
|
|
9
|
+
import type { EntityId } from '@open-mercato/shared/modules/entities'
|
|
10
|
+
import type { VectorDriver, VectorDriverDocument } from '../vector/types'
|
|
11
|
+
import { searchDebugWarn } from '../lib/debug'
|
|
12
|
+
|
|
13
|
+
/**
 * The slice of an embedding service that VectorSearchStrategy relies on.
 */
export interface EmbeddingService {
  /** Reported by the strategy's `isAvailable()` check. */
  available: boolean

  /** Produces the embedding vector for the given text. */
  createEmbedding(text: string): Promise<number[]>
}
|
|
20
|
+
|
|
21
|
+
/**
 * Optional settings accepted by the VectorSearchStrategy constructor.
 */
export type VectorStrategyConfig = {
  /**
   * Maximum number of results returned when a search call does not pass its own
   * limit. The strategy falls back to 20 when this is omitted.
   */
  defaultLimit?: number
}
|
|
28
|
+
|
|
29
|
+
/**
 * VectorSearchStrategy provides semantic search using embeddings.
 *
 * Text is turned into an embedding through the injected EmbeddingService and the
 * resulting vectors are stored and queried through the injected VectorDriver.
 */
export class VectorSearchStrategy implements SearchStrategy {
  readonly id: SearchStrategyId = 'vector'
  readonly name = 'Vector Search'
  readonly priority = 20 // Medium priority

  private readonly defaultLimit: number
  private ready = false
  private readyPromise: Promise<void> | null = null

  /**
   * @param embeddingService Produces embeddings and reports availability.
   * @param vectorDriver Storage/query backend for the vectors.
   * @param config Optional overrides; `defaultLimit` defaults to 20.
   */
  constructor(
    private readonly embeddingService: EmbeddingService,
    private readonly vectorDriver: VectorDriver,
    config?: VectorStrategyConfig,
  ) {
    this.defaultLimit = config?.defaultLimit ?? 20
  }

  /** Mirrors the embedding service's `available` flag. */
  async isAvailable(): Promise<boolean> {
    return this.embeddingService.available
  }

  /**
   * Initializes the vector driver once; concurrent callers share the same in-flight promise.
   *
   * If initialization fails, the cached promise is discarded so a later call retries
   * instead of replaying the same rejection forever.
   *
   * @throws Whatever the driver's `ensureReady()` rejects with.
   */
  async ensureReady(): Promise<void> {
    if (this.ready) return
    if (!this.readyPromise) {
      this.readyPromise = this.vectorDriver.ensureReady().then(
        () => {
          this.ready = true
        },
        (error: unknown) => {
          // Forget the failed attempt so the next call can try again.
          this.readyPromise = null
          throw error
        },
      )
    }
    return this.readyPromise
  }

  /**
   * Embeds `query` and returns the nearest indexed documents for the tenant.
   *
   * @param query Free-text query to embed.
   * @param options Tenant scope (required) plus optional organization, entity types and limit.
   * @returns Driver hits mapped to search results with `source` set to this strategy's id.
   */
  async search(query: string, options: SearchOptions): Promise<SearchResult[]> {
    await this.ensureReady()
    const embedding = await this.embeddingService.createEmbedding(query)

    // Build filter - only include organizationId if it's a real value
    // The pgvector driver treats null as "only records with null org_id",
    // but we want null/undefined to mean "no organization filter"
    const filter: {
      tenantId: string
      organizationId?: string | null
      entityIds?: EntityId[]
    } = {
      tenantId: options.tenantId,
      entityIds: options.entityTypes as EntityId[],
    }

    // Only add organizationId filter if it's a real org ID
    if (options.organizationId) {
      filter.organizationId = options.organizationId
    }

    const results = await this.vectorDriver.query({
      vector: embedding,
      limit: options.limit ?? this.defaultLimit,
      filter,
    })

    return results.map((hit) => ({
      entityId: hit.entityId,
      recordId: hit.recordId,
      score: hit.score,
      source: this.id,
      presenter: hit.presenter ?? undefined,
      url: hit.primaryLinkHref ?? hit.url ?? undefined,
      links: hit.links?.map((link) => ({
        href: link.href,
        label: link.label ?? '',
        kind: link.kind,
      })),
      metadata: hit.payload ?? undefined,
    }))
  }

  /**
   * Embeds a record and upserts it into the vector driver.
   * Does nothing when no text content can be derived from the record.
   */
  async index(record: IndexableRecord): Promise<void> {
    await this.ensureReady()
    // Use text from buildSource if available, otherwise fall back to generic extraction
    const textContent = record.text
      ? (Array.isArray(record.text) ? record.text.join('\n') : record.text)
      : this.buildTextContent(record)
    if (!textContent) return

    const embedding = await this.embeddingService.createEmbedding(textContent)

    const doc: VectorDriverDocument = {
      entityId: record.entityId as EntityId,
      recordId: record.recordId,
      tenantId: record.tenantId,
      organizationId: record.organizationId,
      checksum: this.computeChecksum(record),
      embedding,
      url: record.url,
      presenter: record.presenter,
      links: record.links,
      driverId: this.vectorDriver.id,
      resultTitle: record.presenter?.title ?? record.recordId,
      resultSubtitle: record.presenter?.subtitle,
      resultIcon: record.presenter?.icon,
      resultBadge: record.presenter?.badge,
    }

    await this.vectorDriver.upsert(doc)
  }

  /** Removes a single record from the vector index. */
  async delete(entityId: EntityId, recordId: string, tenantId: string): Promise<void> {
    await this.ensureReady()
    await this.vectorDriver.delete(entityId, recordId, tenantId)
  }

  /** Purges an entity type for the tenant; a no-op when the driver has no `purge` method. */
  async purge(entityId: EntityId, tenantId: string): Promise<void> {
    await this.ensureReady()
    if (this.vectorDriver.purge) {
      await this.vectorDriver.purge(entityId, tenantId)
    }
  }

  /**
   * Build text content from record fields for embedding: presenter title and subtitle
   * first, then every non-blank string field value, joined with single spaces.
   */
  private buildTextContent(record: IndexableRecord): string {
    const parts: string[] = []

    // Add presenter info
    if (record.presenter?.title) {
      parts.push(record.presenter.title)
    }
    if (record.presenter?.subtitle) {
      parts.push(record.presenter.subtitle)
    }

    // Add string fields from record
    for (const value of Object.values(record.fields)) {
      if (typeof value === 'string' && value.trim()) {
        parts.push(value)
      }
    }

    return parts.join(' ').trim()
  }

  /**
   * Compute a checksum for change detection using SHA-256 (first 16 hex characters).
   * Uses checksumSource from buildSource if available, otherwise uses fields/presenter/url.
   */
  private computeChecksum(record: IndexableRecord): string {
    const source = record.checksumSource !== undefined
      ? record.checksumSource
      : {
          fields: record.fields,
          presenter: record.presenter,
          url: record.url,
        }
    const content = JSON.stringify(source)
    return createHash('sha256').update(content).digest('hex').slice(0, 16)
  }

  /**
   * List entries in the vector index (for admin/debugging).
   *
   * Delegates to the driver's `list` method when it exists; otherwise logs a debug
   * warning and returns an empty array.
   */
  async listEntries(options: {
    tenantId: string
    organizationId?: string | null
    entityId?: string
    limit?: number
    offset?: number
  }): Promise<Array<{
    entityId: string
    recordId: string
    tenantId: string
    organizationId: string | null
    presenter?: unknown
    url?: string
  }>> {
    await this.ensureReady()
    // Delegate to vector driver's list method if available
    const listMethod = (this.vectorDriver as unknown as {
      list?: (options: {
        tenantId: string
        organizationId?: string | null
        entityId?: string
        limit?: number
        offset?: number
      }) => Promise<unknown[]>
    }).list

    if (typeof listMethod === 'function') {
      const entries = await listMethod.call(this.vectorDriver, options)
      return entries as Array<{
        entityId: string
        recordId: string
        tenantId: string
        organizationId: string | null
        presenter?: unknown
        url?: string
      }>
    }

    // Fallback: return empty array if driver doesn't support listing
    searchDebugWarn('VectorSearchStrategy', 'Vector driver does not support listing entries')
    return []
  }
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Re-export all search types from shared package.
|
|
3
|
+
* This allows consumers to import from '@open-mercato/search/types' directly.
|
|
4
|
+
*/
|
|
5
|
+
export type {
|
|
6
|
+
// Strategy identifiers
|
|
7
|
+
SearchStrategyId,
|
|
8
|
+
|
|
9
|
+
// Result types
|
|
10
|
+
SearchResult,
|
|
11
|
+
SearchResultPresenter,
|
|
12
|
+
SearchResultLink,
|
|
13
|
+
|
|
14
|
+
// Search options
|
|
15
|
+
SearchOptions,
|
|
16
|
+
|
|
17
|
+
// Indexable record
|
|
18
|
+
IndexableRecord,
|
|
19
|
+
|
|
20
|
+
// Strategy interface
|
|
21
|
+
SearchStrategy,
|
|
22
|
+
|
|
23
|
+
// Service configuration
|
|
24
|
+
ResultMergeConfig,
|
|
25
|
+
SearchServiceOptions,
|
|
26
|
+
PresenterEnricherFn,
|
|
27
|
+
|
|
28
|
+
// Module configuration
|
|
29
|
+
SearchBuildContext,
|
|
30
|
+
SearchIndexSource,
|
|
31
|
+
SearchFieldPolicy,
|
|
32
|
+
SearchEntityConfig,
|
|
33
|
+
SearchModuleConfig,
|
|
34
|
+
|
|
35
|
+
// Event payloads
|
|
36
|
+
SearchIndexPayload,
|
|
37
|
+
SearchDeletePayload,
|
|
38
|
+
} from '@open-mercato/shared/modules/search'
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
VectorDriver,
|
|
3
|
+
VectorDriverDocument,
|
|
4
|
+
VectorDriverQuery,
|
|
5
|
+
VectorDriverQueryResult,
|
|
6
|
+
VectorDriverCountParams,
|
|
7
|
+
} from '../../types'
|
|
8
|
+
|
|
9
|
+
/**
 * Always throws an Error saying that the named ChromaDB driver method is not implemented yet.
 *
 * @param method Name of the driver method that was called.
 * @throws Error on every call.
 */
function notImplemented(method: string): never {
  const message = `[vector.chromadb] ${method} not implemented yet`
  throw new Error(message)
}
|
|
12
|
+
|
|
13
|
+
/**
 * Creates the ChromaDB vector driver placeholder.
 *
 * Only `id` is usable: every method is async and rejects with a
 * "not implemented yet" error produced by `notImplemented`.
 */
export function createChromaDbDriver(): VectorDriver {
  const driver: VectorDriver = {
    id: 'chromadb',

    async ensureReady() {
      return notImplemented('ensureReady')
    },

    async upsert(doc: VectorDriverDocument) {
      void doc
      return notImplemented('upsert')
    },

    async delete(entityId: string, recordId: string, tenantId: string) {
      void entityId
      void recordId
      void tenantId
      return notImplemented('delete')
    },

    async getChecksum(entityId: string, recordId: string, tenantId: string) {
      void entityId
      void recordId
      void tenantId
      return notImplemented('getChecksum')
    },

    async query(input: VectorDriverQuery): Promise<VectorDriverQueryResult[]> {
      void input
      return notImplemented('query')
    },

    async purge(entityId: string, tenantId: string) {
      void entityId
      void tenantId
      return notImplemented('purge')
    },

    async count(params: VectorDriverCountParams) {
      void params
      return notImplemented('count')
    },
  }

  return driver
}
|