@open-mercato/search 0.4.2-canary-c02407ff85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +678 -0
- package/build.mjs +92 -0
- package/dist/di.js +157 -0
- package/dist/di.js.map +7 -0
- package/dist/fulltext/drivers/index.js +21 -0
- package/dist/fulltext/drivers/index.js.map +7 -0
- package/dist/fulltext/drivers/meilisearch/index.js +320 -0
- package/dist/fulltext/drivers/meilisearch/index.js.map +7 -0
- package/dist/fulltext/index.js +7 -0
- package/dist/fulltext/index.js.map +7 -0
- package/dist/fulltext/types.js +1 -0
- package/dist/fulltext/types.js.map +7 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +7 -0
- package/dist/indexer/index.js +8 -0
- package/dist/indexer/index.js.map +7 -0
- package/dist/indexer/search-indexer.js +848 -0
- package/dist/indexer/search-indexer.js.map +7 -0
- package/dist/indexer/subscribers/delete.js +41 -0
- package/dist/indexer/subscribers/delete.js.map +7 -0
- package/dist/lib/debug.js +34 -0
- package/dist/lib/debug.js.map +7 -0
- package/dist/lib/fallback-presenter.js +107 -0
- package/dist/lib/fallback-presenter.js.map +7 -0
- package/dist/lib/field-policy.js +75 -0
- package/dist/lib/field-policy.js.map +7 -0
- package/dist/lib/index.js +19 -0
- package/dist/lib/index.js.map +7 -0
- package/dist/lib/merger.js +93 -0
- package/dist/lib/merger.js.map +7 -0
- package/dist/lib/presenter-enricher.js +192 -0
- package/dist/lib/presenter-enricher.js.map +7 -0
- package/dist/modules/search/acl.js +14 -0
- package/dist/modules/search/acl.js.map +7 -0
- package/dist/modules/search/ai-tools.js +284 -0
- package/dist/modules/search/ai-tools.js.map +7 -0
- package/dist/modules/search/api/embeddings/reindex/cancel/route.js +65 -0
- package/dist/modules/search/api/embeddings/reindex/cancel/route.js.map +7 -0
- package/dist/modules/search/api/embeddings/reindex/route.js +165 -0
- package/dist/modules/search/api/embeddings/reindex/route.js.map +7 -0
- package/dist/modules/search/api/embeddings/route.js +246 -0
- package/dist/modules/search/api/embeddings/route.js.map +7 -0
- package/dist/modules/search/api/index/route.js +245 -0
- package/dist/modules/search/api/index/route.js.map +7 -0
- package/dist/modules/search/api/reindex/cancel/route.js +65 -0
- package/dist/modules/search/api/reindex/cancel/route.js.map +7 -0
- package/dist/modules/search/api/reindex/route.js +332 -0
- package/dist/modules/search/api/reindex/route.js.map +7 -0
- package/dist/modules/search/api/search/global/route.js +100 -0
- package/dist/modules/search/api/search/global/route.js.map +7 -0
- package/dist/modules/search/api/search/route.js +101 -0
- package/dist/modules/search/api/search/route.js.map +7 -0
- package/dist/modules/search/api/settings/fulltext/route.js +55 -0
- package/dist/modules/search/api/settings/fulltext/route.js.map +7 -0
- package/dist/modules/search/api/settings/global-search/route.js +80 -0
- package/dist/modules/search/api/settings/global-search/route.js.map +7 -0
- package/dist/modules/search/api/settings/route.js +118 -0
- package/dist/modules/search/api/settings/route.js.map +7 -0
- package/dist/modules/search/api/settings/vector-store/route.js +77 -0
- package/dist/modules/search/api/settings/vector-store/route.js.map +7 -0
- package/dist/modules/search/backend/config/search/page.js +10 -0
- package/dist/modules/search/backend/config/search/page.js.map +7 -0
- package/dist/modules/search/backend/config/search/page.meta.js +24 -0
- package/dist/modules/search/backend/config/search/page.meta.js.map +7 -0
- package/dist/modules/search/cli.js +698 -0
- package/dist/modules/search/cli.js.map +7 -0
- package/dist/modules/search/di.js +32 -0
- package/dist/modules/search/di.js.map +7 -0
- package/dist/modules/search/frontend/components/GlobalSearchDialog.js +357 -0
- package/dist/modules/search/frontend/components/GlobalSearchDialog.js.map +7 -0
- package/dist/modules/search/frontend/components/HybridSearchTable.js +343 -0
- package/dist/modules/search/frontend/components/HybridSearchTable.js.map +7 -0
- package/dist/modules/search/frontend/components/SearchSettingsPageClient.js +303 -0
- package/dist/modules/search/frontend/components/SearchSettingsPageClient.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js +360 -0
- package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js +101 -0
- package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/components/sections/VectorSearchSection.js +608 -0
- package/dist/modules/search/frontend/components/sections/VectorSearchSection.js.map +7 -0
- package/dist/modules/search/frontend/index.js +9 -0
- package/dist/modules/search/frontend/index.js.map +7 -0
- package/dist/modules/search/frontend/utils.js +41 -0
- package/dist/modules/search/frontend/utils.js.map +7 -0
- package/dist/modules/search/i18n/de.json +61 -0
- package/dist/modules/search/i18n/en.json +72 -0
- package/dist/modules/search/i18n/es.json +61 -0
- package/dist/modules/search/i18n/pl.json +61 -0
- package/dist/modules/search/index.js +11 -0
- package/dist/modules/search/index.js.map +7 -0
- package/dist/modules/search/lib/auto-indexing.js +29 -0
- package/dist/modules/search/lib/auto-indexing.js.map +7 -0
- package/dist/modules/search/lib/embedding-config.js +131 -0
- package/dist/modules/search/lib/embedding-config.js.map +7 -0
- package/dist/modules/search/lib/global-search-config.js +45 -0
- package/dist/modules/search/lib/global-search-config.js.map +7 -0
- package/dist/modules/search/lib/reindex-lock.js +99 -0
- package/dist/modules/search/lib/reindex-lock.js.map +7 -0
- package/dist/modules/search/subscribers/fulltext_upsert.js +64 -0
- package/dist/modules/search/subscribers/fulltext_upsert.js.map +7 -0
- package/dist/modules/search/subscribers/vector_delete.js +58 -0
- package/dist/modules/search/subscribers/vector_delete.js.map +7 -0
- package/dist/modules/search/subscribers/vector_purge.js +142 -0
- package/dist/modules/search/subscribers/vector_purge.js.map +7 -0
- package/dist/modules/search/subscribers/vector_upsert.js +58 -0
- package/dist/modules/search/subscribers/vector_upsert.js.map +7 -0
- package/dist/modules/search/workers/fulltext-index.worker.js +240 -0
- package/dist/modules/search/workers/fulltext-index.worker.js.map +7 -0
- package/dist/modules/search/workers/vector-index.worker.js +234 -0
- package/dist/modules/search/workers/vector-index.worker.js.map +7 -0
- package/dist/queue/fulltext-indexing.js +15 -0
- package/dist/queue/fulltext-indexing.js.map +7 -0
- package/dist/queue/index.js +3 -0
- package/dist/queue/index.js.map +7 -0
- package/dist/queue/vector-indexing.js +15 -0
- package/dist/queue/vector-indexing.js.map +7 -0
- package/dist/service.js +286 -0
- package/dist/service.js.map +7 -0
- package/dist/strategies/fulltext.strategy.js +116 -0
- package/dist/strategies/fulltext.strategy.js.map +7 -0
- package/dist/strategies/index.js +12 -0
- package/dist/strategies/index.js.map +7 -0
- package/dist/strategies/token.strategy.js +80 -0
- package/dist/strategies/token.strategy.js.map +7 -0
- package/dist/strategies/vector.strategy.js +137 -0
- package/dist/strategies/vector.strategy.js.map +7 -0
- package/dist/types.js +1 -0
- package/dist/types.js.map +7 -0
- package/dist/vector/drivers/chromadb/index.js +44 -0
- package/dist/vector/drivers/chromadb/index.js.map +7 -0
- package/dist/vector/drivers/index.js +9 -0
- package/dist/vector/drivers/index.js.map +7 -0
- package/dist/vector/drivers/pgvector/index.js +509 -0
- package/dist/vector/drivers/pgvector/index.js.map +7 -0
- package/dist/vector/drivers/qdrant/index.js +44 -0
- package/dist/vector/drivers/qdrant/index.js.map +7 -0
- package/dist/vector/index.js +4 -0
- package/dist/vector/index.js.map +7 -0
- package/dist/vector/lib/vector-logs.js +33 -0
- package/dist/vector/lib/vector-logs.js.map +7 -0
- package/dist/vector/services/checksum.js +20 -0
- package/dist/vector/services/checksum.js.map +7 -0
- package/dist/vector/services/embedding.js +222 -0
- package/dist/vector/services/embedding.js.map +7 -0
- package/dist/vector/services/index.js +4 -0
- package/dist/vector/services/index.js.map +7 -0
- package/dist/vector/services/vector-index.service.js +960 -0
- package/dist/vector/services/vector-index.service.js.map +7 -0
- package/dist/vector/types/pg.d.js +1 -0
- package/dist/vector/types/pg.d.js.map +7 -0
- package/dist/vector/types.js +75 -0
- package/dist/vector/types.js.map +7 -0
- package/jest.config.cjs +19 -0
- package/package.json +142 -0
- package/src/__tests__/queue.test.ts +148 -0
- package/src/__tests__/service.test.ts +345 -0
- package/src/__tests__/workers.test.ts +319 -0
- package/src/di.ts +291 -0
- package/src/fulltext/drivers/index.ts +41 -0
- package/src/fulltext/drivers/meilisearch/index.ts +410 -0
- package/src/fulltext/index.ts +13 -0
- package/src/fulltext/types.ts +115 -0
- package/src/index.ts +36 -0
- package/src/indexer/index.ts +13 -0
- package/src/indexer/search-indexer.ts +1141 -0
- package/src/indexer/subscribers/delete.ts +49 -0
- package/src/lib/debug.ts +46 -0
- package/src/lib/fallback-presenter.ts +106 -0
- package/src/lib/field-policy.ts +169 -0
- package/src/lib/index.ts +13 -0
- package/src/lib/merger.ts +159 -0
- package/src/lib/presenter-enricher.ts +323 -0
- package/src/modules/search/README.md +694 -0
- package/src/modules/search/acl.ts +10 -0
- package/src/modules/search/ai-tools.ts +467 -0
- package/src/modules/search/api/embeddings/reindex/cancel/route.ts +77 -0
- package/src/modules/search/api/embeddings/reindex/route.ts +197 -0
- package/src/modules/search/api/embeddings/route.ts +304 -0
- package/src/modules/search/api/index/route.ts +297 -0
- package/src/modules/search/api/reindex/cancel/route.ts +77 -0
- package/src/modules/search/api/reindex/route.ts +419 -0
- package/src/modules/search/api/search/global/route.ts +120 -0
- package/src/modules/search/api/search/route.ts +121 -0
- package/src/modules/search/api/settings/fulltext/route.ts +82 -0
- package/src/modules/search/api/settings/global-search/route.ts +91 -0
- package/src/modules/search/api/settings/route.ts +187 -0
- package/src/modules/search/api/settings/vector-store/route.ts +105 -0
- package/src/modules/search/backend/config/search/page.meta.ts +22 -0
- package/src/modules/search/backend/config/search/page.tsx +12 -0
- package/src/modules/search/cli.ts +818 -0
- package/src/modules/search/di.ts +50 -0
- package/src/modules/search/frontend/components/GlobalSearchDialog.tsx +436 -0
- package/src/modules/search/frontend/components/HybridSearchTable.tsx +418 -0
- package/src/modules/search/frontend/components/SearchSettingsPageClient.tsx +476 -0
- package/src/modules/search/frontend/components/sections/FulltextSearchSection.tsx +624 -0
- package/src/modules/search/frontend/components/sections/GlobalSearchSection.tsx +124 -0
- package/src/modules/search/frontend/components/sections/VectorSearchSection.tsx +943 -0
- package/src/modules/search/frontend/index.ts +3 -0
- package/src/modules/search/frontend/utils.ts +82 -0
- package/src/modules/search/i18n/de.json +61 -0
- package/src/modules/search/i18n/en.json +72 -0
- package/src/modules/search/i18n/es.json +61 -0
- package/src/modules/search/i18n/pl.json +61 -0
- package/src/modules/search/index.ts +9 -0
- package/src/modules/search/lib/auto-indexing.ts +35 -0
- package/src/modules/search/lib/embedding-config.ts +161 -0
- package/src/modules/search/lib/global-search-config.ts +69 -0
- package/src/modules/search/lib/reindex-lock.ts +201 -0
- package/src/modules/search/subscribers/fulltext_upsert.ts +83 -0
- package/src/modules/search/subscribers/vector_delete.ts +75 -0
- package/src/modules/search/subscribers/vector_purge.ts +161 -0
- package/src/modules/search/subscribers/vector_upsert.ts +75 -0
- package/src/modules/search/workers/fulltext-index.worker.ts +318 -0
- package/src/modules/search/workers/vector-index.worker.ts +292 -0
- package/src/queue/fulltext-indexing.ts +87 -0
- package/src/queue/index.ts +2 -0
- package/src/queue/vector-indexing.ts +66 -0
- package/src/service.ts +397 -0
- package/src/strategies/fulltext.strategy.ts +155 -0
- package/src/strategies/index.ts +17 -0
- package/src/strategies/token.strategy.ts +153 -0
- package/src/strategies/vector.strategy.ts +234 -0
- package/src/types.ts +38 -0
- package/src/vector/drivers/chromadb/index.ts +49 -0
- package/src/vector/drivers/index.ts +4 -0
- package/src/vector/drivers/pgvector/index.ts +627 -0
- package/src/vector/drivers/qdrant/index.ts +49 -0
- package/src/vector/index.ts +3 -0
- package/src/vector/lib/vector-logs.ts +46 -0
- package/src/vector/services/checksum.ts +18 -0
- package/src/vector/services/embedding.ts +275 -0
- package/src/vector/services/index.ts +3 -0
- package/src/vector/services/vector-index.service.ts +1234 -0
- package/src/vector/types/pg.d.ts +1 -0
- package/src/vector/types.ts +220 -0
- package/tsconfig.json +9 -0
- package/watch.mjs +6 -0
|
@@ -0,0 +1,1141 @@
|
|
|
1
|
+
import type { SearchService } from '../service'
|
|
2
|
+
import type {
|
|
3
|
+
SearchModuleConfig,
|
|
4
|
+
SearchEntityConfig,
|
|
5
|
+
SearchBuildContext,
|
|
6
|
+
IndexableRecord,
|
|
7
|
+
SearchResultPresenter,
|
|
8
|
+
SearchResultLink,
|
|
9
|
+
} from '../types'
|
|
10
|
+
import type { FullTextSearchStrategy } from '../strategies/fulltext.strategy'
|
|
11
|
+
import type { EntityId } from '@open-mercato/shared/modules/entities'
|
|
12
|
+
import type { QueryEngine } from '@open-mercato/shared/lib/query/types'
|
|
13
|
+
import type { Queue } from '@open-mercato/queue'
|
|
14
|
+
import type { FulltextIndexJobPayload } from '../queue/fulltext-indexing'
|
|
15
|
+
import type { VectorIndexJobPayload, VectorBatchRecord } from '../queue/vector-indexing'
|
|
16
|
+
import { searchDebug, searchDebugWarn, searchError } from '../lib/debug'
|
|
17
|
+
|
|
18
|
+
/**
 * Upper bound on the number of pages a single reindex pass will walk.
 * Acts as a safety net so that a pagination loop can never run forever.
 * The record ceiling is this value multiplied by the page size in use
 * (for example, 50 records per page allows up to 500,000 records per entity).
 */
const MAX_PAGES = 10_000
|
|
23
|
+
|
|
24
|
+
/**
 * Input for indexing one record (see `SearchIndexer.indexRecord` and
 * `SearchIndexer.bulkIndexRecords`).
 */
export type IndexRecordParams = {
  /** Entity type of the record; used to look up the entity's search config. */
  entityId: EntityId
  /** Identifier of the record being indexed. */
  recordId: string
  /** Tenant the record is indexed under. */
  tenantId: string
  /** Optional organization scope, forwarded to the build context and the indexable record. */
  organizationId?: string | null
  /** Record data; passed to the config hooks as `record` and stored as the indexable record's `fields`. */
  record: Record<string, unknown>
  /** Custom field values handed to the config hooks; an empty object is used when omitted. */
  customFields?: Record<string, unknown>
}
|
|
35
|
+
|
|
36
|
+
/**
 * Input for removing one record from the search index
 * (see `SearchIndexer.deleteRecord`).
 */
export type DeleteRecordParams = {
  /** Entity type of the record to remove. */
  entityId: EntityId
  /** Identifier of the record to remove. */
  recordId: string
  /** Tenant the record was indexed under. */
  tenantId: string
}
|
|
44
|
+
|
|
45
|
+
/**
 * Input for purging every indexed record of one entity type
 * (see `SearchIndexer.purgeEntity`).
 */
export type PurgeEntityParams = {
  /** Entity type whose records are purged. */
  entityId: EntityId
  /** Tenant whose records are purged. */
  tenantId: string
}
|
|
52
|
+
|
|
53
|
+
/**
 * Options for reindexing a single entity to fulltext search.
 */
export type ReindexEntityParams = {
  /** Entity type to reindex. */
  entityId: EntityId
  /** Tenant to reindex for. */
  tenantId: string
  /** Optional organization scope. */
  organizationId?: string | null
  /** Whether to recreate the index first (default: true) */
  recreateIndex?: boolean
  /** Callback for progress tracking */
  onProgress?: (progress: ReindexProgress) => void
  /** Whether to use queue for batch processing (default: false) */
  useQueue?: boolean
}
|
|
67
|
+
|
|
68
|
+
/**
 * Options for reindexing all entities to fulltext search.
 */
export type ReindexAllParams = {
  /** Tenant to reindex for. */
  tenantId: string
  /** Optional organization scope. */
  organizationId?: string | null
  /** Whether to recreate the index first (default: true) */
  recreateIndex?: boolean
  /** Callback for progress tracking */
  onProgress?: (progress: ReindexProgress) => void
  /** Whether to use queue for batch processing (default: false) */
  useQueue?: boolean
}
|
|
81
|
+
|
|
82
|
+
/**
 * Snapshot handed to an `onProgress` callback while a reindex is running.
 */
export type ReindexProgress = {
  /** Entity type the snapshot refers to. */
  entityId: EntityId
  /** Stage of the reindex the snapshot was taken in. */
  phase: 'starting' | 'fetching' | 'indexing' | 'complete'
  /** Count of records processed so far. */
  processed: number
  /** Total record count, when it is known. */
  total?: number
}
|
|
91
|
+
|
|
92
|
+
/**
 * Outcome reported by a reindex operation.
 */
export type ReindexResult = {
  /** `false` as soon as any part of the operation reported a failure. */
  success: boolean
  /** Count of entity types that were processed. */
  entitiesProcessed: number
  /** Count of records handed to the search service successfully. */
  recordsIndexed: number
  /** Number of records dropped due to missing id or other validation failures */
  recordsDropped?: number
  /** Number of jobs enqueued (when useQueue is true) */
  jobsEnqueued?: number
  /** One entry per failure, tagged with the entity it occurred for. */
  errors: Array<{ entityId: EntityId; error: string }>
}
|
|
105
|
+
|
|
106
|
+
/**
 * Optional collaborators that can be injected into a SearchIndexer.
 */
export type SearchIndexerOptions = {
  /** Query engine used to load records; methods that need it bail out when it is missing. */
  queryEngine?: QueryEngine
  /** Queue for fulltext batch indexing */
  fulltextQueue?: Queue<FulltextIndexJobPayload>
  /** Queue for vector batch indexing */
  vectorQueue?: Queue<VectorIndexJobPayload>
}
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* SearchIndexer orchestrates indexing operations by resolving entity configs
|
|
119
|
+
* and building IndexableRecords for the SearchService.
|
|
120
|
+
*/
|
|
121
|
+
export class SearchIndexer {
|
|
122
|
+
private readonly entityConfigMap: Map<EntityId, SearchEntityConfig>
|
|
123
|
+
private readonly queryEngine?: QueryEngine
|
|
124
|
+
private readonly fulltextQueue?: Queue<FulltextIndexJobPayload>
|
|
125
|
+
private readonly vectorQueue?: Queue<VectorIndexJobPayload>
|
|
126
|
+
|
|
127
|
+
/**
 * Create an indexer and register every entity config that is not explicitly disabled.
 *
 * @param searchService - Service that receives the index, delete and purge calls.
 * @param moduleConfigs - Module search configurations whose entities are scanned.
 * @param options - Optional query engine and indexing queues.
 */
constructor(
  private readonly searchService: SearchService,
  private readonly moduleConfigs: SearchModuleConfig[],
  options?: SearchIndexerOptions,
) {
  this.queryEngine = options?.queryEngine
  this.fulltextQueue = options?.fulltextQueue
  this.vectorQueue = options?.vectorQueue

  // Entities declared with `enabled: false` never enter the lookup map.
  const registry = new Map<EntityId, SearchEntityConfig>()
  for (const { entities } of moduleConfigs) {
    for (const candidate of entities) {
      if (candidate.enabled === false) continue
      registry.set(candidate.entityId as EntityId, candidate)
    }
  }
  this.entityConfigMap = registry
}
|
|
144
|
+
|
|
145
|
+
/**
 * Look up the search config registered for an entity.
 *
 * @param entityId - Entity to look up.
 * @returns The registered config, or `undefined` when the entity is not registered.
 */
getEntityConfig(entityId: EntityId): SearchEntityConfig | undefined {
  const registered = this.entityConfigMap.get(entityId)
  return registered
}
|
|
151
|
+
|
|
152
|
+
/**
 * Collect every registered entity config.
 *
 * @returns A new array holding the configs in registration order.
 */
getAllEntityConfigs(): SearchEntityConfig[] {
  const configs: SearchEntityConfig[] = []
  this.entityConfigMap.forEach((entityConfig) => {
    configs.push(entityConfig)
  })
  return configs
}
|
|
158
|
+
|
|
159
|
+
/**
 * Check if an entity is configured for search indexing.
 *
 * An entity counts as enabled only when a config is registered for it and that
 * config is not flagged `enabled: false`. The lookup result must be checked for
 * `undefined` explicitly: `undefined?.enabled !== false` evaluates to `true`,
 * which would report unknown entities as enabled.
 *
 * @param entityId - Entity to check.
 * @returns `true` when the entity has a registered, non-disabled config.
 */
isEntityEnabled(entityId: EntityId): boolean {
  const config = this.entityConfigMap.get(entityId)
  return config !== undefined && config.enabled !== false
}
|
|
166
|
+
|
|
167
|
+
/**
 * Build an IndexableRecord for one record and send it to the search service.
 *
 * Nothing happens when the entity has no registered config. The config hooks
 * (`buildSource`, `formatResult`, `resolveUrl`, `resolveLinks`) are best-effort:
 * a hook that throws is reported through `searchDebugWarn` and the record is
 * indexed without the piece that hook would have supplied.
 *
 * @param params - Identifiers and data of the record to index.
 */
async indexRecord(params: IndexRecordParams): Promise<void> {
  const { entityId, recordId, tenantId, organizationId, record } = params

  const entityConfig = this.entityConfigMap.get(entityId)
  if (!entityConfig || entityConfig.enabled === false) {
    return // Entity not configured for search
  }

  // Single place that reports a failed hook; the payload shape is shared by all hooks.
  const warnHookFailure = (message: string, error: unknown): void => {
    searchDebugWarn('SearchIndexer', message, {
      entityId,
      recordId,
      error: error instanceof Error ? error.message : error,
    })
  }

  const context: SearchBuildContext = {
    record,
    customFields: params.customFields ?? {},
    organizationId,
    tenantId,
    queryEngine: this.queryEngine,
  }

  let text: string | string[] | undefined
  let presenter: SearchResultPresenter | undefined
  let links: SearchResultLink[] | undefined
  let checksumSource: unknown | undefined

  // buildSource runs first: it can supply text, presenter, links and checksumSource in one go.
  if (entityConfig.buildSource) {
    try {
      const source = await entityConfig.buildSource(context)
      if (source) {
        text = source.text
        if (source.presenter) {
          presenter = source.presenter
        }
        if (source.links) {
          links = source.links
        }
        if (source.checksumSource !== undefined) {
          checksumSource = source.checksumSource
        }
      }
    } catch (error) {
      warnHookFailure('buildSource failed', error)
    }
  }

  // formatResult is consulted only when buildSource produced no presenter.
  if (!presenter && entityConfig.formatResult) {
    try {
      const formatted = await entityConfig.formatResult(context)
      if (formatted) {
        presenter = formatted
      }
    } catch (error) {
      warnHookFailure('formatResult failed', error)
    }
  }

  // No earlier step sets a URL, so resolveUrl is its only source.
  let url: string | undefined
  if (entityConfig.resolveUrl) {
    try {
      const resolvedUrl = await entityConfig.resolveUrl(context)
      if (resolvedUrl) {
        url = resolvedUrl
      }
    } catch (error) {
      warnHookFailure('resolveUrl failed', error)
    }
  }

  // resolveLinks is consulted only when buildSource produced no links.
  if (!links && entityConfig.resolveLinks) {
    try {
      const resolvedLinks = await entityConfig.resolveLinks(context)
      if (resolvedLinks) {
        links = resolvedLinks
      }
    } catch (error) {
      warnHookFailure('resolveLinks failed', error)
    }
  }

  const indexable: IndexableRecord = {
    entityId,
    recordId,
    tenantId,
    organizationId,
    fields: record,
    presenter,
    url,
    links,
    text,
    checksumSource,
  }

  await this.searchService.index(indexable)
}
|
|
267
|
+
|
|
268
|
+
/**
 * Load a record through the query engine and index it.
 * Intended for callers, such as workers, that only hold record identifiers.
 *
 * @param params - Identifiers of the record to load and index.
 * @returns `{ action: 'indexed' }` on success, or `{ action: 'skipped', reason }` when the
 *   query engine is missing, the entity is not configured, or the record is not found.
 * @throws Rethrows any error raised while loading or indexing, after logging it.
 */
async indexRecordById(params: {
  entityId: EntityId
  recordId: string
  tenantId: string
  organizationId?: string | null
}): Promise<{ action: 'indexed' | 'skipped'; reason?: string }> {
  const engine = this.queryEngine
  if (!engine) {
    return { action: 'skipped', reason: 'queryEngine not available' }
  }

  const entityConfig = this.entityConfigMap.get(params.entityId)
  if (!entityConfig || entityConfig.enabled === false) {
    return { action: 'skipped', reason: 'entity not configured' }
  }

  try {
    // Fetch exactly one row matching the record id, custom fields included.
    const { items } = await engine.query(params.entityId, {
      tenantId: params.tenantId,
      organizationId: params.organizationId ?? undefined,
      filters: { id: params.recordId },
      includeCustomFields: true,
      page: { page: 1, pageSize: 1 },
    })

    const loaded = items[0] as Record<string, unknown> | undefined
    if (!loaded) {
      return { action: 'skipped', reason: 'record not found' }
    }

    // Keys prefixed with `cf:` or `cf_` are custom fields; the three-character prefix is stripped.
    const customFieldPrefix = /^cf[:_]/
    const prefixLength = 3
    const customFields: Record<string, unknown> = {}
    for (const [key, value] of Object.entries(loaded)) {
      if (!customFieldPrefix.test(key)) continue
      customFields[key.slice(prefixLength)] = value
    }

    await this.indexRecord({
      entityId: params.entityId,
      recordId: params.recordId,
      tenantId: params.tenantId,
      organizationId: params.organizationId,
      record: loaded,
      customFields,
    })

    return { action: 'indexed' }
  } catch (error) {
    searchError('SearchIndexer', 'Failed to load record for indexing', {
      entityId: params.entityId,
      recordId: params.recordId,
      error: error instanceof Error ? error.message : error,
    })
    throw error
  }
}
|
|
329
|
+
|
|
330
|
+
/**
 * Remove one record from the search index by delegating to the search service.
 *
 * @param params - Entity, record and tenant identifying the record to remove.
 */
async deleteRecord(params: DeleteRecordParams): Promise<void> {
  const { entityId, recordId, tenantId } = params
  await this.searchService.delete(entityId, recordId, tenantId)
}
|
|
336
|
+
|
|
337
|
+
/**
 * Remove every indexed record of an entity type by delegating to the search service.
 *
 * @param params - Entity and tenant whose records are purged.
 */
async purgeEntity(params: PurgeEntityParams): Promise<void> {
  const { entityId, tenantId } = params
  await this.searchService.purge(entityId, tenantId)
}
|
|
343
|
+
|
|
344
|
+
/**
 * Reindex an entity via all configured strategies (including vector).
 * This is the general reindex method that works with all search strategies.
 *
 * Records are loaded page by page through the query engine, converted with
 * `buildIndexableRecords`, and sent one at a time to the search service.
 * A record that fails to index is logged and skipped. A failure while loading
 * or building a page aborts the run and is reported in the result.
 *
 * At most `MAX_PAGES` pages are read. When that limit stops the run while more
 * pages were still expected, a warning is logged so the truncation is not silent.
 * The returned result is not altered by that warning.
 *
 * @param params - Entity and tenant to reindex, an optional organization scope, and
 *   `purgeFirst` to purge the entity before indexing (a failed purge is logged, not fatal).
 * @returns Counts of processed entities and indexed records plus collected errors;
 *   `success` is `false` when the query engine or entity config is missing or a page failed.
 */
async reindexEntity(params: {
  entityId: EntityId
  tenantId: string
  organizationId?: string | null
  purgeFirst?: boolean
}): Promise<ReindexResult> {
  if (!this.queryEngine) {
    return {
      success: false,
      entitiesProcessed: 0,
      recordsIndexed: 0,
      errors: [{ entityId: params.entityId, error: 'Query engine not available' }],
    }
  }

  const config = this.entityConfigMap.get(params.entityId)
  if (!config || config.enabled === false) {
    return {
      success: false,
      entitiesProcessed: 0,
      recordsIndexed: 0,
      errors: [{ entityId: params.entityId, error: 'Entity not configured for search' }],
    }
  }

  const result: ReindexResult = {
    success: true,
    entitiesProcessed: 1,
    recordsIndexed: 0,
    errors: [],
  }

  // Optionally purge first; a failed purge does not stop the reindex.
  if (params.purgeFirst) {
    try {
      await this.searchService.purge(params.entityId, params.tenantId)
    } catch (error) {
      searchDebugWarn('SearchIndexer', 'Failed to purge entity before reindex', {
        entityId: params.entityId,
        error: error instanceof Error ? error.message : error,
      })
    }
  }

  // Paginate through all records
  let page = 1
  const pageSize = 200
  let hasMore = true

  while (hasMore && page <= MAX_PAGES) {
    try {
      const queryResult = await this.queryEngine.query(params.entityId, {
        tenantId: params.tenantId,
        organizationId: params.organizationId ?? undefined,
        includeCustomFields: true,
        page: { page, pageSize },
      })

      const items = queryResult.items as Record<string, unknown>[]
      if (items.length === 0) {
        hasMore = false
        break
      }

      // Build and index records
      const { records } = await this.buildIndexableRecords(
        params.entityId,
        params.tenantId,
        params.organizationId ?? null,
        items,
        config,
      )

      // Index each record via SearchService (sends to all strategies)
      for (const record of records) {
        try {
          await this.searchService.index(record)
          result.recordsIndexed++
        } catch (error) {
          searchDebugWarn('SearchIndexer', 'Failed to index record', {
            entityId: params.entityId,
            recordId: record.recordId,
            error: error instanceof Error ? error.message : error,
          })
        }
      }

      page++
      // A short page means the data set is exhausted.
      hasMore = items.length === pageSize
    } catch (error) {
      result.success = false
      result.errors.push({
        entityId: params.entityId,
        error: error instanceof Error ? error.message : String(error),
      })
      break
    }
  }

  // `page` only moves past MAX_PAGES after a full page was indexed, so this
  // condition is true exactly when the page limit cut the run short.
  if (hasMore && page > MAX_PAGES) {
    searchDebugWarn('SearchIndexer', 'Reindex stopped at page limit; remaining records were not indexed', {
      entityId: params.entityId,
      maxPages: MAX_PAGES,
      pageSize,
    })
  }

  return result
}
|
|
449
|
+
|
|
450
|
+
/**
 * Reindex every registered entity by running `reindexEntity` on each, one after another.
 *
 * @param params - Tenant, optional organization scope, and `purgeFirst`, all forwarded
 *   unchanged to each `reindexEntity` call.
 * @returns Totals across all entities; `success` is `false` if any entity failed, and
 *   `errors` concatenates the per-entity errors.
 */
async reindexAll(params: {
  tenantId: string
  organizationId?: string | null
  purgeFirst?: boolean
}): Promise<ReindexResult> {
  const summary: ReindexResult = {
    success: true,
    entitiesProcessed: 0,
    recordsIndexed: 0,
    errors: [],
  }

  const { tenantId, organizationId, purgeFirst } = params

  // Entities are handled sequentially; each run finishes before the next one starts.
  for (const entityId of this.listEnabledEntities()) {
    const outcome = await this.reindexEntity({ entityId, tenantId, organizationId, purgeFirst })

    summary.entitiesProcessed += 1
    summary.recordsIndexed += outcome.recordsIndexed
    for (const failure of outcome.errors) {
      summary.errors.push(failure)
    }
    if (!outcome.success) {
      summary.success = false
    }
  }

  return summary
}
|
|
486
|
+
|
|
487
|
+
/**
 * Bulk index multiple records.
 *
 * Records whose entity has no registered config are skipped. For the rest, the
 * `formatResult`, `resolveUrl` and `resolveLinks` hooks are run best-effort: a hook
 * that throws is reported through `searchDebugWarn` and the record is still indexed
 * without that piece. The build context carries the query engine, matching
 * `indexRecord`. All built records are sent in a single `bulkIndex` call; no call is
 * made when nothing was built.
 *
 * @param params - Records to index.
 */
async bulkIndexRecords(params: IndexRecordParams[]): Promise<void> {
  const indexableRecords: IndexableRecord[] = []

  for (const param of params) {
    const config = this.entityConfigMap.get(param.entityId)
    if (!config || config.enabled === false) {
      continue
    }

    const buildContext: SearchBuildContext = {
      record: param.record,
      customFields: param.customFields ?? {},
      organizationId: param.organizationId,
      tenantId: param.tenantId,
      queryEngine: this.queryEngine,
    }

    // Report a failed hook instead of swallowing it, so a broken config is diagnosable.
    const warnHookFailure = (message: string, error: unknown): void => {
      searchDebugWarn('SearchIndexer', message, {
        entityId: param.entityId,
        recordId: param.recordId,
        error: error instanceof Error ? error.message : error,
      })
    }

    let presenter: SearchResultPresenter | undefined
    if (config.formatResult) {
      try {
        const result = await config.formatResult(buildContext)
        if (result) presenter = result
      } catch (error) {
        // Index without a presenter
        warnHookFailure('formatResult failed', error)
      }
    }

    let url: string | undefined
    if (config.resolveUrl) {
      try {
        const result = await config.resolveUrl(buildContext)
        if (result) url = result
      } catch (error) {
        // Index without a URL
        warnHookFailure('resolveUrl failed', error)
      }
    }

    let links: SearchResultLink[] | undefined
    if (config.resolveLinks) {
      try {
        const result = await config.resolveLinks(buildContext)
        if (result) links = result
      } catch (error) {
        // Index without links
        warnHookFailure('resolveLinks failed', error)
      }
    }

    indexableRecords.push({
      entityId: param.entityId,
      recordId: param.recordId,
      tenantId: param.tenantId,
      organizationId: param.organizationId,
      fields: param.record,
      presenter,
      url,
      links,
    })
  }

  if (indexableRecords.length > 0) {
    await this.searchService.bulkIndex(indexableRecords)
  }
}
|
|
552
|
+
|
|
553
|
+
/**
 * List the IDs of all entities whose search configuration is enabled.
 *
 * An entity counts as enabled unless its config sets `enabled` to exactly
 * `false`. This is the same rule `bulkIndexRecords` applies when deciding
 * whether to skip a record.
 *
 * @returns Entity IDs in the insertion order of the configuration map.
 */
listEnabledEntities(): EntityId[] {
  return Array.from(this.entityConfigMap.entries())
    .filter(([, config]) => config.enabled !== false)
    .map(([entityId]) => entityId)
}
|
|
559
|
+
|
|
560
|
+
/**
 * Looks up the strategy registered under the id `'fulltext'` on the search service.
 *
 * @returns The strategy cast to `FullTextSearchStrategy`, or `undefined` when
 *          the search service returns nothing for that id.
 */
private getFulltextStrategy(): FullTextSearchStrategy | undefined {
  const candidate = this.searchService.getStrategy('fulltext')
  return candidate ? (candidate as FullTextSearchStrategy) : undefined
}
|
|
568
|
+
|
|
569
|
+
/**
 * Reindex a single entity type to fulltext search.
 *
 * Records are read page by page (200 per page) through the query engine,
 * converted with `buildIndexableRecords`, and sent to the fulltext strategy only.
 *
 * When `useQueue` is true, each page is enqueued on the fulltext queue as a
 * `batch-index` job carrying only `{ entityId, recordId }` references.
 * When `useQueue` is false or omitted (default), each page is indexed directly
 * through `fulltext.bulkIndex` (blocking).
 *
 * Result contract:
 * - Missing fulltext strategy, queue (when `useQueue`), query engine or entity
 *   config returns immediately with `success: false`.
 * - A failed direct batch is recorded in `errors` and the loop continues.
 * - Any other error raised while fetching or preparing a page is recorded as
 *   `Query failed: …` and stops pagination. Neither of these two cases
 *   changes `success`.
 * - An error outside the page loop (e.g. `recreateIndex`) sets `success: false`.
 * - Pagination stops after `MAX_PAGES` pages; this is logged as a warning.
 *
 * @param params - Entity, tenant/organization scope and reindex options.
 * @returns Counters (`recordsIndexed`, `recordsDropped`, `jobsEnqueued`) and collected errors.
 */
async reindexEntityToFulltext(params: ReindexEntityParams): Promise<ReindexResult> {
  const result: ReindexResult = {
    success: true,
    entitiesProcessed: 0,
    recordsIndexed: 0,
    recordsDropped: 0,
    jobsEnqueued: 0,
    errors: [],
  }

  const fulltext = this.getFulltextStrategy()
  if (!fulltext) {
    result.success = false
    result.errors.push({ entityId: params.entityId, error: 'Fulltext strategy not available' })
    return result
  }

  // If useQueue is requested but no queue is available, return error
  if (params.useQueue && !this.fulltextQueue) {
    result.success = false
    result.errors.push({ entityId: params.entityId, error: 'Fulltext queue not configured for queue-based reindexing' })
    return result
  }

  if (!this.queryEngine) {
    result.success = false
    result.errors.push({ entityId: params.entityId, error: 'QueryEngine not available for reindexing' })
    return result
  }

  const config = this.entityConfigMap.get(params.entityId)
  if (!config) {
    result.success = false
    result.errors.push({ entityId: params.entityId, error: 'Entity not configured for search' })
    return result
  }

  try {
    params.onProgress?.({
      entityId: params.entityId,
      phase: 'starting',
      processed: 0,
    })

    // Recreate index if requested (default: true)
    if (params.recreateIndex !== false) {
      await fulltext.recreateIndex(params.tenantId)
    }

    // Fetch and index records with pagination
    const pageSize = 200
    let page = 1
    let totalProcessed = 0
    let jobsEnqueued = 0

    for (;;) {
      params.onProgress?.({
        entityId: params.entityId,
        phase: 'fetching',
        processed: totalProcessed,
      })

      try {
        const queryResult = await this.queryEngine.query(params.entityId, {
          tenantId: params.tenantId,
          organizationId: params.organizationId ?? undefined,
          page: { page, pageSize },
        })

        if (!queryResult.items.length) {
          break
        }

        params.onProgress?.({
          entityId: params.entityId,
          phase: 'indexing',
          processed: totalProcessed,
          total: queryResult.total,
        })

        // Build IndexableRecords for this batch
        const { records: indexableRecords, dropped } = await this.buildIndexableRecords(
          params.entityId,
          params.tenantId,
          params.organizationId ?? null,
          queryResult.items,
          config,
        )
        result.recordsDropped = (result.recordsDropped ?? 0) + dropped

        // Index to fulltext - either via queue or directly
        if (indexableRecords.length > 0) {
          if (params.useQueue && this.fulltextQueue) {
            // Enqueue batch for background processing - only pass minimal references.
            // The worker is expected to load fresh data itself (per the original
            // note: from the entity_indexes table).
            await this.fulltextQueue.enqueue({
              jobType: 'batch-index',
              tenantId: params.tenantId,
              organizationId: params.organizationId,
              records: indexableRecords.map((r) => ({ entityId: r.entityId, recordId: r.recordId })),
            })
            jobsEnqueued += 1
            totalProcessed += indexableRecords.length
          } else {
            // Direct indexing (blocking)
            try {
              await fulltext.bulkIndex(indexableRecords)
              totalProcessed += indexableRecords.length
            } catch (indexError) {
              // Log error but continue with remaining batches
              const errorMsg = indexError instanceof Error ? indexError.message : String(indexError)
              result.errors.push({
                entityId: params.entityId,
                error: `Batch ${page} failed: ${errorMsg}`,
              })
            }
          }
        }

        // A short page means the data set is exhausted.
        if (queryResult.items.length < pageSize) {
          break
        }
        page += 1

        // Safety check to prevent infinite loops
        if (page > MAX_PAGES) {
          // Surface the truncation instead of stopping silently, matching
          // the behavior of the vector reindex path.
          searchDebugWarn('SearchIndexer', 'Reached MAX_PAGES limit, stopping pagination', {
            entityId: params.entityId,
            maxPages: MAX_PAGES,
            totalProcessed,
          })
          break
        }
      } catch (queryError) {
        const errorMsg = queryError instanceof Error ? queryError.message : String(queryError)
        result.errors.push({
          entityId: params.entityId,
          error: `Query failed: ${errorMsg}`,
        })
        break
      }
    }

    result.entitiesProcessed = 1
    result.recordsIndexed = totalProcessed
    result.jobsEnqueued = jobsEnqueued

    params.onProgress?.({
      entityId: params.entityId,
      phase: 'complete',
      processed: totalProcessed,
      total: totalProcessed,
    })
  } catch (error) {
    result.success = false
    result.errors.push({
      entityId: params.entityId,
      error: error instanceof Error ? error.message : String(error),
    })
  }

  return result
}
|
|
734
|
+
|
|
735
|
+
/**
 * Reindex all enabled entities to fulltext search.
 *
 * The index is recreated once up front (unless `recreateIndex` is `false`),
 * then every entity from `listEnabledEntities()` is processed sequentially via
 * `reindexEntityToFulltext` with `recreateIndex: false`.
 *
 * When `useQueue` is true, batches are enqueued for background processing by workers.
 * When `useQueue` is false (default), batches are indexed directly (blocking).
 *
 * Failures are reported through the returned result rather than by throwing:
 * a missing fulltext strategy or a failed index recreation returns
 * immediately with `success: false` and no entity is processed.
 *
 * @param params - Tenant/organization scope and reindex options.
 * @returns Counters and errors aggregated over all entities.
 */
async reindexAllToFulltext(params: ReindexAllParams): Promise<ReindexResult> {
  const result: ReindexResult = {
    success: true,
    entitiesProcessed: 0,
    recordsIndexed: 0,
    recordsDropped: 0,
    jobsEnqueued: 0,
    errors: [],
  }

  const fulltext = this.getFulltextStrategy()
  if (!fulltext) {
    result.success = false
    result.errors.push({ entityId: 'all' as EntityId, error: 'Fulltext strategy not available' })
    return result
  }

  // Recreate index once before processing all entities
  if (params.recreateIndex !== false) {
    try {
      await fulltext.recreateIndex(params.tenantId)
    } catch (error) {
      // Report through the result, as the single-entity variant does, and stop:
      // indexing into an index that could not be recreated would be misleading.
      result.success = false
      result.errors.push({
        entityId: 'all' as EntityId,
        error: `Failed to recreate index: ${error instanceof Error ? error.message : String(error)}`,
      })
      return result
    }
  }

  const entities = this.listEnabledEntities()

  for (const entityId of entities) {
    const entityResult = await this.reindexEntityToFulltext({
      entityId,
      tenantId: params.tenantId,
      organizationId: params.organizationId,
      recreateIndex: false, // Already recreated above
      onProgress: params.onProgress,
      useQueue: params.useQueue,
    })

    result.entitiesProcessed += entityResult.entitiesProcessed
    result.recordsIndexed += entityResult.recordsIndexed
    result.recordsDropped = (result.recordsDropped ?? 0) + (entityResult.recordsDropped ?? 0)
    result.jobsEnqueued = (result.jobsEnqueued ?? 0) + (entityResult.jobsEnqueued ?? 0)
    result.errors.push(...entityResult.errors)

    if (!entityResult.success) {
      result.success = false
    }
  }

  return result
}
|
|
788
|
+
|
|
789
|
+
/**
 * Reindex a single entity type to vector search.
 *
 * Records are read page by page (200 per page) through the query engine and
 * reduced to `{ entityId, recordId }` references; items without an `id` are
 * counted in `recordsDropped`.
 *
 * When `useQueue` is anything other than `false` (default), each page is
 * enqueued on the vector queue as a `batch-index` job.
 * When `useQueue` is `false`, each record is indexed directly (blocking) via
 * `indexRecordById`.
 *
 * Result contract:
 * - Missing vector queue (in queue mode), query engine or entity config
 *   returns immediately with `success: false`.
 * - With `purgeFirst`, `searchService.purge` is called first; a purge failure
 *   is logged and does not abort the reindex.
 * - In direct mode, records that fail to index are logged and summarized as
 *   one entry per page in `errors`; `success` is not changed by them.
 * - Any other error (query, enqueue) sets `success: false` and stops.
 * - Pagination stops after `MAX_PAGES` pages; this is logged as a warning.
 *
 * @param params - Entity, tenant/organization scope and reindex options.
 * @returns Counters (`recordsIndexed`, `recordsDropped`, `jobsEnqueued`) and collected errors.
 */
async reindexEntityToVector(params: ReindexEntityParams & { purgeFirst?: boolean }): Promise<ReindexResult> {
  searchDebug('SearchIndexer', 'reindexEntityToVector called', {
    entityId: params.entityId,
    tenantId: params.tenantId,
    organizationId: params.organizationId,
    useQueue: params.useQueue,
    purgeFirst: params.purgeFirst,
  })

  const result: ReindexResult = {
    success: true,
    entitiesProcessed: 0,
    recordsIndexed: 0,
    recordsDropped: 0,
    jobsEnqueued: 0,
    errors: [],
  }

  // If useQueue is requested but no queue is available, return error
  if (params.useQueue !== false && !this.vectorQueue) {
    result.success = false
    result.errors.push({ entityId: params.entityId, error: 'Vector queue not configured for queue-based reindexing' })
    return result
  }

  if (!this.queryEngine) {
    result.success = false
    result.errors.push({ entityId: params.entityId, error: 'QueryEngine not available for reindexing' })
    return result
  }

  const config = this.entityConfigMap.get(params.entityId)
  if (!config) {
    result.success = false
    result.errors.push({ entityId: params.entityId, error: 'Entity not configured for search' })
    return result
  }

  try {
    params.onProgress?.({
      entityId: params.entityId,
      phase: 'starting',
      processed: 0,
    })

    // Optionally purge first; a failed purge is tolerated so the reindex still runs.
    if (params.purgeFirst) {
      try {
        await this.searchService.purge(params.entityId, params.tenantId)
      } catch (error) {
        searchDebugWarn('SearchIndexer', 'Failed to purge entity before vector reindex', {
          entityId: params.entityId,
          error: error instanceof Error ? error.message : error,
        })
      }
    }

    // Fetch and enqueue records with pagination
    const pageSize = 200
    let page = 1
    let totalProcessed = 0
    let jobsEnqueued = 0

    for (;;) {
      params.onProgress?.({
        entityId: params.entityId,
        phase: 'fetching',
        processed: totalProcessed,
      })

      const queryResult = await this.queryEngine.query(params.entityId, {
        tenantId: params.tenantId,
        organizationId: params.organizationId ?? undefined,
        page: { page, pageSize },
      })

      if (!queryResult.items.length) break

      params.onProgress?.({
        entityId: params.entityId,
        phase: 'indexing',
        processed: totalProcessed,
        total: queryResult.total,
      })

      // Build batch of record references
      const batchRecords: VectorBatchRecord[] = []
      for (const item of queryResult.items) {
        const recordId = String((item as Record<string, unknown>).id ?? '')
        if (!recordId) {
          result.recordsDropped = (result.recordsDropped ?? 0) + 1
          continue
        }
        batchRecords.push({
          entityId: params.entityId,
          recordId,
        })
      }

      // Enqueue batch for background processing or index directly
      if (batchRecords.length > 0) {
        if (params.useQueue !== false && this.vectorQueue) {
          await this.vectorQueue.enqueue({
            jobType: 'batch-index',
            tenantId: params.tenantId,
            organizationId: params.organizationId ?? null,
            records: batchRecords,
          })
          jobsEnqueued += 1
          totalProcessed += batchRecords.length
          searchDebug('SearchIndexer', 'Enqueued batch for vector indexing', {
            entityId: params.entityId,
            batchSize: batchRecords.length,
            jobsEnqueued,
            totalProcessed,
          })
        } else {
          // Direct indexing (blocking) - index each record individually
          let failedInBatch = 0
          let firstFailure: string | undefined

          for (const { entityId, recordId } of batchRecords) {
            try {
              await this.indexRecordById({
                entityId: entityId as EntityId,
                recordId,
                tenantId: params.tenantId,
                organizationId: params.organizationId,
              })
              totalProcessed++
            } catch (error) {
              searchDebugWarn('SearchIndexer', 'Failed to index record to vector', {
                entityId,
                recordId,
                error: error instanceof Error ? error.message : error,
              })
              failedInBatch += 1
              if (firstFailure === undefined) {
                firstFailure = error instanceof Error ? error.message : String(error)
              }
            }
          }

          // Report lost records to the caller. One summary entry per page keeps
          // the error list bounded even when every record of a large entity fails.
          if (failedInBatch > 0) {
            result.errors.push({
              entityId: params.entityId,
              error: `Batch ${page}: ${failedInBatch} record(s) failed to index (first error: ${firstFailure})`,
            })
          }
        }
      }

      // A short page means the data set is exhausted.
      if (queryResult.items.length < pageSize) break
      page += 1

      // Safety check to prevent infinite loops
      if (page > MAX_PAGES) {
        searchDebugWarn('SearchIndexer', 'Reached MAX_PAGES limit, stopping pagination', {
          entityId: params.entityId,
          maxPages: MAX_PAGES,
          totalProcessed,
        })
        break
      }
    }

    result.entitiesProcessed = 1
    result.recordsIndexed = totalProcessed
    result.jobsEnqueued = jobsEnqueued

    params.onProgress?.({
      entityId: params.entityId,
      phase: 'complete',
      processed: totalProcessed,
      total: totalProcessed,
    })
  } catch (error) {
    result.success = false
    result.errors.push({
      entityId: params.entityId,
      error: error instanceof Error ? error.message : String(error),
    })
  }

  return result
}
|
|
968
|
+
|
|
969
|
+
/**
 * Runs `reindexEntityToVector` for every entity returned by
 * `listEnabledEntities()`, one after another, and sums up the outcomes.
 *
 * `useQueue`, `purgeFirst`, `onProgress` and the tenant/organization scope are
 * forwarded unchanged, so queue-vs-direct behavior is decided by
 * `reindexEntityToVector`.
 *
 * @param params - Tenant/organization scope and reindex options.
 * @returns Counters and errors aggregated over all entities; `success` is
 *          false if any entity reported a failure.
 */
async reindexAllToVector(params: ReindexAllParams & { purgeFirst?: boolean }): Promise<ReindexResult> {
  const totals: ReindexResult = {
    success: true,
    entitiesProcessed: 0,
    recordsIndexed: 0,
    recordsDropped: 0,
    jobsEnqueued: 0,
    errors: [],
  }

  for (const entityId of this.listEnabledEntities()) {
    const outcome = await this.reindexEntityToVector({
      entityId,
      tenantId: params.tenantId,
      organizationId: params.organizationId,
      onProgress: params.onProgress,
      useQueue: params.useQueue,
      purgeFirst: params.purgeFirst,
    })

    totals.entitiesProcessed += outcome.entitiesProcessed
    totals.recordsIndexed += outcome.recordsIndexed
    // The optional counters may be absent on either side; treat absent as 0.
    totals.recordsDropped = (totals.recordsDropped ?? 0) + (outcome.recordsDropped ?? 0)
    totals.jobsEnqueued = (totals.jobsEnqueued ?? 0) + (outcome.jobsEnqueued ?? 0)
    totals.errors.push(...outcome.errors)

    if (!outcome.success) totals.success = false
  }

  return totals
}
|
|
1009
|
+
|
|
1010
|
+
/**
 * Build IndexableRecords from raw query results.
 *
 * For each item:
 * - items without an `id` are skipped and counted as dropped;
 * - keys starting with `cf:` or `cf_` are collected (prefix removed) as custom fields;
 * - `config.buildSource`, when present, may supply text, presenter, links and checksumSource;
 * - `config.formatResult` is used only if no presenter came from `buildSource`;
 * - `config.resolveUrl` / `config.resolveLinks` fill in URL and links when still unset.
 *
 * A hook that throws is logged and skipped; the record is still produced
 * without that attribute.
 *
 * @param entityId - Entity type the items belong to.
 * @param tenantId - Tenant scope stored on every record.
 * @param organizationId - Organization scope stored on every record (may be null).
 * @param items - Raw rows as returned by the query engine.
 * @param config - Search configuration of the entity.
 * @returns The built records and the number of dropped items.
 */
private async buildIndexableRecords(
  entityId: EntityId,
  tenantId: string,
  organizationId: string | null,
  items: Record<string, unknown>[],
  config: SearchEntityConfig,
): Promise<{ records: IndexableRecord[]; dropped: number }> {
  const records: IndexableRecord[] = []
  let dropped = 0

  // Debug: log first item to see structure
  if (items.length > 0) {
    // JSON.stringify throws on circular references and BigInt values; a debug
    // sample must never abort the batch, so serialization is guarded.
    let sampleItem: string
    try {
      sampleItem = JSON.stringify(items[0]).slice(0, 500)
    } catch {
      sampleItem = '[unserializable]'
    }

    searchDebug('SearchIndexer', 'Sample item structure', {
      entityId,
      sampleKeys: Object.keys(items[0]),
      sampleId: items[0].id,
      hasId: 'id' in items[0],
      // NOTE(review): these person-specific keys look like leftover debugging
      // for one entity type; confirm whether they are still wanted.
      firstName: items[0].first_name,
      lastName: items[0].last_name,
      preferredName: items[0].preferred_name,
      sampleItem,
    })
  }

  for (const item of items) {
    const recordId = String(item.id ?? '')
    if (!recordId) {
      searchDebugWarn('SearchIndexer', 'Skipping item without id', { entityId, itemKeys: Object.keys(item) })
      dropped++
      continue
    }

    // Extract custom fields from record
    const customFields: Record<string, unknown> = {}
    for (const [key, value] of Object.entries(item)) {
      if (key.startsWith('cf:') || key.startsWith('cf_')) {
        const cfKey = key.slice(3) // Remove 'cf:' or 'cf_' prefix (both are 3 chars)
        customFields[cfKey] = value
      }
    }

    const buildContext: SearchBuildContext = {
      record: item,
      customFields,
      organizationId,
      tenantId,
      queryEngine: this.queryEngine,
    }

    // Try buildSource first (provides text, presenter, links, checksumSource)
    let text: string | string[] | undefined
    let presenter: SearchResultPresenter | undefined
    let url: string | undefined
    let links: SearchResultLink[] | undefined
    let checksumSource: unknown | undefined

    if (config.buildSource) {
      try {
        const source = await config.buildSource(buildContext)
        if (source) {
          text = source.text
          if (source.presenter) presenter = source.presenter
          if (source.links) links = source.links
          if (source.checksumSource !== undefined) checksumSource = source.checksumSource
        }
      } catch (err) {
        searchDebugWarn('SearchIndexer', 'buildSource failed', {
          entityId,
          recordId,
          error: err instanceof Error ? err.message : err,
        })
      }
    }

    // Fall back to formatResult if no presenter from buildSource
    if (!presenter && config.formatResult) {
      try {
        const result = await config.formatResult(buildContext)
        if (result) presenter = result
      } catch (err) {
        // Skip presenter on error, but log it like the buildSource failure above.
        searchDebugWarn('SearchIndexer', 'formatResult failed', {
          entityId,
          recordId,
          error: err instanceof Error ? err.message : err,
        })
      }
    }

    // Resolve URL if not already set
    if (!url && config.resolveUrl) {
      try {
        const result = await config.resolveUrl(buildContext)
        if (result) url = result
      } catch (err) {
        // Skip URL on error, but log it like the buildSource failure above.
        searchDebugWarn('SearchIndexer', 'resolveUrl failed', {
          entityId,
          recordId,
          error: err instanceof Error ? err.message : err,
        })
      }
    }

    // Resolve links if not already set
    if (!links && config.resolveLinks) {
      try {
        const result = await config.resolveLinks(buildContext)
        if (result) links = result
      } catch (err) {
        // Skip links on error, but log it like the buildSource failure above.
        searchDebugWarn('SearchIndexer', 'resolveLinks failed', {
          entityId,
          recordId,
          error: err instanceof Error ? err.message : err,
        })
      }
    }

    records.push({
      entityId,
      recordId,
      tenantId,
      organizationId,
      fields: item,
      presenter,
      url,
      links,
      text,
      checksumSource,
    })
  }

  searchDebug('SearchIndexer', 'Finished building records', {
    entityId,
    inputCount: items.length,
    outputCount: records.length,
    dropped,
  })

  return { records, dropped }
}
|
|
1141
|
+
}
|