@open-mercato/search 0.4.2-canary-c02407ff85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/AGENTS.md +678 -0
  2. package/build.mjs +92 -0
  3. package/dist/di.js +157 -0
  4. package/dist/di.js.map +7 -0
  5. package/dist/fulltext/drivers/index.js +21 -0
  6. package/dist/fulltext/drivers/index.js.map +7 -0
  7. package/dist/fulltext/drivers/meilisearch/index.js +320 -0
  8. package/dist/fulltext/drivers/meilisearch/index.js.map +7 -0
  9. package/dist/fulltext/index.js +7 -0
  10. package/dist/fulltext/index.js.map +7 -0
  11. package/dist/fulltext/types.js +1 -0
  12. package/dist/fulltext/types.js.map +7 -0
  13. package/dist/index.js +12 -0
  14. package/dist/index.js.map +7 -0
  15. package/dist/indexer/index.js +8 -0
  16. package/dist/indexer/index.js.map +7 -0
  17. package/dist/indexer/search-indexer.js +848 -0
  18. package/dist/indexer/search-indexer.js.map +7 -0
  19. package/dist/indexer/subscribers/delete.js +41 -0
  20. package/dist/indexer/subscribers/delete.js.map +7 -0
  21. package/dist/lib/debug.js +34 -0
  22. package/dist/lib/debug.js.map +7 -0
  23. package/dist/lib/fallback-presenter.js +107 -0
  24. package/dist/lib/fallback-presenter.js.map +7 -0
  25. package/dist/lib/field-policy.js +75 -0
  26. package/dist/lib/field-policy.js.map +7 -0
  27. package/dist/lib/index.js +19 -0
  28. package/dist/lib/index.js.map +7 -0
  29. package/dist/lib/merger.js +93 -0
  30. package/dist/lib/merger.js.map +7 -0
  31. package/dist/lib/presenter-enricher.js +192 -0
  32. package/dist/lib/presenter-enricher.js.map +7 -0
  33. package/dist/modules/search/acl.js +14 -0
  34. package/dist/modules/search/acl.js.map +7 -0
  35. package/dist/modules/search/ai-tools.js +284 -0
  36. package/dist/modules/search/ai-tools.js.map +7 -0
  37. package/dist/modules/search/api/embeddings/reindex/cancel/route.js +65 -0
  38. package/dist/modules/search/api/embeddings/reindex/cancel/route.js.map +7 -0
  39. package/dist/modules/search/api/embeddings/reindex/route.js +165 -0
  40. package/dist/modules/search/api/embeddings/reindex/route.js.map +7 -0
  41. package/dist/modules/search/api/embeddings/route.js +246 -0
  42. package/dist/modules/search/api/embeddings/route.js.map +7 -0
  43. package/dist/modules/search/api/index/route.js +245 -0
  44. package/dist/modules/search/api/index/route.js.map +7 -0
  45. package/dist/modules/search/api/reindex/cancel/route.js +65 -0
  46. package/dist/modules/search/api/reindex/cancel/route.js.map +7 -0
  47. package/dist/modules/search/api/reindex/route.js +332 -0
  48. package/dist/modules/search/api/reindex/route.js.map +7 -0
  49. package/dist/modules/search/api/search/global/route.js +100 -0
  50. package/dist/modules/search/api/search/global/route.js.map +7 -0
  51. package/dist/modules/search/api/search/route.js +101 -0
  52. package/dist/modules/search/api/search/route.js.map +7 -0
  53. package/dist/modules/search/api/settings/fulltext/route.js +55 -0
  54. package/dist/modules/search/api/settings/fulltext/route.js.map +7 -0
  55. package/dist/modules/search/api/settings/global-search/route.js +80 -0
  56. package/dist/modules/search/api/settings/global-search/route.js.map +7 -0
  57. package/dist/modules/search/api/settings/route.js +118 -0
  58. package/dist/modules/search/api/settings/route.js.map +7 -0
  59. package/dist/modules/search/api/settings/vector-store/route.js +77 -0
  60. package/dist/modules/search/api/settings/vector-store/route.js.map +7 -0
  61. package/dist/modules/search/backend/config/search/page.js +10 -0
  62. package/dist/modules/search/backend/config/search/page.js.map +7 -0
  63. package/dist/modules/search/backend/config/search/page.meta.js +24 -0
  64. package/dist/modules/search/backend/config/search/page.meta.js.map +7 -0
  65. package/dist/modules/search/cli.js +698 -0
  66. package/dist/modules/search/cli.js.map +7 -0
  67. package/dist/modules/search/di.js +32 -0
  68. package/dist/modules/search/di.js.map +7 -0
  69. package/dist/modules/search/frontend/components/GlobalSearchDialog.js +357 -0
  70. package/dist/modules/search/frontend/components/GlobalSearchDialog.js.map +7 -0
  71. package/dist/modules/search/frontend/components/HybridSearchTable.js +343 -0
  72. package/dist/modules/search/frontend/components/HybridSearchTable.js.map +7 -0
  73. package/dist/modules/search/frontend/components/SearchSettingsPageClient.js +303 -0
  74. package/dist/modules/search/frontend/components/SearchSettingsPageClient.js.map +7 -0
  75. package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js +360 -0
  76. package/dist/modules/search/frontend/components/sections/FulltextSearchSection.js.map +7 -0
  77. package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js +101 -0
  78. package/dist/modules/search/frontend/components/sections/GlobalSearchSection.js.map +7 -0
  79. package/dist/modules/search/frontend/components/sections/VectorSearchSection.js +608 -0
  80. package/dist/modules/search/frontend/components/sections/VectorSearchSection.js.map +7 -0
  81. package/dist/modules/search/frontend/index.js +9 -0
  82. package/dist/modules/search/frontend/index.js.map +7 -0
  83. package/dist/modules/search/frontend/utils.js +41 -0
  84. package/dist/modules/search/frontend/utils.js.map +7 -0
  85. package/dist/modules/search/i18n/de.json +61 -0
  86. package/dist/modules/search/i18n/en.json +72 -0
  87. package/dist/modules/search/i18n/es.json +61 -0
  88. package/dist/modules/search/i18n/pl.json +61 -0
  89. package/dist/modules/search/index.js +11 -0
  90. package/dist/modules/search/index.js.map +7 -0
  91. package/dist/modules/search/lib/auto-indexing.js +29 -0
  92. package/dist/modules/search/lib/auto-indexing.js.map +7 -0
  93. package/dist/modules/search/lib/embedding-config.js +131 -0
  94. package/dist/modules/search/lib/embedding-config.js.map +7 -0
  95. package/dist/modules/search/lib/global-search-config.js +45 -0
  96. package/dist/modules/search/lib/global-search-config.js.map +7 -0
  97. package/dist/modules/search/lib/reindex-lock.js +99 -0
  98. package/dist/modules/search/lib/reindex-lock.js.map +7 -0
  99. package/dist/modules/search/subscribers/fulltext_upsert.js +64 -0
  100. package/dist/modules/search/subscribers/fulltext_upsert.js.map +7 -0
  101. package/dist/modules/search/subscribers/vector_delete.js +58 -0
  102. package/dist/modules/search/subscribers/vector_delete.js.map +7 -0
  103. package/dist/modules/search/subscribers/vector_purge.js +142 -0
  104. package/dist/modules/search/subscribers/vector_purge.js.map +7 -0
  105. package/dist/modules/search/subscribers/vector_upsert.js +58 -0
  106. package/dist/modules/search/subscribers/vector_upsert.js.map +7 -0
  107. package/dist/modules/search/workers/fulltext-index.worker.js +240 -0
  108. package/dist/modules/search/workers/fulltext-index.worker.js.map +7 -0
  109. package/dist/modules/search/workers/vector-index.worker.js +234 -0
  110. package/dist/modules/search/workers/vector-index.worker.js.map +7 -0
  111. package/dist/queue/fulltext-indexing.js +15 -0
  112. package/dist/queue/fulltext-indexing.js.map +7 -0
  113. package/dist/queue/index.js +3 -0
  114. package/dist/queue/index.js.map +7 -0
  115. package/dist/queue/vector-indexing.js +15 -0
  116. package/dist/queue/vector-indexing.js.map +7 -0
  117. package/dist/service.js +286 -0
  118. package/dist/service.js.map +7 -0
  119. package/dist/strategies/fulltext.strategy.js +116 -0
  120. package/dist/strategies/fulltext.strategy.js.map +7 -0
  121. package/dist/strategies/index.js +12 -0
  122. package/dist/strategies/index.js.map +7 -0
  123. package/dist/strategies/token.strategy.js +80 -0
  124. package/dist/strategies/token.strategy.js.map +7 -0
  125. package/dist/strategies/vector.strategy.js +137 -0
  126. package/dist/strategies/vector.strategy.js.map +7 -0
  127. package/dist/types.js +1 -0
  128. package/dist/types.js.map +7 -0
  129. package/dist/vector/drivers/chromadb/index.js +44 -0
  130. package/dist/vector/drivers/chromadb/index.js.map +7 -0
  131. package/dist/vector/drivers/index.js +9 -0
  132. package/dist/vector/drivers/index.js.map +7 -0
  133. package/dist/vector/drivers/pgvector/index.js +509 -0
  134. package/dist/vector/drivers/pgvector/index.js.map +7 -0
  135. package/dist/vector/drivers/qdrant/index.js +44 -0
  136. package/dist/vector/drivers/qdrant/index.js.map +7 -0
  137. package/dist/vector/index.js +4 -0
  138. package/dist/vector/index.js.map +7 -0
  139. package/dist/vector/lib/vector-logs.js +33 -0
  140. package/dist/vector/lib/vector-logs.js.map +7 -0
  141. package/dist/vector/services/checksum.js +20 -0
  142. package/dist/vector/services/checksum.js.map +7 -0
  143. package/dist/vector/services/embedding.js +222 -0
  144. package/dist/vector/services/embedding.js.map +7 -0
  145. package/dist/vector/services/index.js +4 -0
  146. package/dist/vector/services/index.js.map +7 -0
  147. package/dist/vector/services/vector-index.service.js +960 -0
  148. package/dist/vector/services/vector-index.service.js.map +7 -0
  149. package/dist/vector/types/pg.d.js +1 -0
  150. package/dist/vector/types/pg.d.js.map +7 -0
  151. package/dist/vector/types.js +75 -0
  152. package/dist/vector/types.js.map +7 -0
  153. package/jest.config.cjs +19 -0
  154. package/package.json +142 -0
  155. package/src/__tests__/queue.test.ts +148 -0
  156. package/src/__tests__/service.test.ts +345 -0
  157. package/src/__tests__/workers.test.ts +319 -0
  158. package/src/di.ts +291 -0
  159. package/src/fulltext/drivers/index.ts +41 -0
  160. package/src/fulltext/drivers/meilisearch/index.ts +410 -0
  161. package/src/fulltext/index.ts +13 -0
  162. package/src/fulltext/types.ts +115 -0
  163. package/src/index.ts +36 -0
  164. package/src/indexer/index.ts +13 -0
  165. package/src/indexer/search-indexer.ts +1141 -0
  166. package/src/indexer/subscribers/delete.ts +49 -0
  167. package/src/lib/debug.ts +46 -0
  168. package/src/lib/fallback-presenter.ts +106 -0
  169. package/src/lib/field-policy.ts +169 -0
  170. package/src/lib/index.ts +13 -0
  171. package/src/lib/merger.ts +159 -0
  172. package/src/lib/presenter-enricher.ts +323 -0
  173. package/src/modules/search/README.md +694 -0
  174. package/src/modules/search/acl.ts +10 -0
  175. package/src/modules/search/ai-tools.ts +467 -0
  176. package/src/modules/search/api/embeddings/reindex/cancel/route.ts +77 -0
  177. package/src/modules/search/api/embeddings/reindex/route.ts +197 -0
  178. package/src/modules/search/api/embeddings/route.ts +304 -0
  179. package/src/modules/search/api/index/route.ts +297 -0
  180. package/src/modules/search/api/reindex/cancel/route.ts +77 -0
  181. package/src/modules/search/api/reindex/route.ts +419 -0
  182. package/src/modules/search/api/search/global/route.ts +120 -0
  183. package/src/modules/search/api/search/route.ts +121 -0
  184. package/src/modules/search/api/settings/fulltext/route.ts +82 -0
  185. package/src/modules/search/api/settings/global-search/route.ts +91 -0
  186. package/src/modules/search/api/settings/route.ts +187 -0
  187. package/src/modules/search/api/settings/vector-store/route.ts +105 -0
  188. package/src/modules/search/backend/config/search/page.meta.ts +22 -0
  189. package/src/modules/search/backend/config/search/page.tsx +12 -0
  190. package/src/modules/search/cli.ts +818 -0
  191. package/src/modules/search/di.ts +50 -0
  192. package/src/modules/search/frontend/components/GlobalSearchDialog.tsx +436 -0
  193. package/src/modules/search/frontend/components/HybridSearchTable.tsx +418 -0
  194. package/src/modules/search/frontend/components/SearchSettingsPageClient.tsx +476 -0
  195. package/src/modules/search/frontend/components/sections/FulltextSearchSection.tsx +624 -0
  196. package/src/modules/search/frontend/components/sections/GlobalSearchSection.tsx +124 -0
  197. package/src/modules/search/frontend/components/sections/VectorSearchSection.tsx +943 -0
  198. package/src/modules/search/frontend/index.ts +3 -0
  199. package/src/modules/search/frontend/utils.ts +82 -0
  200. package/src/modules/search/i18n/de.json +61 -0
  201. package/src/modules/search/i18n/en.json +72 -0
  202. package/src/modules/search/i18n/es.json +61 -0
  203. package/src/modules/search/i18n/pl.json +61 -0
  204. package/src/modules/search/index.ts +9 -0
  205. package/src/modules/search/lib/auto-indexing.ts +35 -0
  206. package/src/modules/search/lib/embedding-config.ts +161 -0
  207. package/src/modules/search/lib/global-search-config.ts +69 -0
  208. package/src/modules/search/lib/reindex-lock.ts +201 -0
  209. package/src/modules/search/subscribers/fulltext_upsert.ts +83 -0
  210. package/src/modules/search/subscribers/vector_delete.ts +75 -0
  211. package/src/modules/search/subscribers/vector_purge.ts +161 -0
  212. package/src/modules/search/subscribers/vector_upsert.ts +75 -0
  213. package/src/modules/search/workers/fulltext-index.worker.ts +318 -0
  214. package/src/modules/search/workers/vector-index.worker.ts +292 -0
  215. package/src/queue/fulltext-indexing.ts +87 -0
  216. package/src/queue/index.ts +2 -0
  217. package/src/queue/vector-indexing.ts +66 -0
  218. package/src/service.ts +397 -0
  219. package/src/strategies/fulltext.strategy.ts +155 -0
  220. package/src/strategies/index.ts +17 -0
  221. package/src/strategies/token.strategy.ts +153 -0
  222. package/src/strategies/vector.strategy.ts +234 -0
  223. package/src/types.ts +38 -0
  224. package/src/vector/drivers/chromadb/index.ts +49 -0
  225. package/src/vector/drivers/index.ts +4 -0
  226. package/src/vector/drivers/pgvector/index.ts +627 -0
  227. package/src/vector/drivers/qdrant/index.ts +49 -0
  228. package/src/vector/index.ts +3 -0
  229. package/src/vector/lib/vector-logs.ts +46 -0
  230. package/src/vector/services/checksum.ts +18 -0
  231. package/src/vector/services/embedding.ts +275 -0
  232. package/src/vector/services/index.ts +3 -0
  233. package/src/vector/services/vector-index.service.ts +1234 -0
  234. package/src/vector/types/pg.d.ts +1 -0
  235. package/src/vector/types.ts +220 -0
  236. package/tsconfig.json +9 -0
  237. package/watch.mjs +6 -0
@@ -0,0 +1,49 @@
1
+ import type { SearchIndexer } from '../search-indexer'
2
+ import type { EntityId } from '@open-mercato/shared/modules/entities'
3
+ import type { SearchDeletePayload } from '@open-mercato/shared/modules/search'
4
+ import { searchDebugWarn, searchError } from '../../lib/debug'
5
+
6
/**
 * Event subscriber metadata.
 *
 * Declares the event name this subscriber is registered for
 * (`search.delete_record`) and flags the subscription as non-persistent.
 * NOTE(review): how `persistent` is interpreted is decided by the event bus
 * that consumes this object; that code is not visible here.
 */
export const metadata = {
  event: 'search.delete_record',
  persistent: false,
}
13
+
14
/**
 * Factory to create the search delete subscriber handler.
 *
 * @param indexer - Indexer whose `deleteRecord` is called for every valid payload
 * @returns Async handler that coerces `entityId`, `recordId` and `tenantId` to
 *   strings, skips the payload (with a debug warning) when any of them is empty,
 *   and otherwise awaits `indexer.deleteRecord`. A failing delete is logged via
 *   `searchError` and then rethrown to the caller.
 */
export function createSearchDeleteSubscriber(indexer: SearchIndexer) {
  // Any falsy input (undefined, null, '', 0, ...) collapses to the empty string
  const toText = (value: unknown): string => String(value || '')

  return async function handle(payload: SearchDeletePayload): Promise<void> {
    const entityId = toText(payload?.entityId) as EntityId
    const recordId = toText(payload?.recordId)
    const tenantId = toText(payload?.tenantId)

    const isComplete = Boolean(entityId && recordId && tenantId)
    if (!isComplete) {
      // Nothing can be deleted without the full (entity, record, tenant) triple
      searchDebugWarn('search.delete_record', 'Missing required fields', {
        entityId,
        recordId,
        tenantId,
      })
      return
    }

    const target = { entityId, recordId, tenantId }
    try {
      await indexer.deleteRecord(target)
    } catch (error) {
      const reason = error instanceof Error ? error.message : error
      searchError('search.delete_record', 'Failed to delete record', {
        entityId,
        recordId,
        error: reason,
      })
      // Propagate so the caller still sees the failure
      throw error
    }
  }
}

export default createSearchDeleteSubscriber
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Debug utilities for search module.
3
+ *
4
+ * Set OM_SEARCH_DEBUG=true to enable debug logging.
5
+ */
6
+
7
/**
 * Report whether search debug logging is switched on.
 *
 * Reads `process.env.OM_SEARCH_DEBUG` on every call. The value is trimmed and
 * lower-cased before comparison, so `"TRUE"`, `" on "` or `"yes\n"` all count
 * as enabled. Accepted values: `1`, `true`, `yes`, `on`.
 *
 * @returns `true` when the variable holds one of the accepted values
 */
export function isSearchDebugEnabled(): boolean {
  // Trim first: surrounding whitespace or a trailing newline must not
  // silently disable debugging.
  const raw = (process.env.OM_SEARCH_DEBUG ?? '').trim().toLowerCase()
  return raw === '1' || raw === 'true' || raw === 'yes' || raw === 'on'
}
11
+
12
/**
 * Log a debug message if OM_SEARCH_DEBUG is enabled.
 *
 * @param prefix - Tag printed in square brackets in front of the message
 * @param message - Text to log
 * @param data - Optional structured payload passed to `console.log` as a second argument
 */
export function searchDebug(prefix: string, message: string, data?: Record<string, unknown>): void {
  if (isSearchDebugEnabled()) {
    const line = `[${prefix}] ${message}`
    // Only pass the payload when it was supplied
    const args: unknown[] = data ? [line, data] : [line]
    console.log(...args)
  }
}
23
+
24
/**
 * Log a warning message if OM_SEARCH_DEBUG is enabled.
 *
 * @param prefix - Tag printed in square brackets in front of the message
 * @param message - Text to log
 * @param data - Optional structured payload passed to `console.warn` as a second argument
 */
export function searchDebugWarn(prefix: string, message: string, data?: Record<string, unknown>): void {
  if (isSearchDebugEnabled()) {
    const line = `[${prefix}] ${message}`
    // Only pass the payload when it was supplied
    const args: unknown[] = data ? [line, data] : [line]
    console.warn(...args)
  }
}
35
+
36
/**
 * Log an error message. Unlike the debug helpers this is not gated by the
 * debug flag: errors are always written to `console.error`.
 *
 * @param prefix - Tag printed in square brackets in front of the message
 * @param message - Text to log
 * @param data - Optional structured payload passed to `console.error` as a second argument
 */
export function searchError(prefix: string, message: string, data?: Record<string, unknown>): void {
  const line = `[${prefix}] ${message}`
  // Only pass the payload when it was supplied
  const args: unknown[] = data ? [line, data] : [line]
  console.error(...args)
}
@@ -0,0 +1,106 @@
1
+ import type { SearchResultPresenter } from '@open-mercato/shared/modules/search'
2
+
3
// Candidate document fields for a result title, listed in priority order.
// Most entries appear in both snake_case and camelCase spellings.
const TITLE_FIELDS: string[] = [
  // Explicit display names
  'display_name',
  'displayName',
  'name',
  'title',
  'label',
  // Other name variants
  'full_name',
  'fullName',
  'brand_name',
  'brandName',
  'legal_name',
  'legalName',
  'first_name',
  'firstName',
  'last_name',
  'lastName',
  'preferred_name',
  'preferredName',
  // Contact identifiers
  'email',
  'primary_email',
  'primaryEmail',
  // Codes and technical identifiers
  'code',
  'sku',
  'reference',
  'identifier',
  'slug',
]
17
+
18
// Candidate document fields for a result subtitle, in the order they are checked.
const SUBTITLE_FIELDS: string[] = [
  // Free-text descriptions
  'description',
  'summary',
  'notes',
  // Contact details
  'email',
  'primary_email',
  'primaryEmail',
  'phone',
  'primary_phone',
  'primaryPhone',
  // Classification
  'status',
  'type',
  'kind',
  'category',
]
25
+
26
/**
 * Return the first non-blank display value found under the given field names.
 *
 * Fields are checked in the order supplied. Only primitive values (string,
 * number, boolean, bigint) are considered: objects and arrays are skipped
 * because coercing them with `String()` produces unusable text such as
 * `"[object Object]"`.
 *
 * @param doc - Document to read from
 * @param fields - Field names to check, in priority order
 * @returns The trimmed string form of the first usable value, or `null` when none is found
 */
function findFirstValue(doc: Record<string, unknown>, fields: string[]): string | null {
  for (const field of fields) {
    const value = doc[field]
    const kind = typeof value
    if (kind !== 'string' && kind !== 'number' && kind !== 'boolean' && kind !== 'bigint') {
      // null/undefined and non-primitive values cannot be shown as a label
      continue
    }
    const text = String(value).trim()
    if (text.length > 0) {
      return text
    }
  }
  return null
}
35
+
36
/**
 * Pick the first usable string value from a document, in property order.
 *
 * Skipped keys: ids, tenant/organization ids, timestamps, every key in
 * `excludeFields`, and keys starting with `cf:` or `cf_`. A value qualifies
 * when it is a string that is non-blank after trimming and whose untrimmed
 * length is below 200.
 *
 * @param doc - Document to scan
 * @param excludeFields - Additional field names to ignore
 * @returns The trimmed value, or `null` when nothing qualifies
 */
function findAnyStringValue(doc: Record<string, unknown>, excludeFields: Set<string>): string | null {
  // Bookkeeping fields carry no meaning for display purposes
  const ignored = new Set<string>([
    'id',
    'tenant_id',
    'tenantId',
    'organization_id',
    'organizationId',
    'created_at',
    'createdAt',
    'updated_at',
    'updatedAt',
    'deleted_at',
    'deletedAt',
  ])
  for (const name of excludeFields) {
    ignored.add(name)
  }

  const isCustomFieldKey = (key: string): boolean => key.startsWith('cf:') || key.startsWith('cf_')

  for (const [key, value] of Object.entries(doc)) {
    if (ignored.has(key) || isCustomFieldKey(key)) continue
    if (typeof value !== 'string') continue

    const trimmed = value.trim()
    // The length cap applies to the raw value, not the trimmed one
    if (trimmed.length > 0 && value.length < 200) {
      return trimmed
    }
  }
  return null
}
53
+
54
/**
 * Turn an entity id into a human-readable label.
 *
 * Takes the segment after the first `:` (or the whole id when there is no
 * colon), replaces underscores with spaces and upper-cases the first
 * character of each word.
 *
 * @param entityId - Entity identifier, e.g. `module:entity_name`
 * @returns Title-cased label, e.g. `Entity Name`
 */
function formatEntityLabel(entityId: string): string {
  // Second colon-separated segment; the default applies only when it is absent
  const [, entityName = entityId] = entityId.split(':')
  const spaced = entityName.replace(/_/g, ' ')
  return spaced.replace(/\b\w/g, (letter) => letter.toUpperCase())
}
60
+
61
/**
 * Extract a presenter from doc fields when no search.ts config exists.
 *
 * Title resolution, in order:
 * 1. first non-blank value among `TITLE_FIELDS`
 * 2. any other short string field (subtitle fields are excluded here)
 * 3. entity label + record id (ids longer than 8 chars are cut to 8 + `...`)
 *
 * The subtitle joins up to three `SUBTITLE_FIELDS` values with ` · ` and is
 * capped at 120 characters. Values are trimmed before being compared with the
 * title, so a padded copy of the title is not repeated in the subtitle.
 * Non-primitive values (objects, arrays) are skipped because their string
 * form (e.g. `[object Object]`) is not meaningful to display.
 *
 * TODO: This is a basic implementation. Future improvements could include:
 * - Entity-type specific field mappings
 * - Smarter field combination (e.g., first_name + last_name)
 * - Custom field (cf:*) inspection for user-defined display fields
 * - Configuration for default presenter fields per entity type
 *
 * @param doc - Indexed document fields
 * @param entityId - Entity identifier, used for the badge and the last-resort title
 * @param recordId - Record identifier, used for the last-resort title
 * @returns Presenter with `title`, optional `subtitle` and `badge`
 */
export function extractFallbackPresenter(
  doc: Record<string, unknown>,
  entityId: string,
  recordId: string,
): SearchResultPresenter {
  const entityLabel = formatEntityLabel(entityId)

  // 1. Try common title fields
  let title = findFirstValue(doc, TITLE_FIELDS)

  // 2. If no title found, try any string field
  if (!title) {
    title = findAnyStringValue(doc, new Set(SUBTITLE_FIELDS))
  }

  // 3. Last resort: use entity label + truncated record ID
  if (!title) {
    const shortId = recordId.length > 8 ? recordId.slice(0, 8) + '...' : recordId
    title = `${entityLabel} ${shortId}`
  }

  // Build subtitle from multiple relevant fields to show more context
  const subtitleParts: string[] = []
  for (const field of SUBTITLE_FIELDS) {
    const value = doc[field]
    const kind = typeof value
    if (kind !== 'string' && kind !== 'number' && kind !== 'boolean' && kind !== 'bigint') {
      // null/undefined and non-primitive values cannot be rendered as text
      continue
    }

    // Compare the trimmed text: the title itself is always stored trimmed
    const text = String(value).trim()
    if (text.length === 0 || text === title) continue

    subtitleParts.push(text)
    if (subtitleParts.length >= 3) break // Limit to 3 parts
  }

  return {
    title,
    subtitle: subtitleParts.length > 0 ? subtitleParts.join(' · ').slice(0, 120) : undefined,
    badge: entityLabel,
  }
}
@@ -0,0 +1,169 @@
1
+ import type { SearchFieldPolicy } from '../types'
2
+
3
/**
 * One entry of an entity's encryption map, in the shape stored in the database.
 * Mirrors the structure of the entities/data/entities EncryptionMap.
 */
export type EncryptionMapEntry = {
  /** Name of the encrypted field */
  field: string
  /** Name of the companion hash field, when one exists (may be absent or null) */
  hashField?: string | null
}
11
+
12
/**
 * Options accepted by the field extraction helpers in this module.
 * Both properties are optional; omitting them disables the matching filter.
 */
export type FieldExtractionConfig = {
  /** Encryption map entries loaded from the database */
  encryptedFields?: EncryptionMapEntry[]
  /** Extra field policy taken from the entity's search config */
  fieldPolicy?: SearchFieldPolicy
}
21
+
22
/**
 * Keep only the fields of a record that are safe to index as plain text.
 * Encrypted and sensitive fields are filtered out so they are never sent to
 * external search providers.
 *
 * A field is dropped when any of the following holds:
 * 1. its value is `null` or `undefined`
 * 2. it is listed in the encryption map
 * 3. it is listed in `fieldPolicy.excluded` or `fieldPolicy.hashOnly`
 * 4. `fieldPolicy.searchable` is set and does not list it (whitelist mode)
 *
 * @param fields - All fields from the record
 * @param config - Extraction configuration with encryption map and field policy
 * @returns New object containing only the safe-to-index fields
 */
export function extractSearchableFields(
  fields: Record<string, unknown>,
  config?: FieldExtractionConfig,
): Record<string, unknown> {
  const policy = config?.fieldPolicy

  // One deny-list covering encrypted, excluded and hash-only fields
  const blocked = new Set<string>()
  for (const entry of config?.encryptedFields ?? []) blocked.add(entry.field)
  for (const name of policy?.excluded ?? []) blocked.add(name)
  for (const name of policy?.hashOnly ?? []) blocked.add(name)

  // null means "no whitelist": every field that is not blocked is allowed
  const allowed = policy?.searchable ? new Set(policy.searchable) : null

  const safe: Record<string, unknown> = {}
  for (const name of Object.keys(fields)) {
    const value = fields[name]
    const keep = value != null && !blocked.has(name) && (allowed === null || allowed.has(name))
    if (keep) {
      safe[name] = value
    }
  }
  return safe
}
71
+
72
/**
 * Collect the fields of a record that should be searched by hash only.
 *
 * A field qualifies when it is listed in `fieldPolicy.hashOnly`, or when its
 * encryption-map entry has a truthy `hashField`. Fields whose value is `null`
 * or `undefined` are left out.
 *
 * @param fields - All fields from the record
 * @param config - Extraction configuration with encryption map and field policy
 * @returns New object with the values of the hash-searchable fields
 */
export function extractHashOnlyFields(
  fields: Record<string, unknown>,
  config?: FieldExtractionConfig,
): Record<string, unknown> {
  // Union of policy-declared hash-only fields and encrypted fields that have a hash column
  const hashSearchable = new Set<string>(config?.fieldPolicy?.hashOnly ?? [])
  for (const entry of config?.encryptedFields ?? []) {
    if (entry.hashField) hashSearchable.add(entry.field)
  }

  const picked: Record<string, unknown> = {}
  for (const name of Object.keys(fields)) {
    const value = fields[name]
    if (value != null && hashSearchable.has(name)) {
      picked[name] = value
    }
  }
  return picked
}
105
+
106
/**
 * Sort every field name of a record into one of three buckets.
 * Useful for debugging and understanding how fields will be indexed.
 *
 * Rules are applied per field in this order (first match wins):
 * 1. listed in `fieldPolicy.excluded`                          -> excluded
 * 2. listed in `fieldPolicy.hashOnly`, or encrypted with hash  -> hashOnly
 * 3. encrypted without a hash field                            -> excluded
 * 4. a whitelist exists and does not list the field            -> excluded
 * 5. otherwise                                                 -> searchable
 *
 * Only field names are inspected; values (including null) play no role.
 *
 * @param fields - All fields from the record
 * @param config - Extraction configuration
 * @returns Field names grouped by classification, in record key order
 */
export function classifyFields(
  fields: Record<string, unknown>,
  config?: FieldExtractionConfig,
): {
  searchable: string[]
  hashOnly: string[]
  excluded: string[]
} {
  const policy = config?.fieldPolicy
  const excludedFromPolicy = new Set(policy?.excluded ?? [])
  const whitelist = policy?.searchable ? new Set(policy.searchable) : null

  // Encrypted fields, plus the hash-searchable set (policy hash-only fields
  // together with encrypted fields that have a hash column)
  const encrypted = new Set<string>()
  const hashSearchable = new Set<string>(policy?.hashOnly ?? [])
  for (const entry of config?.encryptedFields ?? []) {
    encrypted.add(entry.field)
    if (entry.hashField) hashSearchable.add(entry.field)
  }

  const searchable: string[] = []
  const hashOnly: string[] = []
  const excluded: string[] = []

  // Returns the bucket a field belongs to; the order of checks is the precedence
  const bucketFor = (field: string): string[] => {
    if (excludedFromPolicy.has(field)) return excluded
    if (hashSearchable.has(field)) return hashOnly
    // Encrypted fields that reach this point have no hash column
    if (encrypted.has(field)) return excluded
    if (whitelist !== null && !whitelist.has(field)) return excluded
    return searchable
  }

  for (const field of Object.keys(fields)) {
    bucketFor(field).push(field)
  }

  return { searchable, hashOnly, excluded }
}
@@ -0,0 +1,13 @@
1
+ export {
2
+ mergeAndRankResults,
3
+ deduplicateResults,
4
+ normalizeScores,
5
+ } from './merger'
6
+
7
+ export {
8
+ extractSearchableFields,
9
+ extractHashOnlyFields,
10
+ classifyFields,
11
+ type EncryptionMapEntry,
12
+ type FieldExtractionConfig,
13
+ } from './field-policy'
@@ -0,0 +1,159 @@
1
+ import type { SearchResult, ResultMergeConfig, SearchStrategyId } from '../types'
2
+
3
/**
 * Default RRF constant (k=60 is standard in literature).
 * Higher values reduce the influence of ranking position.
 */
const RRF_K = 60

/**
 * Combine results from several search strategies with Reciprocal Rank Fusion (RRF).
 *
 * Results are grouped by `source`; within a group, input order is taken as the
 * rank. Each occurrence contributes `weight / (k + rank)` (rank starting at 1)
 * and contributions for the same `entityId:recordId` pair are summed.
 *
 * When a record appears more than once, the kept result object is chosen as follows:
 * - kept one lacks a presenter title, new one has it: keep the existing object
 *   but take the new presenter (and fill missing `url` / `links`)
 * - both have a title, or neither has: switch to the new object only if its
 *   single contribution is higher than the best seen so far
 * - kept one has a title, new one does not: keep the existing object
 *
 * The returned results carry the summed RRF value as `score`, plus
 * `metadata._sources` and `metadata._rrfScore`.
 *
 * Reference: Cormack, G.V., Clarke, C.L.A., & Buettcher, S. (2009).
 * "Reciprocal rank fusion outperforms condorcet and individual rank learning methods"
 * https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf
 *
 * @param results - Array of search results from multiple strategies
 * @param config - Merge configuration with weights and thresholds
 * @returns Merged results sorted by RRF score, highest first
 */
export function mergeAndRankResults(
  results: SearchResult[],
  config: ResultMergeConfig,
): SearchResult[] {
  if (results.length === 0) return []

  type Accumulated = {
    result: SearchResult
    rrf: number
    sources: Set<SearchStrategyId>
    // Highest single RRF contribution behind the currently kept result object
    bestContribution: number
  }

  // One ranked list per strategy, preserving input order inside each list
  const rankedLists = new Map<SearchStrategyId, SearchResult[]>()
  for (const item of results) {
    const bucket = rankedLists.get(item.source)
    if (bucket) {
      bucket.push(item)
    } else {
      rankedLists.set(item.source, [item])
    }
  }

  const accumulated = new Map<string, Accumulated>()

  for (const [strategy, ranked] of rankedLists) {
    const weight = config.strategyWeights?.[strategy] ?? 1.0

    ranked.forEach((candidate, position) => {
      const key = `${candidate.entityId}:${candidate.recordId}`
      const contribution = weight / (RRF_K + position + 1)

      const entry = accumulated.get(key)
      if (!entry) {
        // First sighting of this record
        accumulated.set(key, {
          result: { ...candidate },
          rrf: contribution,
          sources: new Set([strategy]),
          bestContribution: contribution,
        })
        return
      }

      // Same record from another list (or again in this one): sum the scores
      entry.rrf += contribution
      entry.sources.add(strategy)

      const keptHasPresenter = entry.result.presenter?.title != null
      const candidateHasPresenter = candidate.presenter?.title != null

      if (!keptHasPresenter && candidateHasPresenter) {
        // Enrich the kept result with the presenter it was missing
        entry.result = {
          ...entry.result,
          presenter: candidate.presenter,
          url: entry.result.url ?? candidate.url,
          links: entry.result.links ?? candidate.links,
        }
        entry.bestContribution = Math.max(entry.bestContribution, contribution)
      } else if (keptHasPresenter === candidateHasPresenter && contribution > entry.bestContribution) {
        // Equal presenter status: prefer the occurrence with the better RRF contribution
        entry.result = { ...candidate }
        entry.bestContribution = contribution
      }
      // Kept result has a presenter and the candidate does not: nothing to do
    })
  }

  const merged = Array.from(accumulated.values(), ({ result, rrf, sources }) => ({
    ...result,
    score: rrf,
    metadata: {
      ...result.metadata,
      _sources: Array.from(sources),
      _rrfScore: rrf,
    },
  }))

  // Optional minimum score threshold, applied to the summed RRF score
  const { minScore } = config
  const kept = minScore != null ? merged.filter((r) => r.score >= minScore) : merged

  return kept.sort((a, b) => b.score - a.score)
}
112
+
113
/**
 * Deduplicate results by `entityId:recordId` without any RRF scoring.
 * For each pair the result with the strictly highest score wins; on a tie the
 * one seen first is kept.
 *
 * @param results - Array of search results
 * @returns Deduplicated results sorted by score, highest first
 */
export function deduplicateResults(results: SearchResult[]): SearchResult[] {
  const best = new Map<string, SearchResult>()

  for (const candidate of results) {
    const key = `${candidate.entityId}:${candidate.recordId}`
    const current = best.get(key)

    // Keep the current winner unless the candidate strictly beats it
    if (current !== undefined && !(candidate.score > current.score)) continue

    best.set(key, candidate)
  }

  return [...best.values()].sort((a, b) => b.score - a.score)
}
134
+
135
/**
 * Normalize scores to the 0-1 range using min-max normalization.
 * Useful when combining strategies with different score scales.
 *
 * The minimum and maximum are found in a single pass rather than with
 * `Math.min(...scores)` / `Math.max(...scores)`: spreading a very large array
 * into function arguments can exceed the engine's argument limit and throw a
 * RangeError.
 *
 * @param results - Array of search results
 * @returns New array of copied results with normalized scores; when every
 *   score is identical, all scores become 1.0. The input is not mutated.
 */
export function normalizeScores(results: SearchResult[]): SearchResult[] {
  if (results.length === 0) return []

  let minScore = Infinity
  let maxScore = -Infinity
  for (const { score } of results) {
    // Pairwise Math.min/Math.max keeps the NaN semantics of the variadic form
    minScore = Math.min(minScore, score)
    maxScore = Math.max(maxScore, score)
  }
  const range = maxScore - minScore

  if (range === 0) {
    // All scores are the same, normalize to 1.0
    return results.map((r) => ({ ...r, score: 1.0 }))
  }

  return results.map((r) => ({
    ...r,
    score: (r.score - minScore) / range,
  }))
}