graphile-search 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,5 +17,13 @@ export interface PgvectorAdapterOptions {
17
17
  * @default 'COSINE'
18
18
  */
19
19
  defaultMetric?: 'COSINE' | 'L2' | 'IP';
20
+ /**
21
+ * When true, tables with @hasChunks smart tag will transparently
22
+ * query through the chunks table to find the closest chunk.
23
+ * The parent row's vector distance is the minimum of its own embedding's
24
+ * distance and the distances of all its chunks.
25
+ * @default true
26
+ */
27
+ enableChunkQuerying?: boolean;
20
28
  }
21
29
  export declare function createPgvectorAdapter(options?: PgvectorAdapterOptions): SearchAdapter;
@@ -19,8 +19,59 @@ const METRIC_OPERATORS = {
19
19
  function isVectorCodec(codec) {
20
20
  return codec?.name === 'vector';
21
21
  }
22
+ /**
23
+ * Read @hasChunks smart tag from codec extensions.
24
+ * The tag value is a JSON object like:
25
+ * {
26
+ * "chunksTable": "documents_chunks",
27
+ * "chunksSchema": "app_private", // optional, defaults to parent table's schema
28
+ * "parentFk": "document_id", // optional, defaults to "parent_id"
29
+ * "parentPk": "id", // optional, defaults to "id"
30
+ * "embeddingField": "embedding" // optional, defaults to "embedding"
31
+ * }
32
+ */
33
+ function getChunksInfo(codec) {
34
+ const tags = codec?.extensions?.tags;
35
+ if (!tags)
36
+ return undefined;
37
+ const raw = tags.hasChunks;
38
+ if (!raw)
39
+ return undefined;
40
+ let parsed;
41
+ if (typeof raw === 'string') {
42
+ try {
43
+ parsed = JSON.parse(raw);
44
+ }
45
+ catch {
46
+ // Not valid JSON (e.g. a plain string): there is no chunk metadata to resolve
47
+ return undefined;
48
+ }
49
+ }
50
+ else if (typeof raw === 'object') {
51
+ parsed = raw;
52
+ }
53
+ else if (raw === true) {
54
+ return undefined; // boolean true = no metadata, can't resolve
55
+ }
56
+ else {
57
+ return undefined;
58
+ }
59
+ if (!parsed.chunksTable)
60
+ return undefined;
61
+ // Resolve schema: explicit chunksSchema > parent codec schema > null
62
+ const chunksSchema = parsed.chunksSchema
63
+ || codec?.extensions?.pg?.schemaName
64
+ || null;
65
+ return {
66
+ chunksSchema,
67
+ chunksTableName: parsed.chunksTable,
68
+ parentFkField: parsed.parentFk || 'parent_id',
69
+ parentPkField: parsed.parentPk || 'id',
70
+ embeddingField: parsed.embeddingField || 'embedding',
71
+ };
72
+ }
22
73
  function createPgvectorAdapter(options = {}) {
23
- const { filterPrefix = 'vector', defaultMetric = 'COSINE' } = options;
74
+ const { filterPrefix = 'vector', defaultMetric = 'COSINE', enableChunkQuerying = true } = options;
24
75
  return {
25
76
  name: 'vector',
26
77
  scoreSemantics: {
@@ -38,9 +89,13 @@ function createPgvectorAdapter(options = {}) {
38
89
  if (!codec?.attributes)
39
90
  return [];
40
91
  const columns = [];
92
+ const chunksInfo = enableChunkQuerying ? getChunksInfo(codec) : undefined;
41
93
  for (const [attributeName, attribute] of Object.entries(codec.attributes)) {
42
94
  if (isVectorCodec(attribute.codec)) {
43
- columns.push({ attributeName });
95
+ columns.push({
96
+ attributeName,
97
+ adapterData: chunksInfo ? { chunksInfo } : undefined,
98
+ });
44
99
  }
45
100
  }
46
101
  return columns;
@@ -92,6 +147,12 @@ function createPgvectorAdapter(options = {}) {
92
147
  type: GraphQLFloat,
93
148
  description: 'Maximum distance threshold. Only rows within this distance are returned.',
94
149
  },
150
+ includeChunks: {
151
+ type: build.graphql.GraphQLBoolean,
152
+ description: 'When true (default for tables with @hasChunks), transparently queries ' +
153
+ 'the chunks table and returns the minimum distance across parent + all chunks. ' +
154
+ 'Set to false to only search the parent embedding.',
155
+ },
95
156
  };
96
157
  },
97
158
  }), 'UnifiedSearchPlugin (pgvector adapter) registering VectorNearbyInput type');
@@ -106,14 +167,54 @@ function createPgvectorAdapter(options = {}) {
106
167
  buildFilterApply(sql, alias, column, filterValue, _build) {
107
168
  if (filterValue == null)
108
169
  return null;
109
- const { vector, metric, distance } = filterValue;
170
+ const { vector, metric, distance, includeChunks } = filterValue;
110
171
  if (!vector || !Array.isArray(vector) || vector.length === 0)
111
172
  return null;
112
173
  const resolvedMetric = metric || defaultMetric;
113
174
  const operator = METRIC_OPERATORS[resolvedMetric] || METRIC_OPERATORS.COSINE;
114
175
  const vectorString = `[${vector.join(',')}]`;
115
- const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
116
176
  const vectorExpr = sql `${sql.value(vectorString)}::vector`;
177
+ // Check if this column has chunks info and chunk querying is requested
178
+ const adapterData = column.adapterData;
179
+ const chunksInfo = adapterData?.chunksInfo;
180
+ if (chunksInfo && (includeChunks !== false)) {
181
+ // Chunk-aware query: find the closest chunk for each parent row
182
+ // Uses a correlated scalar subquery to get the minimum distance across all chunks
183
+ const chunksTableRef = chunksInfo.chunksSchema
184
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
185
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
186
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
187
+ const chunkEmbedding = sql.identifier(chunksInfo.embeddingField);
188
+ // Use the configured PK field (defaults to 'id', but can be overridden via @hasChunks tag)
189
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
190
+ // Alias to avoid ambiguity when the chunks table name might collide
191
+ const chunksAlias = sql.identifier('__chunks');
192
+ // Subquery: SELECT MIN(distance) FROM chunks WHERE chunks.parent_fk = parent.pk
193
+ const chunkDistanceSubquery = sql `(
194
+ SELECT MIN(${chunksAlias}.${chunkEmbedding} ${sql.raw(operator)} ${vectorExpr})
195
+ FROM ${chunksTableRef} AS ${chunksAlias}
196
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
197
+ )`;
198
+ // Also compute direct parent distance if the parent has an embedding
199
+ const parentColumnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
200
+ const parentDistanceExpr = sql `(${parentColumnExpr} ${sql.raw(operator)} ${vectorExpr})`;
201
+ // Use LEAST of parent distance and closest chunk distance
202
+ // COALESCE handles cases where parent or chunks may not have embeddings
203
+ const combinedDistanceExpr = sql `LEAST(
204
+ COALESCE(${parentDistanceExpr}, 'Infinity'::float),
205
+ COALESCE(${chunkDistanceSubquery}, 'Infinity'::float)
206
+ )`;
207
+ let whereClause = null;
208
+ if (distance !== undefined && distance !== null) {
209
+ whereClause = sql `${combinedDistanceExpr} <= ${sql.value(distance)}`;
210
+ }
211
+ return {
212
+ whereClause,
213
+ scoreExpression: combinedDistanceExpr,
214
+ };
215
+ }
216
+ // Standard (non-chunk) query
217
+ const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
117
218
  const distanceExpr = sql `(${columnExpr} ${sql.raw(operator)} ${vectorExpr})`;
118
219
  let whereClause = null;
119
220
  if (distance !== undefined && distance !== null) {
@@ -17,5 +17,13 @@ export interface PgvectorAdapterOptions {
17
17
  * @default 'COSINE'
18
18
  */
19
19
  defaultMetric?: 'COSINE' | 'L2' | 'IP';
20
+ /**
21
+ * When true, tables with @hasChunks smart tag will transparently
22
+ * query through the chunks table to find the closest chunk.
23
+ * The parent row's vector distance is the minimum of its own embedding's
24
+ * distance and the distances of all its chunks.
25
+ * @default true
26
+ */
27
+ enableChunkQuerying?: boolean;
20
28
  }
21
29
  export declare function createPgvectorAdapter(options?: PgvectorAdapterOptions): SearchAdapter;
@@ -16,8 +16,59 @@ const METRIC_OPERATORS = {
16
16
  function isVectorCodec(codec) {
17
17
  return codec?.name === 'vector';
18
18
  }
19
+ /**
20
+ * Read @hasChunks smart tag from codec extensions.
21
+ * The tag value is a JSON object like:
22
+ * {
23
+ * "chunksTable": "documents_chunks",
24
+ * "chunksSchema": "app_private", // optional, defaults to parent table's schema
25
+ * "parentFk": "document_id", // optional, defaults to "parent_id"
26
+ * "parentPk": "id", // optional, defaults to "id"
27
+ * "embeddingField": "embedding" // optional, defaults to "embedding"
28
+ * }
29
+ */
30
+ function getChunksInfo(codec) {
31
+ const tags = codec?.extensions?.tags;
32
+ if (!tags)
33
+ return undefined;
34
+ const raw = tags.hasChunks;
35
+ if (!raw)
36
+ return undefined;
37
+ let parsed;
38
+ if (typeof raw === 'string') {
39
+ try {
40
+ parsed = JSON.parse(raw);
41
+ }
42
+ catch {
43
+ // Not valid JSON (e.g. a plain string): there is no chunk metadata to resolve
44
+ return undefined;
45
+ }
46
+ }
47
+ else if (typeof raw === 'object') {
48
+ parsed = raw;
49
+ }
50
+ else if (raw === true) {
51
+ return undefined; // boolean true = no metadata, can't resolve
52
+ }
53
+ else {
54
+ return undefined;
55
+ }
56
+ if (!parsed.chunksTable)
57
+ return undefined;
58
+ // Resolve schema: explicit chunksSchema > parent codec schema > null
59
+ const chunksSchema = parsed.chunksSchema
60
+ || codec?.extensions?.pg?.schemaName
61
+ || null;
62
+ return {
63
+ chunksSchema,
64
+ chunksTableName: parsed.chunksTable,
65
+ parentFkField: parsed.parentFk || 'parent_id',
66
+ parentPkField: parsed.parentPk || 'id',
67
+ embeddingField: parsed.embeddingField || 'embedding',
68
+ };
69
+ }
19
70
  export function createPgvectorAdapter(options = {}) {
20
- const { filterPrefix = 'vector', defaultMetric = 'COSINE' } = options;
71
+ const { filterPrefix = 'vector', defaultMetric = 'COSINE', enableChunkQuerying = true } = options;
21
72
  return {
22
73
  name: 'vector',
23
74
  scoreSemantics: {
@@ -35,9 +86,13 @@ export function createPgvectorAdapter(options = {}) {
35
86
  if (!codec?.attributes)
36
87
  return [];
37
88
  const columns = [];
89
+ const chunksInfo = enableChunkQuerying ? getChunksInfo(codec) : undefined;
38
90
  for (const [attributeName, attribute] of Object.entries(codec.attributes)) {
39
91
  if (isVectorCodec(attribute.codec)) {
40
- columns.push({ attributeName });
92
+ columns.push({
93
+ attributeName,
94
+ adapterData: chunksInfo ? { chunksInfo } : undefined,
95
+ });
41
96
  }
42
97
  }
43
98
  return columns;
@@ -89,6 +144,12 @@ export function createPgvectorAdapter(options = {}) {
89
144
  type: GraphQLFloat,
90
145
  description: 'Maximum distance threshold. Only rows within this distance are returned.',
91
146
  },
147
+ includeChunks: {
148
+ type: build.graphql.GraphQLBoolean,
149
+ description: 'When true (default for tables with @hasChunks), transparently queries ' +
150
+ 'the chunks table and returns the minimum distance across parent + all chunks. ' +
151
+ 'Set to false to only search the parent embedding.',
152
+ },
92
153
  };
93
154
  },
94
155
  }), 'UnifiedSearchPlugin (pgvector adapter) registering VectorNearbyInput type');
@@ -103,14 +164,54 @@ export function createPgvectorAdapter(options = {}) {
103
164
  buildFilterApply(sql, alias, column, filterValue, _build) {
104
165
  if (filterValue == null)
105
166
  return null;
106
- const { vector, metric, distance } = filterValue;
167
+ const { vector, metric, distance, includeChunks } = filterValue;
107
168
  if (!vector || !Array.isArray(vector) || vector.length === 0)
108
169
  return null;
109
170
  const resolvedMetric = metric || defaultMetric;
110
171
  const operator = METRIC_OPERATORS[resolvedMetric] || METRIC_OPERATORS.COSINE;
111
172
  const vectorString = `[${vector.join(',')}]`;
112
- const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
113
173
  const vectorExpr = sql `${sql.value(vectorString)}::vector`;
174
+ // Check if this column has chunks info and chunk querying is requested
175
+ const adapterData = column.adapterData;
176
+ const chunksInfo = adapterData?.chunksInfo;
177
+ if (chunksInfo && (includeChunks !== false)) {
178
+ // Chunk-aware query: find the closest chunk for each parent row
179
+ // Uses a correlated scalar subquery to get the minimum distance across all chunks
180
+ const chunksTableRef = chunksInfo.chunksSchema
181
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
182
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
183
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
184
+ const chunkEmbedding = sql.identifier(chunksInfo.embeddingField);
185
+ // Use the configured PK field (defaults to 'id', but can be overridden via @hasChunks tag)
186
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
187
+ // Alias to avoid ambiguity when the chunks table name might collide
188
+ const chunksAlias = sql.identifier('__chunks');
189
+ // Subquery: SELECT MIN(distance) FROM chunks WHERE chunks.parent_fk = parent.pk
190
+ const chunkDistanceSubquery = sql `(
191
+ SELECT MIN(${chunksAlias}.${chunkEmbedding} ${sql.raw(operator)} ${vectorExpr})
192
+ FROM ${chunksTableRef} AS ${chunksAlias}
193
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
194
+ )`;
195
+ // Also compute direct parent distance if the parent has an embedding
196
+ const parentColumnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
197
+ const parentDistanceExpr = sql `(${parentColumnExpr} ${sql.raw(operator)} ${vectorExpr})`;
198
+ // Use LEAST of parent distance and closest chunk distance
199
+ // COALESCE handles cases where parent or chunks may not have embeddings
200
+ const combinedDistanceExpr = sql `LEAST(
201
+ COALESCE(${parentDistanceExpr}, 'Infinity'::float),
202
+ COALESCE(${chunkDistanceSubquery}, 'Infinity'::float)
203
+ )`;
204
+ let whereClause = null;
205
+ if (distance !== undefined && distance !== null) {
206
+ whereClause = sql `${combinedDistanceExpr} <= ${sql.value(distance)}`;
207
+ }
208
+ return {
209
+ whereClause,
210
+ scoreExpression: combinedDistanceExpr,
211
+ };
212
+ }
213
+ // Standard (non-chunk) query
214
+ const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
114
215
  const distanceExpr = sql `(${columnExpr} ${sql.raw(operator)} ${vectorExpr})`;
115
216
  let whereClause = null;
116
217
  if (distance !== undefined && distance !== null) {
package/esm/plugin.js CHANGED
@@ -21,6 +21,82 @@ import 'graphile-build-pg';
21
21
  import 'graphile-connection-filter';
22
22
  import { TYPES } from '@dataplan/pg';
23
23
  import { getQueryBuilder } from 'graphile-connection-filter';
24
+ /**
25
+ * Read the @searchConfig smart tag from a codec's extensions.
26
+ * Returns undefined if no searchConfig tag is present.
27
+ */
28
+ function getSearchConfig(codec) {
29
+ const tags = codec.extensions?.tags;
30
+ if (!tags)
31
+ return undefined;
32
+ const raw = tags.searchConfig;
33
+ if (!raw)
34
+ return undefined;
35
+ // Smart tags can be strings (JSON-encoded) or already-parsed objects
36
+ if (typeof raw === 'string') {
37
+ try {
38
+ return JSON.parse(raw);
39
+ }
40
+ catch {
41
+ return undefined;
42
+ }
43
+ }
44
+ if (typeof raw === 'object')
45
+ return raw;
46
+ return undefined;
47
+ }
48
+ /**
49
+ * Normalize a raw score to 0..1 using the specified strategy.
50
+ *
51
+ * When strategy is 'sigmoid', sigmoid normalization is used for ALL adapters
52
+ * (both bounded and unbounded). When strategy is 'linear' (default),
53
+ * known-range adapters use linear normalization and unbounded adapters
54
+ * use sigmoid normalization as fallback.
55
+ */
56
+ function normalizeScore(score, lowerIsBetter, range, strategy = 'linear') {
57
+ let normalized;
58
+ if (range && strategy === 'linear') {
59
+ // Known range + linear strategy: linear normalization
60
+ const [min, max] = range;
61
+ normalized = lowerIsBetter
62
+ ? 1 - (score - min) / (max - min)
63
+ : (score - min) / (max - min);
64
+ }
65
+ else {
66
+ // Unbounded range, or explicit sigmoid strategy: sigmoid normalization
67
+ if (lowerIsBetter) {
68
+ // BM25: negative scores, more negative = better
69
+ normalized = 1 / (1 + Math.abs(score));
70
+ }
71
+ else {
72
+ // Higher-is-better: map via sigmoid
73
+ normalized = score / (1 + score);
74
+ }
75
+ }
76
+ return Math.max(0, Math.min(1, normalized));
77
+ }
78
+ /**
79
+ * Apply recency boost to a normalized score.
80
+ * Uses exponential decay based on age in days.
81
+ *
82
+ * @param normalizedScore - The already-normalized score (0..1)
83
+ * @param recencyValue - The raw recency field value (timestamp string from SQL row)
84
+ * @param decay - Decay factor per day (e.g. 0.95 means 5% penalty per day)
85
+ */
86
+ function applyRecencyBoost(normalizedScore, recencyValue, decay) {
87
+ if (recencyValue == null)
88
+ return normalizedScore;
89
+ const fieldDate = new Date(recencyValue);
90
+ if (isNaN(fieldDate.getTime()))
91
+ return normalizedScore;
92
+ const now = new Date();
93
+ const ageInDays = (now.getTime() - fieldDate.getTime()) / (1000 * 60 * 60 * 24);
94
+ if (ageInDays < 0)
95
+ return normalizedScore; // future dates get no penalty
96
+ // Exponential decay: boost = decay^ageInDays
97
+ const boost = Math.pow(decay, ageInDays);
98
+ return normalizedScore * boost;
99
+ }
24
100
  /**
25
101
  * Creates the unified search plugin with the given options.
26
102
  */
@@ -195,7 +271,7 @@ export function createUnifiedSearchPlugin(options) {
195
271
  * on the appropriate output types.
196
272
  */
197
273
  GraphQLObjectType_fields(fields, build, context) {
198
- const { inflection, graphql: { GraphQLFloat }, grafast: { lambda }, } = build;
274
+ const { inflection, sql, graphql: { GraphQLFloat }, grafast: { lambda }, } = build;
199
275
  const { scope: { isPgClassType, pgCodec: rawPgCodec }, fieldWithHooks, } = context;
200
276
  if (!isPgClassType || !rawPgCodec?.attributes) {
201
277
  return fields;
@@ -266,6 +342,24 @@ export function createUnifiedSearchPlugin(options) {
266
342
  });
267
343
  }
268
344
  }
345
+ // Read per-table @searchConfig smart tag (written by DataSearch/DataFullTextSearch/DataBm25)
346
+ // Per-table config overrides global searchScoreWeights
347
+ const tableSearchConfig = getSearchConfig(codec);
348
+ // Resolve effective weights: per-table > global > equal (undefined)
349
+ const effectiveWeights = tableSearchConfig?.weights ?? options.searchScoreWeights;
350
+ // Resolve normalization strategy: per-table > default 'linear'
351
+ const normalizationStrategy = tableSearchConfig?.normalization ?? 'linear';
352
+ // Recency boost config from per-table smart tag
353
+ let boostRecent = tableSearchConfig?.boost_recent ?? false;
354
+ const boostRecencyField = tableSearchConfig?.boost_recency_field ?? 'updated_at';
355
+ const boostRecencyDecay = tableSearchConfig?.boost_recency_decay ?? 0.95;
356
+ // Phase I: Validate that the recency field actually exists on the table.
357
+ // If it doesn't, disable recency boost gracefully instead of crashing at query time.
358
+ if (boostRecent && boostRecencyField && !codec.attributes[boostRecencyField]) {
359
+ console.warn(`[graphile-search] @searchConfig.boost_recency_field "${boostRecencyField}" ` +
360
+ `not found on table "${codec.name}". Recency boost disabled for this table.`);
361
+ boostRecent = false;
362
+ }
269
363
  newFields = build.extend(newFields, {
270
364
  searchScore: fieldWithHooks({
271
365
  fieldName: 'searchScore',
@@ -273,6 +367,7 @@ export function createUnifiedSearchPlugin(options) {
273
367
  }, () => ({
274
368
  description: 'Composite search relevance score (0..1, higher = more relevant). ' +
275
369
  'Computed by normalizing and averaging all active search signals. ' +
370
+ 'Supports per-table weight customization via @searchConfig smart tag. ' +
276
371
  'Returns null when no search filters are active.',
277
372
  type: GraphQLFloat,
278
373
  plan($step) {
@@ -287,12 +382,25 @@ export function createUnifiedSearchPlugin(options) {
287
382
  }
288
383
  // Collect all meta steps for all adapters
289
384
  const $metaSteps = allMetaKeys.map((mk) => $select.getMeta(mk.metaKey));
385
+ // If recency boost is configured, inject the recency field into
386
+ // the SQL SELECT so we can read it by numeric index at runtime.
387
+ let recencySelectIndex = null;
388
+ if (boostRecent && boostRecencyField) {
389
+ const recencyColumnSql = sql `${$select.alias}.${sql.identifier(boostRecencyField)}::text`;
390
+ recencySelectIndex = $select.selectAndReturnIndex(recencyColumnSql);
391
+ }
392
+ // Capture the index in a local const for the lambda closure
393
+ const capturedRecencyIndex = recencySelectIndex;
290
394
  return lambda([...$metaSteps, $row], (args) => {
291
395
  const row = args[args.length - 1];
292
396
  if (row == null)
293
397
  return null;
294
- let sum = 0;
295
- let count = 0;
398
+ let weightedSum = 0;
399
+ let totalWeight = 0;
400
+ // Read recency value from the injected SELECT column
401
+ const recencyValue = (boostRecent && capturedRecencyIndex != null)
402
+ ? row[capturedRecencyIndex]
403
+ : null;
296
404
  for (let i = 0; i < allMetaKeys.length; i++) {
297
405
  const details = args[i];
298
406
  if (details == null || details.selectIndex == null)
@@ -304,74 +412,19 @@ export function createUnifiedSearchPlugin(options) {
304
412
  if (typeof score !== 'number' || isNaN(score))
305
413
  continue;
306
414
  const mk = allMetaKeys[i];
307
- // Normalize to 0..1 (higher = better)
308
- let normalized;
309
- if (mk.range) {
310
- // Known range: linear normalization
311
- const [min, max] = mk.range;
312
- normalized = mk.lowerIsBetter
313
- ? 1 - (score - min) / (max - min)
314
- : (score - min) / (max - min);
415
+ const weight = effectiveWeights?.[mk.adapterName] ?? 1;
416
+ // Normalize using the resolved strategy
417
+ let normalized = normalizeScore(score, mk.lowerIsBetter, mk.range, normalizationStrategy);
418
+ // Apply recency boost if configured
419
+ if (boostRecent && recencyValue != null) {
420
+ normalized = applyRecencyBoost(normalized, recencyValue, boostRecencyDecay);
315
421
  }
316
- else {
317
- // Unbounded: sigmoid normalization
318
- if (mk.lowerIsBetter) {
319
- // BM25: negative scores, more negative = better
320
- // Map via 1 / (1 + abs(score))
321
- normalized = 1 / (1 + Math.abs(score));
322
- }
323
- else {
324
- // Hypothetical unbounded higher-is-better
325
- normalized = score / (1 + score);
326
- }
327
- }
328
- // Clamp to [0, 1]
329
- normalized = Math.max(0, Math.min(1, normalized));
330
- sum += normalized;
331
- count++;
422
+ weightedSum += normalized * weight;
423
+ totalWeight += weight;
332
424
  }
333
- if (count === 0)
425
+ if (totalWeight === 0)
334
426
  return null;
335
- // Apply optional weights
336
- if (options.searchScoreWeights) {
337
- let weightedSum = 0;
338
- let totalWeight = 0;
339
- let weightIdx = 0;
340
- for (let i = 0; i < allMetaKeys.length; i++) {
341
- const details = args[i];
342
- if (details == null || details.selectIndex == null)
343
- continue;
344
- const rawValue = row[details.selectIndex];
345
- if (rawValue == null)
346
- continue;
347
- const mk = allMetaKeys[i];
348
- const weight = options.searchScoreWeights[mk.adapterName] ?? 1;
349
- const score = TYPES.float.fromPg(rawValue);
350
- if (typeof score !== 'number' || isNaN(score))
351
- continue;
352
- let normalized;
353
- if (mk.range) {
354
- const [min, max] = mk.range;
355
- normalized = mk.lowerIsBetter
356
- ? 1 - (score - min) / (max - min)
357
- : (score - min) / (max - min);
358
- }
359
- else {
360
- if (mk.lowerIsBetter) {
361
- normalized = 1 / (1 + Math.abs(score));
362
- }
363
- else {
364
- normalized = score / (1 + score);
365
- }
366
- }
367
- normalized = Math.max(0, Math.min(1, normalized));
368
- weightedSum += normalized * weight;
369
- totalWeight += weight;
370
- weightIdx++;
371
- }
372
- return totalWeight > 0 ? weightedSum / totalWeight : null;
373
- }
374
- return sum / count;
427
+ return weightedSum / totalWeight;
375
428
  });
376
429
  },
377
430
  })),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "graphile-search",
3
- "version": "1.2.1",
3
+ "version": "1.3.0",
4
4
  "description": "Unified PostGraphile v5 search plugin — abstracts tsvector, BM25, pg_trgm, and pgvector behind a single adapter-based architecture with composite searchScore",
5
5
  "author": "Constructive <developers@constructive.io>",
6
6
  "homepage": "https://github.com/constructive-io/constructive",
@@ -62,5 +62,5 @@
62
62
  "hybrid-search",
63
63
  "searchScore"
64
64
  ],
65
- "gitHead": "c18ef8e002d958001fe69fff3758d1f89613eb2b"
65
+ "gitHead": "71058a2aa53cc20255502068b0f0615c384d93d2"
66
66
  }
package/plugin.js CHANGED
@@ -24,6 +24,82 @@ require("graphile-build-pg");
24
24
  require("graphile-connection-filter");
25
25
  const pg_1 = require("@dataplan/pg");
26
26
  const graphile_connection_filter_1 = require("graphile-connection-filter");
27
+ /**
28
+ * Read the @searchConfig smart tag from a codec's extensions.
29
+ * Returns undefined if no searchConfig tag is present.
30
+ */
31
+ function getSearchConfig(codec) {
32
+ const tags = codec.extensions?.tags;
33
+ if (!tags)
34
+ return undefined;
35
+ const raw = tags.searchConfig;
36
+ if (!raw)
37
+ return undefined;
38
+ // Smart tags can be strings (JSON-encoded) or already-parsed objects
39
+ if (typeof raw === 'string') {
40
+ try {
41
+ return JSON.parse(raw);
42
+ }
43
+ catch {
44
+ return undefined;
45
+ }
46
+ }
47
+ if (typeof raw === 'object')
48
+ return raw;
49
+ return undefined;
50
+ }
51
+ /**
52
+ * Normalize a raw score to 0..1 using the specified strategy.
53
+ *
54
+ * When strategy is 'sigmoid', sigmoid normalization is used for ALL adapters
55
+ * (both bounded and unbounded). When strategy is 'linear' (default),
56
+ * known-range adapters use linear normalization and unbounded adapters
57
+ * use sigmoid normalization as fallback.
58
+ */
59
+ function normalizeScore(score, lowerIsBetter, range, strategy = 'linear') {
60
+ let normalized;
61
+ if (range && strategy === 'linear') {
62
+ // Known range + linear strategy: linear normalization
63
+ const [min, max] = range;
64
+ normalized = lowerIsBetter
65
+ ? 1 - (score - min) / (max - min)
66
+ : (score - min) / (max - min);
67
+ }
68
+ else {
69
+ // Unbounded range, or explicit sigmoid strategy: sigmoid normalization
70
+ if (lowerIsBetter) {
71
+ // BM25: negative scores, more negative = better
72
+ normalized = 1 / (1 + Math.abs(score));
73
+ }
74
+ else {
75
+ // Higher-is-better: map via sigmoid
76
+ normalized = score / (1 + score);
77
+ }
78
+ }
79
+ return Math.max(0, Math.min(1, normalized));
80
+ }
81
+ /**
82
+ * Apply recency boost to a normalized score.
83
+ * Uses exponential decay based on age in days.
84
+ *
85
+ * @param normalizedScore - The already-normalized score (0..1)
86
+ * @param recencyValue - The raw recency field value (timestamp string from SQL row)
87
+ * @param decay - Decay factor per day (e.g. 0.95 means 5% penalty per day)
88
+ */
89
+ function applyRecencyBoost(normalizedScore, recencyValue, decay) {
90
+ if (recencyValue == null)
91
+ return normalizedScore;
92
+ const fieldDate = new Date(recencyValue);
93
+ if (isNaN(fieldDate.getTime()))
94
+ return normalizedScore;
95
+ const now = new Date();
96
+ const ageInDays = (now.getTime() - fieldDate.getTime()) / (1000 * 60 * 60 * 24);
97
+ if (ageInDays < 0)
98
+ return normalizedScore; // future dates get no penalty
99
+ // Exponential decay: boost = decay^ageInDays
100
+ const boost = Math.pow(decay, ageInDays);
101
+ return normalizedScore * boost;
102
+ }
27
103
  /**
28
104
  * Creates the unified search plugin with the given options.
29
105
  */
@@ -198,7 +274,7 @@ function createUnifiedSearchPlugin(options) {
198
274
  * on the appropriate output types.
199
275
  */
200
276
  GraphQLObjectType_fields(fields, build, context) {
201
- const { inflection, graphql: { GraphQLFloat }, grafast: { lambda }, } = build;
277
+ const { inflection, sql, graphql: { GraphQLFloat }, grafast: { lambda }, } = build;
202
278
  const { scope: { isPgClassType, pgCodec: rawPgCodec }, fieldWithHooks, } = context;
203
279
  if (!isPgClassType || !rawPgCodec?.attributes) {
204
280
  return fields;
@@ -269,6 +345,24 @@ function createUnifiedSearchPlugin(options) {
269
345
  });
270
346
  }
271
347
  }
348
+ // Read per-table @searchConfig smart tag (written by DataSearch/DataFullTextSearch/DataBm25)
349
+ // Per-table config overrides global searchScoreWeights
350
+ const tableSearchConfig = getSearchConfig(codec);
351
+ // Resolve effective weights: per-table > global > equal (undefined)
352
+ const effectiveWeights = tableSearchConfig?.weights ?? options.searchScoreWeights;
353
+ // Resolve normalization strategy: per-table > default 'linear'
354
+ const normalizationStrategy = tableSearchConfig?.normalization ?? 'linear';
355
+ // Recency boost config from per-table smart tag
356
+ let boostRecent = tableSearchConfig?.boost_recent ?? false;
357
+ const boostRecencyField = tableSearchConfig?.boost_recency_field ?? 'updated_at';
358
+ const boostRecencyDecay = tableSearchConfig?.boost_recency_decay ?? 0.95;
359
+ // Phase I: Validate that the recency field actually exists on the table.
360
+ // If it doesn't, disable recency boost gracefully instead of crashing at query time.
361
+ if (boostRecent && boostRecencyField && !codec.attributes[boostRecencyField]) {
362
+ console.warn(`[graphile-search] @searchConfig.boost_recency_field "${boostRecencyField}" ` +
363
+ `not found on table "${codec.name}". Recency boost disabled for this table.`);
364
+ boostRecent = false;
365
+ }
272
366
  newFields = build.extend(newFields, {
273
367
  searchScore: fieldWithHooks({
274
368
  fieldName: 'searchScore',
@@ -276,6 +370,7 @@ function createUnifiedSearchPlugin(options) {
276
370
  }, () => ({
277
371
  description: 'Composite search relevance score (0..1, higher = more relevant). ' +
278
372
  'Computed by normalizing and averaging all active search signals. ' +
373
+ 'Supports per-table weight customization via @searchConfig smart tag. ' +
279
374
  'Returns null when no search filters are active.',
280
375
  type: GraphQLFloat,
281
376
  plan($step) {
@@ -290,12 +385,25 @@ function createUnifiedSearchPlugin(options) {
290
385
  }
291
386
  // Collect all meta steps for all adapters
292
387
  const $metaSteps = allMetaKeys.map((mk) => $select.getMeta(mk.metaKey));
388
+ // If recency boost is configured, inject the recency field into
389
+ // the SQL SELECT so we can read it by numeric index at runtime.
390
+ let recencySelectIndex = null;
391
+ if (boostRecent && boostRecencyField) {
392
+ const recencyColumnSql = sql `${$select.alias}.${sql.identifier(boostRecencyField)}::text`;
393
+ recencySelectIndex = $select.selectAndReturnIndex(recencyColumnSql);
394
+ }
395
+ // Capture the index in a local const for the lambda closure
396
+ const capturedRecencyIndex = recencySelectIndex;
293
397
  return lambda([...$metaSteps, $row], (args) => {
294
398
  const row = args[args.length - 1];
295
399
  if (row == null)
296
400
  return null;
297
- let sum = 0;
298
- let count = 0;
401
+ let weightedSum = 0;
402
+ let totalWeight = 0;
403
+ // Read recency value from the injected SELECT column
404
+ const recencyValue = (boostRecent && capturedRecencyIndex != null)
405
+ ? row[capturedRecencyIndex]
406
+ : null;
299
407
  for (let i = 0; i < allMetaKeys.length; i++) {
300
408
  const details = args[i];
301
409
  if (details == null || details.selectIndex == null)
@@ -307,74 +415,19 @@ function createUnifiedSearchPlugin(options) {
307
415
  if (typeof score !== 'number' || isNaN(score))
308
416
  continue;
309
417
  const mk = allMetaKeys[i];
310
- // Normalize to 0..1 (higher = better)
311
- let normalized;
312
- if (mk.range) {
313
- // Known range: linear normalization
314
- const [min, max] = mk.range;
315
- normalized = mk.lowerIsBetter
316
- ? 1 - (score - min) / (max - min)
317
- : (score - min) / (max - min);
418
+ const weight = effectiveWeights?.[mk.adapterName] ?? 1;
419
+ // Normalize using the resolved strategy
420
+ let normalized = normalizeScore(score, mk.lowerIsBetter, mk.range, normalizationStrategy);
421
+ // Apply recency boost if configured
422
+ if (boostRecent && recencyValue != null) {
423
+ normalized = applyRecencyBoost(normalized, recencyValue, boostRecencyDecay);
318
424
  }
319
- else {
320
- // Unbounded: sigmoid normalization
321
- if (mk.lowerIsBetter) {
322
- // BM25: negative scores, more negative = better
323
- // Map via 1 / (1 + abs(score))
324
- normalized = 1 / (1 + Math.abs(score));
325
- }
326
- else {
327
- // Hypothetical unbounded higher-is-better
328
- normalized = score / (1 + score);
329
- }
330
- }
331
- // Clamp to [0, 1]
332
- normalized = Math.max(0, Math.min(1, normalized));
333
- sum += normalized;
334
- count++;
425
+ weightedSum += normalized * weight;
426
+ totalWeight += weight;
335
427
  }
336
- if (count === 0)
428
+ if (totalWeight === 0)
337
429
  return null;
338
- // Apply optional weights
339
- if (options.searchScoreWeights) {
340
- let weightedSum = 0;
341
- let totalWeight = 0;
342
- let weightIdx = 0;
343
- for (let i = 0; i < allMetaKeys.length; i++) {
344
- const details = args[i];
345
- if (details == null || details.selectIndex == null)
346
- continue;
347
- const rawValue = row[details.selectIndex];
348
- if (rawValue == null)
349
- continue;
350
- const mk = allMetaKeys[i];
351
- const weight = options.searchScoreWeights[mk.adapterName] ?? 1;
352
- const score = pg_1.TYPES.float.fromPg(rawValue);
353
- if (typeof score !== 'number' || isNaN(score))
354
- continue;
355
- let normalized;
356
- if (mk.range) {
357
- const [min, max] = mk.range;
358
- normalized = mk.lowerIsBetter
359
- ? 1 - (score - min) / (max - min)
360
- : (score - min) / (max - min);
361
- }
362
- else {
363
- if (mk.lowerIsBetter) {
364
- normalized = 1 / (1 + Math.abs(score));
365
- }
366
- else {
367
- normalized = score / (1 + score);
368
- }
369
- }
370
- normalized = Math.max(0, Math.min(1, normalized));
371
- weightedSum += normalized * weight;
372
- totalWeight += weight;
373
- weightIdx++;
374
- }
375
- return totalWeight > 0 ? weightedSum / totalWeight : null;
376
- }
377
- return sum / count;
430
+ return weightedSum / totalWeight;
378
431
  });
379
432
  },
380
433
  })),