graphile-search 1.12.1 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,11 @@
6
6
  *
7
7
  * Requires the Bm25CodecPlugin to be loaded first (for index discovery).
8
8
  * The adapter reads from the bm25IndexStore populated during the gather phase.
9
+ *
10
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
11
+ * table has chunks with a BM25 index, the adapter includes a lateral
12
+ * subquery to find the best-matching chunk and returns
13
+ * LEAST(parent_score, chunk_score) (lower = better for BM25).
9
14
  */
10
15
  import type { SearchAdapter } from '../types';
11
16
  /**
package/adapters/bm25.js CHANGED
@@ -7,10 +7,16 @@
7
7
  *
8
8
  * Requires the Bm25CodecPlugin to be loaded first (for index discovery).
9
9
  * The adapter reads from the bm25IndexStore populated during the gather phase.
10
+ *
11
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
12
+ * table has chunks with a BM25 index, the adapter includes a lateral
13
+ * subquery to find the best-matching chunk and returns
14
+ * LEAST(parent_score, chunk_score) (lower = better for BM25).
10
15
  */
11
16
  Object.defineProperty(exports, "__esModule", { value: true });
12
17
  exports.createBm25Adapter = createBm25Adapter;
13
18
  const bm25_codec_1 = require("../codecs/bm25-codec");
19
+ const chunks_1 = require("./chunks");
14
20
  function isTextCodec(codec) {
15
21
  const name = codec?.name;
16
22
  return name === 'text' || name === 'varchar' || name === 'bpchar';
@@ -62,7 +68,14 @@ function createBm25Adapter(options = {}) {
62
68
  const bm25Index = getBm25IndexForAttribute(codec, attributeName, build);
63
69
  if (!bm25Index)
64
70
  continue;
65
- columns.push({ attributeName, adapterData: bm25Index });
71
+ // Check for chunk-aware BM25
72
+ const chunksInfo = (0, chunks_1.getChunksInfo)(codec);
73
+ const hasChunkBm25 = chunksInfo?.searchIndexes.includes('bm25');
74
+ const columnData = {
75
+ bm25Index,
76
+ chunksInfo: hasChunkBm25 ? chunksInfo : undefined,
77
+ };
78
+ columns.push({ attributeName, adapterData: columnData });
66
79
  }
67
80
  return columns;
68
81
  },
@@ -97,15 +110,53 @@ function createBm25Adapter(options = {}) {
97
110
  buildFilterApply(sql, alias, column, filterValue, _build) {
98
111
  if (filterValue == null)
99
112
  return null;
100
- const { query, threshold } = filterValue;
113
+ const { query, threshold, includeChunks } = filterValue;
101
114
  if (!query || typeof query !== 'string' || query.trim().length === 0)
102
115
  return null;
103
- const bm25Index = column.adapterData;
116
+ const columnData = column.adapterData;
117
+ const bm25Index = columnData.bm25Index;
104
118
  const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
105
119
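  // Build the schema-qualified index name by hand: each part is wrapped in double quotes (embedded quotes are not escaped; no quoteQualifiedIdentifier helper is called)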
106
120
  const qualifiedIndexName = `"${bm25Index.schemaName}"."${bm25Index.indexName}"`;
107
121
  const bm25queryExpr = sql `to_bm25query(${sql.value(query)}, ${sql.value(qualifiedIndexName)})`;
108
122
  const scoreExpr = sql `(${columnExpr} <@> ${bm25queryExpr})`;
123
+ // Check for chunk-aware querying
124
+ const chunksInfo = columnData.chunksInfo;
125
+ if (chunksInfo && chunksInfo.searchIndexes.includes('bm25') && (includeChunks !== false)) {
126
+ const chunksTableRef = chunksInfo.chunksSchema
127
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
128
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
129
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
130
+ const chunkContentField = sql.identifier(chunksInfo.contentField);
131
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
132
+ const chunksAlias = sql.identifier('__bm25_chunks');
133
+ // BM25 on chunks requires an index name on the chunks table.
134
+ // We construct it from the chunks table schema + a conventional index name.
135
+ // The BM25 index on chunks is named: {chunks_table}_{content_field}_bm25_idx
136
+ const chunksIndexName = `"${chunksInfo.chunksSchema || bm25Index.schemaName}"."${chunksInfo.chunksTableName}_${chunksInfo.contentField}_bm25_idx"`;
137
+ const chunkBm25queryExpr = sql `to_bm25query(${sql.value(query)}, ${sql.value(chunksIndexName)})`;
138
+ const chunkScoreExpr = sql `(${chunksAlias}.${chunkContentField} <@> ${chunkBm25queryExpr})`;
139
+ // Subquery: MIN(bm25_score) across chunks (lower = better for BM25)
140
+ const chunkScoreSubquery = sql `(
141
+ SELECT MIN(${chunkScoreExpr})
142
+ FROM ${chunksTableRef} AS ${chunksAlias}
143
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
144
+ )`;
145
+ // Combined: LEAST of parent score and best chunk score (lower = better)
146
+ const combinedScoreExpr = sql `LEAST(
147
+ COALESCE(${scoreExpr}, 0::real),
148
+ COALESCE(${chunkScoreSubquery}, 0::real)
149
+ )`;
150
+ let whereClause = null;
151
+ if (threshold !== undefined && threshold !== null) {
152
+ whereClause = sql `${combinedScoreExpr} < ${sql.value(threshold)}`;
153
+ }
154
+ return {
155
+ whereClause,
156
+ scoreExpression: combinedScoreExpr,
157
+ };
158
+ }
159
+ // Standard (non-chunk) query
109
160
  let whereClause = null;
110
161
  if (threshold !== undefined && threshold !== null) {
111
162
  whereClause = sql `${scoreExpr} < ${sql.value(threshold)}`;
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Shared @hasChunks smart tag utilities.
3
+ *
4
+ * Extracts chunk table metadata from the @hasChunks smart tag on a codec.
5
+ * Used by pgvector, tsvector, BM25, and trgm adapters to build lateral
6
+ * subqueries against the chunks table for chunk-aware search.
7
+ */
8
+ /**
9
+ * Chunks table info detected from @hasChunks smart tag.
10
+ */
11
+ export interface ChunksInfo {
12
+ chunksSchema: string | null;
13
+ chunksTableName: string;
14
+ parentFkField: string;
15
+ parentPkField: string;
16
+ embeddingField: string;
17
+ /** Text content field on chunks table (e.g. "content") */
18
+ contentField: string;
19
+ /** tsvector field on chunks table, if fulltext search is enabled (e.g. "search") */
20
+ searchField: string | null;
21
+ /** Which search indexes are created on the chunks table (e.g. ["fulltext", "bm25"]) */
22
+ searchIndexes: string[];
23
+ }
24
+ /**
25
+ * Read @hasChunks smart tag from codec extensions.
26
+ *
27
+ * The tag value is a JSON object like:
28
+ * {
29
+ * "chunksTable": "documents_chunks",
30
+ * "chunksSchema": "app_private", // optional, defaults to parent table's schema
31
+ * "parentFk": "document_id", // optional, defaults to "parent_id"
32
+ * "parentPk": "id", // optional, defaults to "id"
33
+ * "embeddingField": "embedding", // optional, defaults to "embedding"
34
+ * "contentField": "content", // optional, defaults to "content"
35
+ * "searchField": "search", // optional, null if no fulltext
36
+ * "searchIndexes": ["fulltext","bm25"] // optional, defaults to []
37
+ * }
38
+ */
39
+ export declare function getChunksInfo(codec: any): ChunksInfo | undefined;
@@ -0,0 +1,81 @@
1
+ "use strict";
2
+ /**
3
+ * Shared @hasChunks smart tag utilities.
4
+ *
5
+ * Extracts chunk table metadata from the @hasChunks smart tag on a codec.
6
+ * Used by pgvector, tsvector, BM25, and trgm adapters to build lateral
7
+ * subqueries against the chunks table for chunk-aware search.
8
+ */
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.getChunksInfo = getChunksInfo;
11
+ /**
12
+ * Read @hasChunks smart tag from codec extensions.
13
+ *
14
+ * The tag value is a JSON object like:
15
+ * {
16
+ * "chunksTable": "documents_chunks",
17
+ * "chunksSchema": "app_private", // optional, defaults to parent table's schema
18
+ * "parentFk": "document_id", // optional, defaults to "parent_id"
19
+ * "parentPk": "id", // optional, defaults to "id"
20
+ * "embeddingField": "embedding", // optional, defaults to "embedding"
21
+ * "contentField": "content", // optional, defaults to "content"
22
+ * "searchField": "search", // optional, null if no fulltext
23
+ * "searchIndexes": ["fulltext","bm25"] // optional, defaults to []
24
+ * }
25
+ */
26
+ function getChunksInfo(codec) {
27
+ const tags = codec?.extensions?.tags;
28
+ if (!tags)
29
+ return undefined;
30
+ const raw = tags.hasChunks;
31
+ if (!raw)
32
+ return undefined;
33
+ let parsed;
34
+ if (typeof raw === 'string') {
35
+ try {
36
+ parsed = JSON.parse(raw);
37
+ }
38
+ catch {
39
+ return undefined;
40
+ }
41
+ }
42
+ else if (typeof raw === 'object') {
43
+ parsed = raw;
44
+ }
45
+ else if (raw === true) {
46
+ return undefined;
47
+ }
48
+ else {
49
+ return undefined;
50
+ }
51
+ if (!parsed.chunksTable)
52
+ return undefined;
53
+ const chunksSchema = parsed.chunksSchema
54
+ || codec?.extensions?.pg?.schemaName
55
+ || null;
56
+ // Parse searchIndexes from tag (may be array or JSON string)
57
+ let searchIndexes = [];
58
+ if (Array.isArray(parsed.searchIndexes)) {
59
+ searchIndexes = parsed.searchIndexes;
60
+ }
61
+ else if (typeof parsed.searchIndexes === 'string') {
62
+ try {
63
+ const arr = JSON.parse(parsed.searchIndexes);
64
+ if (Array.isArray(arr))
65
+ searchIndexes = arr;
66
+ }
67
+ catch {
68
+ // ignore
69
+ }
70
+ }
71
+ return {
72
+ chunksSchema,
73
+ chunksTableName: parsed.chunksTable,
74
+ parentFkField: parsed.parentFk || 'parent_id',
75
+ parentPkField: parsed.parentPk || 'id',
76
+ embeddingField: parsed.embeddingField || 'embedding',
77
+ contentField: parsed.contentField || 'content',
78
+ searchField: parsed.searchField || null,
79
+ searchIndexes,
80
+ };
81
+ }
@@ -12,3 +12,5 @@ export { createTrgmAdapter } from './trgm';
12
12
  export type { TrgmAdapterOptions } from './trgm';
13
13
  export { createPgvectorAdapter } from './pgvector';
14
14
  export type { PgvectorAdapterOptions } from './pgvector';
15
+ export { getChunksInfo } from './chunks';
16
+ export type { ChunksInfo } from './chunks';
package/adapters/index.js CHANGED
@@ -6,7 +6,7 @@
6
6
  * search algorithm. They are plain objects — not Graphile plugins.
7
7
  */
8
8
  Object.defineProperty(exports, "__esModule", { value: true });
9
- exports.createPgvectorAdapter = exports.createTrgmAdapter = exports.createBm25Adapter = exports.createTsvectorAdapter = void 0;
9
+ exports.getChunksInfo = exports.createPgvectorAdapter = exports.createTrgmAdapter = exports.createBm25Adapter = exports.createTsvectorAdapter = void 0;
10
10
  var tsvector_1 = require("./tsvector");
11
11
  Object.defineProperty(exports, "createTsvectorAdapter", { enumerable: true, get: function () { return tsvector_1.createTsvectorAdapter; } });
12
12
  var bm25_1 = require("./bm25");
@@ -15,3 +15,5 @@ var trgm_1 = require("./trgm");
15
15
  Object.defineProperty(exports, "createTrgmAdapter", { enumerable: true, get: function () { return trgm_1.createTrgmAdapter; } });
16
16
  var pgvector_1 = require("./pgvector");
17
17
  Object.defineProperty(exports, "createPgvectorAdapter", { enumerable: true, get: function () { return pgvector_1.createPgvectorAdapter; } });
18
+ var chunks_1 = require("./chunks");
19
+ Object.defineProperty(exports, "getChunksInfo", { enumerable: true, get: function () { return chunks_1.getChunksInfo; } });
@@ -8,6 +8,7 @@
8
8
  */
9
9
  Object.defineProperty(exports, "__esModule", { value: true });
10
10
  exports.createPgvectorAdapter = createPgvectorAdapter;
11
+ const chunks_1 = require("./chunks");
11
12
  /**
12
13
  * Build a distance expression for the given metric.
13
14
  * Uses explicit SQL template literals for each operator to avoid sql.raw.
@@ -26,57 +27,6 @@ function buildDistanceExpr(sql, columnExpr, vectorExpr, metric) {
26
27
  function isVectorCodec(codec) {
27
28
  return codec?.name === 'vector';
28
29
  }
29
- /**
30
- * Read @hasChunks smart tag from codec extensions.
31
- * The tag value is a JSON object like:
32
- * {
33
- * "chunksTable": "documents_chunks",
34
- * "chunksSchema": "app_private", // optional, defaults to parent table's schema
35
- * "parentFk": "document_id", // optional, defaults to "parent_id"
36
- * "parentPk": "id", // optional, defaults to "id"
37
- * "embeddingField": "embedding" // optional, defaults to "embedding"
38
- * }
39
- */
40
- function getChunksInfo(codec) {
41
- const tags = codec?.extensions?.tags;
42
- if (!tags)
43
- return undefined;
44
- const raw = tags.hasChunks;
45
- if (!raw)
46
- return undefined;
47
- let parsed;
48
- if (typeof raw === 'string') {
49
- try {
50
- parsed = JSON.parse(raw);
51
- }
52
- catch {
53
- // If it's just "true" or a plain string, use convention-based defaults
54
- return undefined;
55
- }
56
- }
57
- else if (typeof raw === 'object') {
58
- parsed = raw;
59
- }
60
- else if (raw === true) {
61
- return undefined; // boolean true = no metadata, can't resolve
62
- }
63
- else {
64
- return undefined;
65
- }
66
- if (!parsed.chunksTable)
67
- return undefined;
68
- // Resolve schema: explicit chunksSchema > parent codec schema > null
69
- const chunksSchema = parsed.chunksSchema
70
- || codec?.extensions?.pg?.schemaName
71
- || null;
72
- return {
73
- chunksSchema,
74
- chunksTableName: parsed.chunksTable,
75
- parentFkField: parsed.parentFk || 'parent_id',
76
- parentPkField: parsed.parentPk || 'id',
77
- embeddingField: parsed.embeddingField || 'embedding',
78
- };
79
- }
80
30
  function createPgvectorAdapter(options = {}) {
81
31
  const { filterPrefix = 'vector', defaultMetric = 'COSINE', enableChunkQuerying = true } = options;
82
32
  return {
@@ -96,7 +46,7 @@ function createPgvectorAdapter(options = {}) {
96
46
  if (!codec?.attributes)
97
47
  return [];
98
48
  const columns = [];
99
- const chunksInfo = enableChunkQuerying ? getChunksInfo(codec) : undefined;
49
+ const chunksInfo = enableChunkQuerying ? (0, chunks_1.getChunksInfo)(codec) : undefined;
100
50
  for (const [attributeName, attribute] of Object.entries(codec.attributes)) {
101
51
  if (isVectorCodec(attribute.codec)) {
102
52
  columns.push({
@@ -3,6 +3,11 @@
3
3
  *
4
4
  * Detects text/varchar columns and generates trigram similarity scoring.
5
5
  * Wraps the same SQL logic as graphile-trgm but as a SearchAdapter.
6
+ *
7
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
8
+ * table has chunks with a trigram index, the adapter includes a lateral
9
+ * subquery to find the best-matching chunk and returns
10
+ * GREATEST(parent_similarity, chunk_similarity).
6
11
  */
7
12
  import type { SearchAdapter } from '../types';
8
13
  export interface TrgmAdapterOptions {
package/adapters/trgm.js CHANGED
@@ -4,9 +4,15 @@
4
4
  *
5
5
  * Detects text/varchar columns and generates trigram similarity scoring.
6
6
  * Wraps the same SQL logic as graphile-trgm but as a SearchAdapter.
7
+ *
8
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
9
+ * table has chunks with a trigram index, the adapter includes a lateral
10
+ * subquery to find the best-matching chunk and returns
11
+ * GREATEST(parent_similarity, chunk_similarity).
7
12
  */
8
13
  Object.defineProperty(exports, "__esModule", { value: true });
9
14
  exports.createTrgmAdapter = createTrgmAdapter;
15
+ const chunks_1 = require("./chunks");
10
16
  function isTextCodec(codec) {
11
17
  const name = codec?.name;
12
18
  return name === 'text' || name === 'varchar' || name === 'bpchar';
@@ -37,7 +43,13 @@ function createTrgmAdapter(options = {}) {
37
43
  const columns = [];
38
44
  for (const [attributeName, attribute] of Object.entries(codec.attributes)) {
39
45
  if (isTextCodec(attribute.codec)) {
40
- columns.push({ attributeName });
46
+ // Store chunks info if available and chunks have trigram search
47
+ const chunksInfo = (0, chunks_1.getChunksInfo)(codec);
48
+ const hasChunkTrgm = chunksInfo?.searchIndexes.includes('trigram');
49
+ columns.push({
50
+ attributeName,
51
+ adapterData: hasChunkTrgm ? chunksInfo : undefined,
52
+ });
41
53
  }
42
54
  }
43
55
  return columns;
@@ -73,12 +85,42 @@ function createTrgmAdapter(options = {}) {
73
85
  buildFilterApply(sql, alias, column, filterValue, _build) {
74
86
  if (filterValue == null)
75
87
  return null;
76
- const { value, threshold } = filterValue;
88
+ const { value, threshold, includeChunks } = filterValue;
77
89
  if (!value || typeof value !== 'string' || value.trim().length === 0)
78
90
  return null;
79
91
  const th = threshold != null ? threshold : defaultThreshold;
80
92
  const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
81
93
  const similarityExpr = sql `similarity(${columnExpr}, ${sql.value(value)})`;
94
+ // Check for chunk-aware querying
95
+ const chunksInfo = column.adapterData;
96
+ if (chunksInfo && chunksInfo.searchIndexes.includes('trigram') && (includeChunks !== false)) {
97
+ const chunksTableRef = chunksInfo.chunksSchema
98
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
99
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
100
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
101
+ const chunkContentField = sql.identifier(chunksInfo.contentField);
102
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
103
+ const chunksAlias = sql.identifier('__trgm_chunks');
104
+ // Subquery: MAX(similarity) across chunks (higher = better for trgm)
105
+ const chunkSimilaritySubquery = sql `(
106
+ SELECT MAX(similarity(${chunksAlias}.${chunkContentField}, ${sql.value(value)}))
107
+ FROM ${chunksTableRef} AS ${chunksAlias}
108
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
109
+ AND similarity(${chunksAlias}.${chunkContentField}, ${sql.value(value)}) > ${sql.value(th)}
110
+ )`;
111
+ // Combined: GREATEST of parent similarity and best chunk similarity
112
+ const combinedSimilarityExpr = sql `GREATEST(
113
+ COALESCE(${similarityExpr}, 0::real),
114
+ COALESCE(${chunkSimilaritySubquery}, 0::real)
115
+ )`;
116
+ // WHERE: parent matches OR any chunk matches
117
+ const whereClause = sql `(${similarityExpr} > ${sql.value(th)} OR ${chunkSimilaritySubquery} IS NOT NULL)`;
118
+ return {
119
+ whereClause,
120
+ scoreExpression: combinedSimilarityExpr,
121
+ };
122
+ }
123
+ // Standard (non-chunk) query
82
124
  return {
83
125
  whereClause: sql `${similarityExpr} > ${sql.value(th)}`,
84
126
  scoreExpression: similarityExpr,
@@ -3,6 +3,11 @@
3
3
  *
4
4
  * Detects tsvector columns and generates ts_rank-based scoring.
5
5
  * Wraps the same SQL logic as graphile-tsvector but as a SearchAdapter.
6
+ *
7
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
8
+ * table has chunks with a tsvector search field, the adapter includes a
9
+ * lateral subquery to find the best-matching chunk and returns
10
+ * GREATEST(parent_rank, chunk_rank).
6
11
  */
7
12
  import type { SearchAdapter } from '../types';
8
13
  export interface TsvectorAdapterOptions {
@@ -4,9 +4,15 @@
4
4
  *
5
5
  * Detects tsvector columns and generates ts_rank-based scoring.
6
6
  * Wraps the same SQL logic as graphile-tsvector but as a SearchAdapter.
7
+ *
8
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
9
+ * table has chunks with a tsvector search field, the adapter includes a
10
+ * lateral subquery to find the best-matching chunk and returns
11
+ * GREATEST(parent_rank, chunk_rank).
7
12
  */
8
13
  Object.defineProperty(exports, "__esModule", { value: true });
9
14
  exports.createTsvectorAdapter = createTsvectorAdapter;
15
+ const chunks_1 = require("./chunks");
10
16
  function isTsvectorCodec(codec) {
11
17
  // In graphile-build-pg >= 5.0.0-rc.8, the built-in TYPES.tsvector codec
12
18
  // has name === 'tsvector' but does NOT have extensions.pg. We need to
@@ -36,7 +42,14 @@ function createTsvectorAdapter(options = {}) {
36
42
  const columns = [];
37
43
  for (const [attributeName, attribute] of Object.entries(codec.attributes)) {
38
44
  if (isTsvectorCodec(attribute.codec)) {
39
- columns.push({ attributeName });
45
+ // Store chunks info if available and chunks have fulltext search
46
+ const chunksInfo = (0, chunks_1.getChunksInfo)(codec);
47
+ const hasChunkFulltext = chunksInfo?.searchField &&
48
+ chunksInfo.searchIndexes.includes('fulltext');
49
+ columns.push({
50
+ attributeName,
51
+ adapterData: hasChunkFulltext ? chunksInfo : undefined,
52
+ });
40
53
  }
41
54
  }
42
55
  return columns;
@@ -50,11 +63,51 @@ function createTsvectorAdapter(options = {}) {
50
63
  buildFilterApply(sql, alias, column, filterValue, _build) {
51
64
  if (filterValue == null)
52
65
  return null;
53
- const val = typeof filterValue === 'string' ? filterValue : String(filterValue);
66
+ // Handle includeChunks option when filter is an object
67
+ let val;
68
+ let includeChunks;
69
+ if (typeof filterValue === 'object' && filterValue !== null && 'query' in filterValue) {
70
+ val = typeof filterValue.query === 'string' ? filterValue.query : String(filterValue.query);
71
+ includeChunks = filterValue.includeChunks;
72
+ }
73
+ else {
74
+ val = typeof filterValue === 'string' ? filterValue : String(filterValue);
75
+ }
54
76
  if (val.trim().length === 0)
55
77
  return null;
56
78
  const tsquery = sql `websearch_to_tsquery(${sql.literal(tsConfig)}, ${sql.value(val)})`;
57
79
  const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
80
+ // Check for chunk-aware querying
81
+ const chunksInfo = column.adapterData;
82
+ if (chunksInfo && chunksInfo.searchField && (includeChunks !== false)) {
83
+ const chunksTableRef = chunksInfo.chunksSchema
84
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
85
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
86
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
87
+ const chunkSearchField = sql.identifier(chunksInfo.searchField);
88
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
89
+ const chunksAlias = sql.identifier('__tsv_chunks');
90
+ // Subquery: MAX(ts_rank) across matching chunks
91
+ const chunkRankSubquery = sql `(
92
+ SELECT MAX(ts_rank(${chunksAlias}.${chunkSearchField}, ${tsquery}))
93
+ FROM ${chunksTableRef} AS ${chunksAlias}
94
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
95
+ AND ${chunksAlias}.${chunkSearchField} @@ ${tsquery}
96
+ )`;
97
+ const parentRankExpr = sql `ts_rank(${columnExpr}, ${tsquery})`;
98
+ // Combined: GREATEST of parent rank and best chunk rank
99
+ const combinedRankExpr = sql `GREATEST(
100
+ COALESCE(CASE WHEN ${columnExpr} @@ ${tsquery} THEN ${parentRankExpr} ELSE 0::real END, 0::real),
101
+ COALESCE(${chunkRankSubquery}, 0::real)
102
+ )`;
103
+ // WHERE: parent matches OR any chunk matches
104
+ const whereClause = sql `(${columnExpr} @@ ${tsquery} OR ${chunkRankSubquery} IS NOT NULL)`;
105
+ return {
106
+ whereClause,
107
+ scoreExpression: combinedRankExpr,
108
+ };
109
+ }
110
+ // Standard (non-chunk) query
58
111
  return {
59
112
  whereClause: sql `${columnExpr} @@ ${tsquery}`,
60
113
  scoreExpression: sql `ts_rank(${columnExpr}, ${tsquery})`,
@@ -6,6 +6,11 @@
6
6
  *
7
7
  * Requires the Bm25CodecPlugin to be loaded first (for index discovery).
8
8
  * The adapter reads from the bm25IndexStore populated during the gather phase.
9
+ *
10
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
11
+ * table has chunks with a BM25 index, the adapter includes a lateral
12
+ * subquery to find the best-matching chunk and returns
13
+ * LEAST(parent_score, chunk_score) (lower = better for BM25).
9
14
  */
10
15
  import type { SearchAdapter } from '../types';
11
16
  /**
@@ -6,8 +6,14 @@
6
6
  *
7
7
  * Requires the Bm25CodecPlugin to be loaded first (for index discovery).
8
8
  * The adapter reads from the bm25IndexStore populated during the gather phase.
9
+ *
10
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
11
+ * table has chunks with a BM25 index, the adapter includes a lateral
12
+ * subquery to find the best-matching chunk and returns
13
+ * LEAST(parent_score, chunk_score) (lower = better for BM25).
9
14
  */
10
15
  import { bm25IndexStore as moduleBm25IndexStore } from '../codecs/bm25-codec';
16
+ import { getChunksInfo } from './chunks';
11
17
  function isTextCodec(codec) {
12
18
  const name = codec?.name;
13
19
  return name === 'text' || name === 'varchar' || name === 'bpchar';
@@ -59,7 +65,14 @@ export function createBm25Adapter(options = {}) {
59
65
  const bm25Index = getBm25IndexForAttribute(codec, attributeName, build);
60
66
  if (!bm25Index)
61
67
  continue;
62
- columns.push({ attributeName, adapterData: bm25Index });
68
+ // Check for chunk-aware BM25
69
+ const chunksInfo = getChunksInfo(codec);
70
+ const hasChunkBm25 = chunksInfo?.searchIndexes.includes('bm25');
71
+ const columnData = {
72
+ bm25Index,
73
+ chunksInfo: hasChunkBm25 ? chunksInfo : undefined,
74
+ };
75
+ columns.push({ attributeName, adapterData: columnData });
63
76
  }
64
77
  return columns;
65
78
  },
@@ -94,15 +107,53 @@ export function createBm25Adapter(options = {}) {
94
107
  buildFilterApply(sql, alias, column, filterValue, _build) {
95
108
  if (filterValue == null)
96
109
  return null;
97
- const { query, threshold } = filterValue;
110
+ const { query, threshold, includeChunks } = filterValue;
98
111
  if (!query || typeof query !== 'string' || query.trim().length === 0)
99
112
  return null;
100
- const bm25Index = column.adapterData;
113
+ const columnData = column.adapterData;
114
+ const bm25Index = columnData.bm25Index;
101
115
  const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
102
116
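  // Build the schema-qualified index name by hand: each part is wrapped in double quotes (embedded quotes are not escaped; no quoteQualifiedIdentifier helper is called)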
103
117
  const qualifiedIndexName = `"${bm25Index.schemaName}"."${bm25Index.indexName}"`;
104
118
  const bm25queryExpr = sql `to_bm25query(${sql.value(query)}, ${sql.value(qualifiedIndexName)})`;
105
119
  const scoreExpr = sql `(${columnExpr} <@> ${bm25queryExpr})`;
120
+ // Check for chunk-aware querying
121
+ const chunksInfo = columnData.chunksInfo;
122
+ if (chunksInfo && chunksInfo.searchIndexes.includes('bm25') && (includeChunks !== false)) {
123
+ const chunksTableRef = chunksInfo.chunksSchema
124
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
125
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
126
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
127
+ const chunkContentField = sql.identifier(chunksInfo.contentField);
128
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
129
+ const chunksAlias = sql.identifier('__bm25_chunks');
130
+ // BM25 on chunks requires an index name on the chunks table.
131
+ // We construct it from the chunks table schema + a conventional index name.
132
+ // The BM25 index on chunks is named: {chunks_table}_{content_field}_bm25_idx
133
+ const chunksIndexName = `"${chunksInfo.chunksSchema || bm25Index.schemaName}"."${chunksInfo.chunksTableName}_${chunksInfo.contentField}_bm25_idx"`;
134
+ const chunkBm25queryExpr = sql `to_bm25query(${sql.value(query)}, ${sql.value(chunksIndexName)})`;
135
+ const chunkScoreExpr = sql `(${chunksAlias}.${chunkContentField} <@> ${chunkBm25queryExpr})`;
136
+ // Subquery: MIN(bm25_score) across chunks (lower = better for BM25)
137
+ const chunkScoreSubquery = sql `(
138
+ SELECT MIN(${chunkScoreExpr})
139
+ FROM ${chunksTableRef} AS ${chunksAlias}
140
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
141
+ )`;
142
+ // Combined: LEAST of parent score and best chunk score (lower = better)
143
+ const combinedScoreExpr = sql `LEAST(
144
+ COALESCE(${scoreExpr}, 0::real),
145
+ COALESCE(${chunkScoreSubquery}, 0::real)
146
+ )`;
147
+ let whereClause = null;
148
+ if (threshold !== undefined && threshold !== null) {
149
+ whereClause = sql `${combinedScoreExpr} < ${sql.value(threshold)}`;
150
+ }
151
+ return {
152
+ whereClause,
153
+ scoreExpression: combinedScoreExpr,
154
+ };
155
+ }
156
+ // Standard (non-chunk) query
106
157
  let whereClause = null;
107
158
  if (threshold !== undefined && threshold !== null) {
108
159
  whereClause = sql `${scoreExpr} < ${sql.value(threshold)}`;
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Shared @hasChunks smart tag utilities.
3
+ *
4
+ * Extracts chunk table metadata from the @hasChunks smart tag on a codec.
5
+ * Used by pgvector, tsvector, BM25, and trgm adapters to build lateral
6
+ * subqueries against the chunks table for chunk-aware search.
7
+ */
8
+ /**
9
+ * Chunks table info detected from @hasChunks smart tag.
10
+ */
11
+ export interface ChunksInfo {
12
+ chunksSchema: string | null;
13
+ chunksTableName: string;
14
+ parentFkField: string;
15
+ parentPkField: string;
16
+ embeddingField: string;
17
+ /** Text content field on chunks table (e.g. "content") */
18
+ contentField: string;
19
+ /** tsvector field on chunks table, if fulltext search is enabled (e.g. "search") */
20
+ searchField: string | null;
21
+ /** Which search indexes are created on the chunks table (e.g. ["fulltext", "bm25"]) */
22
+ searchIndexes: string[];
23
+ }
24
+ /**
25
+ * Read @hasChunks smart tag from codec extensions.
26
+ *
27
+ * The tag value is a JSON object like:
28
+ * {
29
+ * "chunksTable": "documents_chunks",
30
+ * "chunksSchema": "app_private", // optional, defaults to parent table's schema
31
+ * "parentFk": "document_id", // optional, defaults to "parent_id"
32
+ * "parentPk": "id", // optional, defaults to "id"
33
+ * "embeddingField": "embedding", // optional, defaults to "embedding"
34
+ * "contentField": "content", // optional, defaults to "content"
35
+ * "searchField": "search", // optional, null if no fulltext
36
+ * "searchIndexes": ["fulltext","bm25"] // optional, defaults to []
37
+ * }
38
+ */
39
+ export declare function getChunksInfo(codec: any): ChunksInfo | undefined;
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Shared @hasChunks smart tag utilities.
3
+ *
4
+ * Extracts chunk table metadata from the @hasChunks smart tag on a codec.
5
+ * Used by pgvector, tsvector, BM25, and trgm adapters to build lateral
6
+ * subqueries against the chunks table for chunk-aware search.
7
+ */
8
+ /**
9
+ * Read @hasChunks smart tag from codec extensions.
10
+ *
11
+ * The tag value is a JSON object like:
12
+ * {
13
+ * "chunksTable": "documents_chunks",
14
+ * "chunksSchema": "app_private", // optional, defaults to parent table's schema
15
+ * "parentFk": "document_id", // optional, defaults to "parent_id"
16
+ * "parentPk": "id", // optional, defaults to "id"
17
+ * "embeddingField": "embedding", // optional, defaults to "embedding"
18
+ * "contentField": "content", // optional, defaults to "content"
19
+ * "searchField": "search", // optional, null if no fulltext
20
+ * "searchIndexes": ["fulltext","bm25"] // optional, defaults to []
21
+ * }
22
+ */
23
+ export function getChunksInfo(codec) {
24
+ const tags = codec?.extensions?.tags;
25
+ if (!tags)
26
+ return undefined;
27
+ const raw = tags.hasChunks;
28
+ if (!raw)
29
+ return undefined;
30
+ let parsed;
31
+ if (typeof raw === 'string') {
32
+ try {
33
+ parsed = JSON.parse(raw);
34
+ }
35
+ catch {
36
+ return undefined;
37
+ }
38
+ }
39
+ else if (typeof raw === 'object') {
40
+ parsed = raw;
41
+ }
42
+ else if (raw === true) {
43
+ return undefined;
44
+ }
45
+ else {
46
+ return undefined;
47
+ }
48
+ if (!parsed.chunksTable)
49
+ return undefined;
50
+ const chunksSchema = parsed.chunksSchema
51
+ || codec?.extensions?.pg?.schemaName
52
+ || null;
53
+ // Parse searchIndexes from tag (may be array or JSON string)
54
+ let searchIndexes = [];
55
+ if (Array.isArray(parsed.searchIndexes)) {
56
+ searchIndexes = parsed.searchIndexes;
57
+ }
58
+ else if (typeof parsed.searchIndexes === 'string') {
59
+ try {
60
+ const arr = JSON.parse(parsed.searchIndexes);
61
+ if (Array.isArray(arr))
62
+ searchIndexes = arr;
63
+ }
64
+ catch {
65
+ // ignore
66
+ }
67
+ }
68
+ return {
69
+ chunksSchema,
70
+ chunksTableName: parsed.chunksTable,
71
+ parentFkField: parsed.parentFk || 'parent_id',
72
+ parentPkField: parsed.parentPk || 'id',
73
+ embeddingField: parsed.embeddingField || 'embedding',
74
+ contentField: parsed.contentField || 'content',
75
+ searchField: parsed.searchField || null,
76
+ searchIndexes,
77
+ };
78
+ }
@@ -12,3 +12,5 @@ export { createTrgmAdapter } from './trgm';
12
12
  export type { TrgmAdapterOptions } from './trgm';
13
13
  export { createPgvectorAdapter } from './pgvector';
14
14
  export type { PgvectorAdapterOptions } from './pgvector';
15
+ export { getChunksInfo } from './chunks';
16
+ export type { ChunksInfo } from './chunks';
@@ -8,3 +8,4 @@ export { createTsvectorAdapter } from './tsvector';
8
8
  export { createBm25Adapter } from './bm25';
9
9
  export { createTrgmAdapter } from './trgm';
10
10
  export { createPgvectorAdapter } from './pgvector';
11
+ export { getChunksInfo } from './chunks';
@@ -5,6 +5,7 @@
5
5
  * pgvector operators (<=> cosine, <-> L2, <#> inner product).
6
6
  * Wraps the same SQL logic as graphile-pgvector but as a SearchAdapter.
7
7
  */
8
+ import { getChunksInfo } from './chunks';
8
9
  /**
9
10
  * Build a distance expression for the given metric.
10
11
  * Uses explicit SQL template literals for each operator to avoid sql.raw.
@@ -23,57 +24,6 @@ function buildDistanceExpr(sql, columnExpr, vectorExpr, metric) {
23
24
  function isVectorCodec(codec) {
24
25
  return codec?.name === 'vector';
25
26
  }
26
- /**
27
- * Read @hasChunks smart tag from codec extensions.
28
- * The tag value is a JSON object like:
29
- * {
30
- * "chunksTable": "documents_chunks",
31
- * "chunksSchema": "app_private", // optional, defaults to parent table's schema
32
- * "parentFk": "document_id", // optional, defaults to "parent_id"
33
- * "parentPk": "id", // optional, defaults to "id"
34
- * "embeddingField": "embedding" // optional, defaults to "embedding"
35
- * }
36
- */
37
- function getChunksInfo(codec) {
38
- const tags = codec?.extensions?.tags;
39
- if (!tags)
40
- return undefined;
41
- const raw = tags.hasChunks;
42
- if (!raw)
43
- return undefined;
44
- let parsed;
45
- if (typeof raw === 'string') {
46
- try {
47
- parsed = JSON.parse(raw);
48
- }
49
- catch {
50
- // If it's just "true" or a plain string, use convention-based defaults
51
- return undefined;
52
- }
53
- }
54
- else if (typeof raw === 'object') {
55
- parsed = raw;
56
- }
57
- else if (raw === true) {
58
- return undefined; // boolean true = no metadata, can't resolve
59
- }
60
- else {
61
- return undefined;
62
- }
63
- if (!parsed.chunksTable)
64
- return undefined;
65
- // Resolve schema: explicit chunksSchema > parent codec schema > null
66
- const chunksSchema = parsed.chunksSchema
67
- || codec?.extensions?.pg?.schemaName
68
- || null;
69
- return {
70
- chunksSchema,
71
- chunksTableName: parsed.chunksTable,
72
- parentFkField: parsed.parentFk || 'parent_id',
73
- parentPkField: parsed.parentPk || 'id',
74
- embeddingField: parsed.embeddingField || 'embedding',
75
- };
76
- }
77
27
  export function createPgvectorAdapter(options = {}) {
78
28
  const { filterPrefix = 'vector', defaultMetric = 'COSINE', enableChunkQuerying = true } = options;
79
29
  return {
@@ -3,6 +3,11 @@
3
3
  *
4
4
  * Detects text/varchar columns and generates trigram similarity scoring.
5
5
  * Wraps the same SQL logic as graphile-trgm but as a SearchAdapter.
6
+ *
7
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
8
+ * table has chunks with a trigram index, the adapter includes a correlated
9
+ * subquery to find the best-matching chunk and returns
10
+ * GREATEST(parent_similarity, chunk_similarity).
6
11
  */
7
12
  import type { SearchAdapter } from '../types';
8
13
  export interface TrgmAdapterOptions {
@@ -3,7 +3,13 @@
3
3
  *
4
4
  * Detects text/varchar columns and generates trigram similarity scoring.
5
5
  * Wraps the same SQL logic as graphile-trgm but as a SearchAdapter.
6
+ *
7
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
8
+ * table has chunks with a trigram index, the adapter includes a correlated
9
+ * subquery to find the best-matching chunk and returns
10
+ * GREATEST(parent_similarity, chunk_similarity).
6
11
  */
12
+ import { getChunksInfo } from './chunks';
7
13
  function isTextCodec(codec) {
8
14
  const name = codec?.name;
9
15
  return name === 'text' || name === 'varchar' || name === 'bpchar';
@@ -34,7 +40,13 @@ export function createTrgmAdapter(options = {}) {
34
40
  const columns = [];
35
41
  for (const [attributeName, attribute] of Object.entries(codec.attributes)) {
36
42
  if (isTextCodec(attribute.codec)) {
37
- columns.push({ attributeName });
43
+ // Store chunks info if available and chunks have trigram search
44
+ const chunksInfo = getChunksInfo(codec);
45
+ const hasChunkTrgm = chunksInfo?.searchIndexes.includes('trigram');
46
+ columns.push({
47
+ attributeName,
48
+ adapterData: hasChunkTrgm ? chunksInfo : undefined,
49
+ });
38
50
  }
39
51
  }
40
52
  return columns;
@@ -70,12 +82,42 @@ export function createTrgmAdapter(options = {}) {
70
82
  buildFilterApply(sql, alias, column, filterValue, _build) {
71
83
  if (filterValue == null)
72
84
  return null;
73
- const { value, threshold } = filterValue;
85
+ const { value, threshold, includeChunks } = filterValue;
74
86
  if (!value || typeof value !== 'string' || value.trim().length === 0)
75
87
  return null;
76
88
  const th = threshold != null ? threshold : defaultThreshold;
77
89
  const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
78
90
  const similarityExpr = sql `similarity(${columnExpr}, ${sql.value(value)})`;
91
+ // Check for chunk-aware querying
92
+ const chunksInfo = column.adapterData;
93
+ if (chunksInfo && chunksInfo.searchIndexes.includes('trigram') && (includeChunks !== false)) {
94
+ const chunksTableRef = chunksInfo.chunksSchema
95
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
96
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
97
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
98
+ const chunkContentField = sql.identifier(chunksInfo.contentField);
99
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
100
+ const chunksAlias = sql.identifier('__trgm_chunks');
101
+ // Subquery: MAX(similarity) across chunks (higher = better for trgm)
102
+ const chunkSimilaritySubquery = sql `(
103
+ SELECT MAX(similarity(${chunksAlias}.${chunkContentField}, ${sql.value(value)}))
104
+ FROM ${chunksTableRef} AS ${chunksAlias}
105
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
106
+ AND similarity(${chunksAlias}.${chunkContentField}, ${sql.value(value)}) > ${sql.value(th)}
107
+ )`;
108
+ // Combined: GREATEST of parent similarity and best chunk similarity
109
+ const combinedSimilarityExpr = sql `GREATEST(
110
+ COALESCE(${similarityExpr}, 0::real),
111
+ COALESCE(${chunkSimilaritySubquery}, 0::real)
112
+ )`;
113
+ // WHERE: parent matches OR any chunk matches
114
+ const whereClause = sql `(${similarityExpr} > ${sql.value(th)} OR ${chunkSimilaritySubquery} IS NOT NULL)`;
115
+ return {
116
+ whereClause,
117
+ scoreExpression: combinedSimilarityExpr,
118
+ };
119
+ }
120
+ // Standard (non-chunk) query
79
121
  return {
80
122
  whereClause: sql `${similarityExpr} > ${sql.value(th)}`,
81
123
  scoreExpression: similarityExpr,
@@ -3,6 +3,11 @@
3
3
  *
4
4
  * Detects tsvector columns and generates ts_rank-based scoring.
5
5
  * Wraps the same SQL logic as graphile-tsvector but as a SearchAdapter.
6
+ *
7
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
8
+ * table has chunks with a tsvector search field, the adapter includes a
9
+ * correlated subquery to find the best-matching chunk and returns
10
+ * GREATEST(parent_rank, chunk_rank).
6
11
  */
7
12
  import type { SearchAdapter } from '../types';
8
13
  export interface TsvectorAdapterOptions {
@@ -3,7 +3,13 @@
3
3
  *
4
4
  * Detects tsvector columns and generates ts_rank-based scoring.
5
5
  * Wraps the same SQL logic as graphile-tsvector but as a SearchAdapter.
6
+ *
7
+ * Supports chunk-aware querying via @hasChunks smart tag: when the parent
8
+ * table has chunks with a tsvector search field, the adapter includes a
9
+ * correlated subquery to find the best-matching chunk and returns
10
+ * GREATEST(parent_rank, chunk_rank).
6
11
  */
12
+ import { getChunksInfo } from './chunks';
7
13
  function isTsvectorCodec(codec) {
8
14
  // In graphile-build-pg >= 5.0.0-rc.8, the built-in TYPES.tsvector codec
9
15
  // has name === 'tsvector' but does NOT have extensions.pg. We need to
@@ -33,7 +39,14 @@ export function createTsvectorAdapter(options = {}) {
33
39
  const columns = [];
34
40
  for (const [attributeName, attribute] of Object.entries(codec.attributes)) {
35
41
  if (isTsvectorCodec(attribute.codec)) {
36
- columns.push({ attributeName });
42
+ // Store chunks info if available and chunks have fulltext search
43
+ const chunksInfo = getChunksInfo(codec);
44
+ const hasChunkFulltext = chunksInfo?.searchField &&
45
+ chunksInfo.searchIndexes.includes('fulltext');
46
+ columns.push({
47
+ attributeName,
48
+ adapterData: hasChunkFulltext ? chunksInfo : undefined,
49
+ });
37
50
  }
38
51
  }
39
52
  return columns;
@@ -47,11 +60,51 @@ export function createTsvectorAdapter(options = {}) {
47
60
  buildFilterApply(sql, alias, column, filterValue, _build) {
48
61
  if (filterValue == null)
49
62
  return null;
50
- const val = typeof filterValue === 'string' ? filterValue : String(filterValue);
63
+ // Handle includeChunks option when filter is an object
64
+ let val;
65
+ let includeChunks;
66
+ if (typeof filterValue === 'object' && filterValue !== null && 'query' in filterValue) {
67
+ val = typeof filterValue.query === 'string' ? filterValue.query : String(filterValue.query);
68
+ includeChunks = filterValue.includeChunks;
69
+ }
70
+ else {
71
+ val = typeof filterValue === 'string' ? filterValue : String(filterValue);
72
+ }
51
73
  if (val.trim().length === 0)
52
74
  return null;
53
75
  const tsquery = sql `websearch_to_tsquery(${sql.literal(tsConfig)}, ${sql.value(val)})`;
54
76
  const columnExpr = sql `${alias}.${sql.identifier(column.attributeName)}`;
77
+ // Check for chunk-aware querying
78
+ const chunksInfo = column.adapterData;
79
+ if (chunksInfo && chunksInfo.searchField && (includeChunks !== false)) {
80
+ const chunksTableRef = chunksInfo.chunksSchema
81
+ ? sql `${sql.identifier(chunksInfo.chunksSchema)}.${sql.identifier(chunksInfo.chunksTableName)}`
82
+ : sql `${sql.identifier(chunksInfo.chunksTableName)}`;
83
+ const parentFk = sql.identifier(chunksInfo.parentFkField);
84
+ const chunkSearchField = sql.identifier(chunksInfo.searchField);
85
+ const parentId = sql `${alias}.${sql.identifier(chunksInfo.parentPkField)}`;
86
+ const chunksAlias = sql.identifier('__tsv_chunks');
87
+ // Subquery: MAX(ts_rank) across matching chunks
88
+ const chunkRankSubquery = sql `(
89
+ SELECT MAX(ts_rank(${chunksAlias}.${chunkSearchField}, ${tsquery}))
90
+ FROM ${chunksTableRef} AS ${chunksAlias}
91
+ WHERE ${chunksAlias}.${parentFk} = ${parentId}
92
+ AND ${chunksAlias}.${chunkSearchField} @@ ${tsquery}
93
+ )`;
94
+ const parentRankExpr = sql `ts_rank(${columnExpr}, ${tsquery})`;
95
+ // Combined: GREATEST of parent rank and best chunk rank
96
+ const combinedRankExpr = sql `GREATEST(
97
+ COALESCE(CASE WHEN ${columnExpr} @@ ${tsquery} THEN ${parentRankExpr} ELSE 0::real END, 0::real),
98
+ COALESCE(${chunkRankSubquery}, 0::real)
99
+ )`;
100
+ // WHERE: parent matches OR any chunk matches
101
+ const whereClause = sql `(${columnExpr} @@ ${tsquery} OR ${chunkRankSubquery} IS NOT NULL)`;
102
+ return {
103
+ whereClause,
104
+ scoreExpression: combinedRankExpr,
105
+ };
106
+ }
107
+ // Standard (non-chunk) query
55
108
  return {
56
109
  whereClause: sql `${columnExpr} @@ ${tsquery}`,
57
110
  scoreExpression: sql `ts_rank(${columnExpr}, ${tsquery})`,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "graphile-search",
3
- "version": "1.12.1",
3
+ "version": "1.13.0",
4
4
  "description": "Unified PostGraphile v5 search plugin — abstracts tsvector, BM25, pg_trgm, and pgvector behind a single adapter-based architecture with composite searchScore",
5
5
  "author": "Constructive <developers@constructive.io>",
6
6
  "homepage": "https://github.com/constructive-io/constructive",
@@ -62,5 +62,5 @@
62
62
  "hybrid-search",
63
63
  "searchScore"
64
64
  ],
65
- "gitHead": "62282a9e2b4a72a68c6c3c6a8729e3a0a42a54b2"
65
+ "gitHead": "1aaafe14a8ba4eeeaab099f5fdc69865ce4e2a2e"
66
66
  }