@karmaniverous/jeeves-watcher 0.8.4 → 0.9.0-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/jeeves-watcher/index.js +222 -41
- package/dist/index.d.ts +41 -0
- package/dist/index.js +177 -41
- package/package.json +6 -6
|
@@ -621,18 +621,23 @@ class ValuesManager extends JsonFileStore {
|
|
|
621
621
|
index[ruleName] ??= {};
|
|
622
622
|
const ruleValues = index[ruleName];
|
|
623
623
|
for (const [key, value] of Object.entries(metadata)) {
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
const
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
624
|
+
// Decompose arrays into individual trackable elements so that
|
|
625
|
+
// array-typed fields (e.g. domains: ["email"]) are indexed.
|
|
626
|
+
const items = Array.isArray(value) ? value : [value];
|
|
627
|
+
for (const item of items) {
|
|
628
|
+
if (!this.isTrackable(item))
|
|
629
|
+
continue;
|
|
630
|
+
ruleValues[key] ??= [];
|
|
631
|
+
const arr = ruleValues[key];
|
|
632
|
+
if (!arr.includes(item)) {
|
|
633
|
+
arr.push(item);
|
|
634
|
+
arr.sort((a, b) => {
|
|
635
|
+
if (typeof a === typeof b) {
|
|
636
|
+
return String(a).localeCompare(String(b));
|
|
637
|
+
}
|
|
638
|
+
return typeof a < typeof b ? -1 : 1;
|
|
639
|
+
});
|
|
640
|
+
}
|
|
636
641
|
}
|
|
637
642
|
}
|
|
638
643
|
this.save();
|
|
@@ -1491,6 +1496,31 @@ function validateSchemaCompleteness(schema, ruleName) {
|
|
|
1491
1496
|
}
|
|
1492
1497
|
}
|
|
1493
1498
|
}
|
|
1499
|
+
/** Types that can produce trackable facet values. */
|
|
1500
|
+
const FACETABLE_TYPES = new Set([
|
|
1501
|
+
'string',
|
|
1502
|
+
'number',
|
|
1503
|
+
'boolean',
|
|
1504
|
+
'integer',
|
|
1505
|
+
'array',
|
|
1506
|
+
]);
|
|
1507
|
+
/**
|
|
1508
|
+
* Validate that uiHint and enum are only applied to facetable property types.
|
|
1509
|
+
* Throws if a non-facetable type (e.g. object) declares uiHint or enum.
|
|
1510
|
+
*
|
|
1511
|
+
* @param schema - Resolved schema to validate.
|
|
1512
|
+
* @param ruleName - Name of the rule (for error messages).
|
|
1513
|
+
*/
|
|
1514
|
+
function validateFacetTypes(schema, ruleName) {
|
|
1515
|
+
for (const [propName, propDef] of Object.entries(schema.properties)) {
|
|
1516
|
+
if (!propDef.type)
|
|
1517
|
+
continue;
|
|
1518
|
+
if (!FACETABLE_TYPES.has(propDef.type) &&
|
|
1519
|
+
(propDef.uiHint !== undefined || propDef.enum !== undefined)) {
|
|
1520
|
+
throw new Error(`Property "${propName}" in rule "${ruleName}" has type "${propDef.type}" with uiHint/enum. Facet hints are only valid on string, number, boolean, integer, or array types.`);
|
|
1521
|
+
}
|
|
1522
|
+
}
|
|
1523
|
+
}
|
|
1494
1524
|
|
|
1495
1525
|
/**
|
|
1496
1526
|
* @module rules/apply
|
|
@@ -1557,6 +1587,7 @@ async function applyRules(compiledRules, attributes, options = {}) {
|
|
|
1557
1587
|
});
|
|
1558
1588
|
// Validate schema completeness
|
|
1559
1589
|
validateSchemaCompleteness(mergedSchema, rule.name);
|
|
1590
|
+
validateFacetTypes(mergedSchema, rule.name);
|
|
1560
1591
|
// Resolve and coerce metadata
|
|
1561
1592
|
const schemaOutput = resolveAndCoerce(mergedSchema, attributes, hbs);
|
|
1562
1593
|
merged = { ...merged, ...schemaOutput };
|
|
@@ -2668,6 +2699,7 @@ function validateInferenceRuleSchemas(parsed) {
|
|
|
2668
2699
|
globalSchemas: parsed.schemas,
|
|
2669
2700
|
});
|
|
2670
2701
|
validateSchemaCompleteness(merged, rule.name);
|
|
2702
|
+
validateFacetTypes(merged, rule.name);
|
|
2671
2703
|
}
|
|
2672
2704
|
catch (error) {
|
|
2673
2705
|
return [
|
|
@@ -2748,6 +2780,9 @@ function computeRulesHash(rules) {
|
|
|
2748
2780
|
* A property is facetable if it declares `uiHint` or `enum`.
|
|
2749
2781
|
*/
|
|
2750
2782
|
function isFacetable(prop) {
|
|
2783
|
+
// Reject non-primitive types that can never produce trackable values.
|
|
2784
|
+
if (prop.type === 'object')
|
|
2785
|
+
return false;
|
|
2751
2786
|
return prop.uiHint !== undefined || prop.enum !== undefined;
|
|
2752
2787
|
}
|
|
2753
2788
|
/**
|
|
@@ -3338,6 +3373,44 @@ function createRulesUnregisterParamHandler(deps) {
|
|
|
3338
3373
|
}, deps.logger, 'RulesUnregister');
|
|
3339
3374
|
}
|
|
3340
3375
|
|
|
3376
|
+
/**
|
|
3377
|
+
* @module api/handlers/scan
|
|
3378
|
+
* Fastify route handler for POST /scan. Filter-only point query without vector search.
|
|
3379
|
+
*/
|
|
3380
|
+
/**
|
|
3381
|
+
* Create handler for POST /scan.
|
|
3382
|
+
*
|
|
3383
|
+
* @param deps - Route dependencies.
|
|
3384
|
+
*/
|
|
3385
|
+
function createScanHandler(deps) {
|
|
3386
|
+
return wrapHandler(async (request, reply) => {
|
|
3387
|
+
const { filter, limit = 100, cursor, fields, countOnly } = request.body;
|
|
3388
|
+
if (!filter || typeof filter !== 'object') {
|
|
3389
|
+
deps.logger.warn('Scan rejected: missing or invalid filter');
|
|
3390
|
+
void reply
|
|
3391
|
+
.status(400)
|
|
3392
|
+
.send({ error: 'Missing required field: filter (object)' });
|
|
3393
|
+
return;
|
|
3394
|
+
}
|
|
3395
|
+
if (typeof limit !== 'number' || limit < 1 || limit > 1000) {
|
|
3396
|
+
deps.logger.warn({ limit }, 'Scan rejected: limit out of bounds');
|
|
3397
|
+
void reply
|
|
3398
|
+
.status(400)
|
|
3399
|
+
.send({ error: 'limit must be between 1 and 1000' });
|
|
3400
|
+
return;
|
|
3401
|
+
}
|
|
3402
|
+
if (countOnly) {
|
|
3403
|
+
const count = await deps.vectorStore.count(filter);
|
|
3404
|
+
return { count };
|
|
3405
|
+
}
|
|
3406
|
+
const result = await deps.vectorStore.scrollPage(filter, limit, cursor, fields);
|
|
3407
|
+
return {
|
|
3408
|
+
points: result.points,
|
|
3409
|
+
cursor: result.nextCursor ?? null,
|
|
3410
|
+
};
|
|
3411
|
+
}, deps.logger, 'Scan');
|
|
3412
|
+
}
|
|
3413
|
+
|
|
3341
3414
|
/**
|
|
3342
3415
|
* @module api/handlers/search
|
|
3343
3416
|
* Fastify route handler for POST /search. Embeds a query and performs vector store similarity search.
|
|
@@ -3519,6 +3592,10 @@ function createApiServer(options) {
|
|
|
3519
3592
|
textWeight: config.search.hybrid.textWeight,
|
|
3520
3593
|
}
|
|
3521
3594
|
: undefined;
|
|
3595
|
+
app.post('/scan', createScanHandler({
|
|
3596
|
+
vectorStore,
|
|
3597
|
+
logger,
|
|
3598
|
+
}));
|
|
3522
3599
|
app.post('/search', createSearchHandler({
|
|
3523
3600
|
embeddingProvider,
|
|
3524
3601
|
vectorStore,
|
|
@@ -4776,6 +4853,28 @@ async function getCollectionInfo(client, collectionName) {
|
|
|
4776
4853
|
return { pointCount, dimensions, payloadFields };
|
|
4777
4854
|
}
|
|
4778
4855
|
|
|
4856
|
+
/**
|
|
4857
|
+
* @module vectorStore/count
|
|
4858
|
+
* Count utility for Qdrant collection points.
|
|
4859
|
+
*/
|
|
4860
|
+
/**
|
|
4861
|
+
* Count points in a Qdrant collection matching an optional filter.
|
|
4862
|
+
*
|
|
4863
|
+
* Uses exact counting for accurate results.
|
|
4864
|
+
*
|
|
4865
|
+
* @param client - The Qdrant client instance.
|
|
4866
|
+
* @param collectionName - The collection to count.
|
|
4867
|
+
* @param filter - Optional Qdrant filter.
|
|
4868
|
+
* @returns The number of matching points.
|
|
4869
|
+
*/
|
|
4870
|
+
async function countPoints(client, collectionName, filter) {
|
|
4871
|
+
const result = await client.count(collectionName, {
|
|
4872
|
+
...(filter ? { filter } : {}),
|
|
4873
|
+
exact: true,
|
|
4874
|
+
});
|
|
4875
|
+
return result.count;
|
|
4876
|
+
}
|
|
4877
|
+
|
|
4779
4878
|
/**
|
|
4780
4879
|
* @module vectorStore/hybridSearch
|
|
4781
4880
|
* Hybrid search and text index helpers for Qdrant vector store.
|
|
@@ -4875,11 +4974,43 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
|
|
|
4875
4974
|
|
|
4876
4975
|
/**
|
|
4877
4976
|
* @module vectorStore/scroll
|
|
4878
|
-
*
|
|
4977
|
+
* Scroll utilities for paginating through Qdrant collection points.
|
|
4978
|
+
*/
|
|
4979
|
+
/**
|
|
4980
|
+
* Scroll one page of points matching a filter.
|
|
4981
|
+
*
|
|
4982
|
+
* @param client - The Qdrant client instance.
|
|
4983
|
+
* @param collectionName - The collection to scroll.
|
|
4984
|
+
* @param filter - Optional Qdrant filter.
|
|
4985
|
+
* @param limit - Page size.
|
|
4986
|
+
* @param offset - Cursor offset from previous page.
|
|
4987
|
+
* @param fields - Optional payload field projection (array of field names).
|
|
4988
|
+
* @returns Page of points and next cursor.
|
|
4879
4989
|
*/
|
|
4990
|
+
async function scrollPage(client, collectionName, filter, limit = 100, offset, fields) {
|
|
4991
|
+
const result = await client.scroll(collectionName, {
|
|
4992
|
+
limit,
|
|
4993
|
+
with_payload: fields ? fields : true,
|
|
4994
|
+
with_vector: false,
|
|
4995
|
+
...(filter ? { filter } : {}),
|
|
4996
|
+
...(offset !== undefined ? { offset } : {}),
|
|
4997
|
+
});
|
|
4998
|
+
return {
|
|
4999
|
+
points: result.points.map((p) => ({
|
|
5000
|
+
id: String(p.id),
|
|
5001
|
+
payload: p.payload,
|
|
5002
|
+
})),
|
|
5003
|
+
nextCursor: typeof result.next_page_offset === 'string' ||
|
|
5004
|
+
typeof result.next_page_offset === 'number'
|
|
5005
|
+
? result.next_page_offset
|
|
5006
|
+
: undefined,
|
|
5007
|
+
};
|
|
5008
|
+
}
|
|
4880
5009
|
/**
|
|
4881
5010
|
* Scroll through all points in a Qdrant collection matching a filter.
|
|
4882
5011
|
*
|
|
5012
|
+
* Iterates over pages using {@link scrollPage}.
|
|
5013
|
+
*
|
|
4883
5014
|
* @param client - The Qdrant client instance.
|
|
4884
5015
|
* @param collectionName - The collection to scroll.
|
|
4885
5016
|
* @param filter - Optional Qdrant filter.
|
|
@@ -4887,32 +5018,14 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
|
|
|
4887
5018
|
* @yields Scrolled points.
|
|
4888
5019
|
*/
|
|
4889
5020
|
async function* scrollCollection(client, collectionName, filter, limit = 100) {
|
|
4890
|
-
let
|
|
4891
|
-
|
|
4892
|
-
const
|
|
4893
|
-
|
|
4894
|
-
|
|
4895
|
-
with_vector: false,
|
|
4896
|
-
...(filter ? { filter } : {}),
|
|
4897
|
-
...(offset !== undefined ? { offset } : {}),
|
|
4898
|
-
});
|
|
4899
|
-
for (const point of result.points) {
|
|
4900
|
-
yield {
|
|
4901
|
-
id: String(point.id),
|
|
4902
|
-
payload: point.payload,
|
|
4903
|
-
};
|
|
4904
|
-
}
|
|
4905
|
-
const nextOffset = result.next_page_offset;
|
|
4906
|
-
if (nextOffset === null || nextOffset === undefined) {
|
|
4907
|
-
break;
|
|
4908
|
-
}
|
|
4909
|
-
if (typeof nextOffset === 'string' || typeof nextOffset === 'number') {
|
|
4910
|
-
offset = nextOffset;
|
|
5021
|
+
let cursor;
|
|
5022
|
+
do {
|
|
5023
|
+
const page = await scrollPage(client, collectionName, filter, limit, cursor);
|
|
5024
|
+
for (const point of page.points) {
|
|
5025
|
+
yield point;
|
|
4911
5026
|
}
|
|
4912
|
-
|
|
4913
|
-
|
|
4914
|
-
}
|
|
4915
|
-
}
|
|
5027
|
+
cursor = page.nextCursor;
|
|
5028
|
+
} while (cursor !== undefined);
|
|
4916
5029
|
}
|
|
4917
5030
|
|
|
4918
5031
|
/**
|
|
@@ -4956,6 +5069,15 @@ class VectorStoreClient {
|
|
|
4956
5069
|
checkCompatibility: false,
|
|
4957
5070
|
});
|
|
4958
5071
|
}
|
|
5072
|
+
/**
|
|
5073
|
+
* Count points matching a filter.
|
|
5074
|
+
*
|
|
5075
|
+
* @param filter - Optional Qdrant filter.
|
|
5076
|
+
* @returns The number of matching points.
|
|
5077
|
+
*/
|
|
5078
|
+
async count(filter) {
|
|
5079
|
+
return countPoints(this.client, this.collectionName, filter);
|
|
5080
|
+
}
|
|
4959
5081
|
/**
|
|
4960
5082
|
* Ensure the collection exists with correct dimensions and Cosine distance.
|
|
4961
5083
|
*/
|
|
@@ -5136,6 +5258,18 @@ class VectorStoreClient {
|
|
|
5136
5258
|
async hybridSearch(vector, queryText, limit, textWeight, filter) {
|
|
5137
5259
|
return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
|
|
5138
5260
|
}
|
|
5261
|
+
/**
|
|
5262
|
+
* Scroll one page of points matching a filter.
|
|
5263
|
+
*
|
|
5264
|
+
* @param filter - Optional Qdrant filter.
|
|
5265
|
+
* @param limit - Page size.
|
|
5266
|
+
* @param offset - Cursor offset from previous page.
|
|
5267
|
+
* @param fields - Optional field projection.
|
|
5268
|
+
* @returns Page of points and next cursor.
|
|
5269
|
+
*/
|
|
5270
|
+
async scrollPage(filter, limit = 100, offset, fields) {
|
|
5271
|
+
return scrollPage(this.client, this.collectionName, filter, limit, offset, fields);
|
|
5272
|
+
}
|
|
5139
5273
|
/**
|
|
5140
5274
|
* Scroll through all points matching a filter.
|
|
5141
5275
|
*
|
|
@@ -5987,6 +6121,9 @@ class JeevesWatcher {
|
|
|
5987
6121
|
const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
|
|
5988
6122
|
this.helperIntrospection = await introspectHelpers(this.config, configDir);
|
|
5989
6123
|
const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
|
|
6124
|
+
const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
|
|
6125
|
+
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
6126
|
+
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
5990
6127
|
const processor = this.factories.createDocumentProcessor({
|
|
5991
6128
|
config: processorConfig,
|
|
5992
6129
|
embeddingProvider,
|
|
@@ -5994,6 +6131,8 @@ class JeevesWatcher {
|
|
|
5994
6131
|
compiledRules,
|
|
5995
6132
|
logger,
|
|
5996
6133
|
templateEngine,
|
|
6134
|
+
issuesManager: this.issuesManager,
|
|
6135
|
+
valuesManager: this.valuesManager,
|
|
5997
6136
|
});
|
|
5998
6137
|
this.processor = processor;
|
|
5999
6138
|
this.queue = this.factories.createEventQueue({
|
|
@@ -6002,9 +6141,6 @@ class JeevesWatcher {
|
|
|
6002
6141
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
6003
6142
|
});
|
|
6004
6143
|
this.watcher = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions);
|
|
6005
|
-
const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
|
|
6006
|
-
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
6007
|
-
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
6008
6144
|
this.server = await this.startApiServer();
|
|
6009
6145
|
this.watcher.start();
|
|
6010
6146
|
this.startConfigWatch();
|
|
@@ -6482,6 +6618,50 @@ function registerReindexCommand(cli) {
|
|
|
6482
6618
|
});
|
|
6483
6619
|
}
|
|
6484
6620
|
|
|
6621
|
+
/**
|
|
6622
|
+
* @module commands/scan
|
|
6623
|
+
*
|
|
6624
|
+
* CLI command: scan.
|
|
6625
|
+
*/
|
|
6626
|
+
function registerScanCommand(cli) {
|
|
6627
|
+
const command = cli
|
|
6628
|
+
.command('scan')
|
|
6629
|
+
.description('Scan the vector store (POST /scan)')
|
|
6630
|
+
.option('-f, --filter <filter>', 'Qdrant filter (JSON string)', '{}')
|
|
6631
|
+
.option('-l, --limit <limit>', 'Max results', '100')
|
|
6632
|
+
.option('-c, --cursor <cursor>', 'Cursor from previous response')
|
|
6633
|
+
.option('--fields <fields>', 'Fields to return (comma-separated)')
|
|
6634
|
+
.option('--count-only', 'Return count only');
|
|
6635
|
+
withApiOptions(command).action(async (options) => {
|
|
6636
|
+
let filterObj = {};
|
|
6637
|
+
try {
|
|
6638
|
+
if (options.filter) {
|
|
6639
|
+
filterObj = JSON.parse(options.filter);
|
|
6640
|
+
}
|
|
6641
|
+
}
|
|
6642
|
+
catch (error) {
|
|
6643
|
+
console.error('Invalid filter JSON:', error);
|
|
6644
|
+
process.exit(1);
|
|
6645
|
+
}
|
|
6646
|
+
const fieldsArray = options.fields
|
|
6647
|
+
? options.fields.split(',').map((f) => f.trim())
|
|
6648
|
+
: undefined;
|
|
6649
|
+
await runApiCommand({
|
|
6650
|
+
host: options.host,
|
|
6651
|
+
port: options.port,
|
|
6652
|
+
method: 'POST',
|
|
6653
|
+
path: '/scan',
|
|
6654
|
+
body: {
|
|
6655
|
+
filter: filterObj,
|
|
6656
|
+
limit: Number(options.limit),
|
|
6657
|
+
cursor: options.cursor,
|
|
6658
|
+
fields: fieldsArray,
|
|
6659
|
+
countOnly: options.countOnly,
|
|
6660
|
+
},
|
|
6661
|
+
});
|
|
6662
|
+
});
|
|
6663
|
+
}
|
|
6664
|
+
|
|
6485
6665
|
/**
|
|
6486
6666
|
* @module commands/search
|
|
6487
6667
|
*
|
|
@@ -6647,6 +6827,7 @@ registerStatusCommand(cli);
|
|
|
6647
6827
|
registerReindexCommand(cli);
|
|
6648
6828
|
registerRebuildMetadataCommand(cli);
|
|
6649
6829
|
registerSearchCommand(cli);
|
|
6830
|
+
registerScanCommand(cli);
|
|
6650
6831
|
registerEnrichCommand(cli);
|
|
6651
6832
|
registerConfigReindexCommand(cli);
|
|
6652
6833
|
registerServiceCommand(cli);
|
package/dist/index.d.ts
CHANGED
|
@@ -676,6 +676,13 @@ interface ScrolledPoint {
|
|
|
676
676
|
/** The payload metadata. */
|
|
677
677
|
payload: Record<string, unknown>;
|
|
678
678
|
}
|
|
679
|
+
/** Result of a single scroll page. */
|
|
680
|
+
interface ScrollPageResult {
|
|
681
|
+
/** Matched points. */
|
|
682
|
+
points: ScrolledPoint[];
|
|
683
|
+
/** Cursor for next page, or `undefined` when no more pages. */
|
|
684
|
+
nextCursor?: string | number;
|
|
685
|
+
}
|
|
679
686
|
/** Payload field schema information as reported by Qdrant. */
|
|
680
687
|
interface PayloadFieldSchema {
|
|
681
688
|
/** Qdrant data type for the field (e.g. `keyword`, `text`, `integer`). */
|
|
@@ -702,6 +709,13 @@ interface VectorStore {
|
|
|
702
709
|
* Ensure the collection exists with correct configuration.
|
|
703
710
|
*/
|
|
704
711
|
ensureCollection(): Promise<void>;
|
|
712
|
+
/**
|
|
713
|
+
* Count points matching a filter.
|
|
714
|
+
*
|
|
715
|
+
* @param filter - Optional Qdrant filter.
|
|
716
|
+
* @returns The number of matching points.
|
|
717
|
+
*/
|
|
718
|
+
count(filter?: Record<string, unknown>): Promise<number>;
|
|
705
719
|
/**
|
|
706
720
|
* Upsert points into the collection.
|
|
707
721
|
*
|
|
@@ -742,6 +756,16 @@ interface VectorStore {
|
|
|
742
756
|
* @returns An array of search results.
|
|
743
757
|
*/
|
|
744
758
|
search(vector: number[], limit: number, filter?: Record<string, unknown>, offset?: number): Promise<SearchResult[]>;
|
|
759
|
+
/**
|
|
760
|
+
* Scroll one page of points matching a filter.
|
|
761
|
+
*
|
|
762
|
+
* @param filter - Optional Qdrant filter.
|
|
763
|
+
* @param limit - Page size.
|
|
764
|
+
* @param offset - Cursor offset from previous page.
|
|
765
|
+
* @param fields - Optional field projection.
|
|
766
|
+
* @returns Page of points and next cursor.
|
|
767
|
+
*/
|
|
768
|
+
scrollPage(filter?: Record<string, unknown>, limit?: number, offset?: string | number, fields?: string[]): Promise<ScrollPageResult>;
|
|
745
769
|
/**
|
|
746
770
|
* Scroll through all points matching a filter.
|
|
747
771
|
*
|
|
@@ -798,6 +822,13 @@ declare class VectorStoreClient implements VectorStore {
|
|
|
798
822
|
* Creating a fresh client for write operations ensures clean TCP connections.
|
|
799
823
|
*/
|
|
800
824
|
private createClient;
|
|
825
|
+
/**
|
|
826
|
+
* Count points matching a filter.
|
|
827
|
+
*
|
|
828
|
+
* @param filter - Optional Qdrant filter.
|
|
829
|
+
* @returns The number of matching points.
|
|
830
|
+
*/
|
|
831
|
+
count(filter?: Record<string, unknown>): Promise<number>;
|
|
801
832
|
/**
|
|
802
833
|
* Ensure the collection exists with correct dimensions and Cosine distance.
|
|
803
834
|
*/
|
|
@@ -877,6 +908,16 @@ declare class VectorStoreClient implements VectorStore {
|
|
|
877
908
|
* @returns An array of search results.
|
|
878
909
|
*/
|
|
879
910
|
hybridSearch(vector: number[], queryText: string, limit: number, textWeight: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
|
|
911
|
+
/**
|
|
912
|
+
* Scroll one page of points matching a filter.
|
|
913
|
+
*
|
|
914
|
+
* @param filter - Optional Qdrant filter.
|
|
915
|
+
* @param limit - Page size.
|
|
916
|
+
* @param offset - Cursor offset from previous page.
|
|
917
|
+
* @param fields - Optional field projection.
|
|
918
|
+
* @returns Page of points and next cursor.
|
|
919
|
+
*/
|
|
920
|
+
scrollPage(filter?: Record<string, unknown>, limit?: number, offset?: string | number, fields?: string[]): Promise<ScrollPageResult>;
|
|
880
921
|
/**
|
|
881
922
|
* Scroll through all points matching a filter.
|
|
882
923
|
*
|
package/dist/index.js
CHANGED
|
@@ -846,6 +846,31 @@ function validateSchemaCompleteness(schema, ruleName) {
|
|
|
846
846
|
}
|
|
847
847
|
}
|
|
848
848
|
}
|
|
849
|
+
/** Types that can produce trackable facet values. */
|
|
850
|
+
const FACETABLE_TYPES = new Set([
|
|
851
|
+
'string',
|
|
852
|
+
'number',
|
|
853
|
+
'boolean',
|
|
854
|
+
'integer',
|
|
855
|
+
'array',
|
|
856
|
+
]);
|
|
857
|
+
/**
|
|
858
|
+
* Validate that uiHint and enum are only applied to facetable property types.
|
|
859
|
+
* Throws if a non-facetable type (e.g. object) declares uiHint or enum.
|
|
860
|
+
*
|
|
861
|
+
* @param schema - Resolved schema to validate.
|
|
862
|
+
* @param ruleName - Name of the rule (for error messages).
|
|
863
|
+
*/
|
|
864
|
+
function validateFacetTypes(schema, ruleName) {
|
|
865
|
+
for (const [propName, propDef] of Object.entries(schema.properties)) {
|
|
866
|
+
if (!propDef.type)
|
|
867
|
+
continue;
|
|
868
|
+
if (!FACETABLE_TYPES.has(propDef.type) &&
|
|
869
|
+
(propDef.uiHint !== undefined || propDef.enum !== undefined)) {
|
|
870
|
+
throw new Error(`Property "${propName}" in rule "${ruleName}" has type "${propDef.type}" with uiHint/enum. Facet hints are only valid on string, number, boolean, integer, or array types.`);
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
}
|
|
849
874
|
|
|
850
875
|
/**
|
|
851
876
|
* @module rules/apply
|
|
@@ -912,6 +937,7 @@ async function applyRules(compiledRules, attributes, options = {}) {
|
|
|
912
937
|
});
|
|
913
938
|
// Validate schema completeness
|
|
914
939
|
validateSchemaCompleteness(mergedSchema, rule.name);
|
|
940
|
+
validateFacetTypes(mergedSchema, rule.name);
|
|
915
941
|
// Resolve and coerce metadata
|
|
916
942
|
const schemaOutput = resolveAndCoerce(mergedSchema, attributes, hbs);
|
|
917
943
|
merged = { ...merged, ...schemaOutput };
|
|
@@ -2359,6 +2385,7 @@ function validateInferenceRuleSchemas(parsed) {
|
|
|
2359
2385
|
globalSchemas: parsed.schemas,
|
|
2360
2386
|
});
|
|
2361
2387
|
validateSchemaCompleteness(merged, rule.name);
|
|
2388
|
+
validateFacetTypes(merged, rule.name);
|
|
2362
2389
|
}
|
|
2363
2390
|
catch (error) {
|
|
2364
2391
|
return [
|
|
@@ -2439,6 +2466,9 @@ function computeRulesHash(rules) {
|
|
|
2439
2466
|
* A property is facetable if it declares `uiHint` or `enum`.
|
|
2440
2467
|
*/
|
|
2441
2468
|
function isFacetable(prop) {
|
|
2469
|
+
// Reject non-primitive types that can never produce trackable values.
|
|
2470
|
+
if (prop.type === 'object')
|
|
2471
|
+
return false;
|
|
2442
2472
|
return prop.uiHint !== undefined || prop.enum !== undefined;
|
|
2443
2473
|
}
|
|
2444
2474
|
/**
|
|
@@ -3029,6 +3059,44 @@ function createRulesUnregisterParamHandler(deps) {
|
|
|
3029
3059
|
}, deps.logger, 'RulesUnregister');
|
|
3030
3060
|
}
|
|
3031
3061
|
|
|
3062
|
+
/**
|
|
3063
|
+
* @module api/handlers/scan
|
|
3064
|
+
* Fastify route handler for POST /scan. Filter-only point query without vector search.
|
|
3065
|
+
*/
|
|
3066
|
+
/**
|
|
3067
|
+
* Create handler for POST /scan.
|
|
3068
|
+
*
|
|
3069
|
+
* @param deps - Route dependencies.
|
|
3070
|
+
*/
|
|
3071
|
+
function createScanHandler(deps) {
|
|
3072
|
+
return wrapHandler(async (request, reply) => {
|
|
3073
|
+
const { filter, limit = 100, cursor, fields, countOnly } = request.body;
|
|
3074
|
+
if (!filter || typeof filter !== 'object') {
|
|
3075
|
+
deps.logger.warn('Scan rejected: missing or invalid filter');
|
|
3076
|
+
void reply
|
|
3077
|
+
.status(400)
|
|
3078
|
+
.send({ error: 'Missing required field: filter (object)' });
|
|
3079
|
+
return;
|
|
3080
|
+
}
|
|
3081
|
+
if (typeof limit !== 'number' || limit < 1 || limit > 1000) {
|
|
3082
|
+
deps.logger.warn({ limit }, 'Scan rejected: limit out of bounds');
|
|
3083
|
+
void reply
|
|
3084
|
+
.status(400)
|
|
3085
|
+
.send({ error: 'limit must be between 1 and 1000' });
|
|
3086
|
+
return;
|
|
3087
|
+
}
|
|
3088
|
+
if (countOnly) {
|
|
3089
|
+
const count = await deps.vectorStore.count(filter);
|
|
3090
|
+
return { count };
|
|
3091
|
+
}
|
|
3092
|
+
const result = await deps.vectorStore.scrollPage(filter, limit, cursor, fields);
|
|
3093
|
+
return {
|
|
3094
|
+
points: result.points,
|
|
3095
|
+
cursor: result.nextCursor ?? null,
|
|
3096
|
+
};
|
|
3097
|
+
}, deps.logger, 'Scan');
|
|
3098
|
+
}
|
|
3099
|
+
|
|
3032
3100
|
/**
|
|
3033
3101
|
* @module api/handlers/search
|
|
3034
3102
|
* Fastify route handler for POST /search. Embeds a query and performs vector store similarity search.
|
|
@@ -3210,6 +3278,10 @@ function createApiServer(options) {
|
|
|
3210
3278
|
textWeight: config.search.hybrid.textWeight,
|
|
3211
3279
|
}
|
|
3212
3280
|
: undefined;
|
|
3281
|
+
app.post('/scan', createScanHandler({
|
|
3282
|
+
vectorStore,
|
|
3283
|
+
logger,
|
|
3284
|
+
}));
|
|
3213
3285
|
app.post('/search', createSearchHandler({
|
|
3214
3286
|
embeddingProvider,
|
|
3215
3287
|
vectorStore,
|
|
@@ -3509,18 +3581,23 @@ class ValuesManager extends JsonFileStore {
|
|
|
3509
3581
|
index[ruleName] ??= {};
|
|
3510
3582
|
const ruleValues = index[ruleName];
|
|
3511
3583
|
for (const [key, value] of Object.entries(metadata)) {
|
|
3512
|
-
|
|
3513
|
-
|
|
3514
|
-
|
|
3515
|
-
const
|
|
3516
|
-
|
|
3517
|
-
|
|
3518
|
-
|
|
3519
|
-
|
|
3520
|
-
|
|
3521
|
-
|
|
3522
|
-
|
|
3523
|
-
|
|
3584
|
+
// Decompose arrays into individual trackable elements so that
|
|
3585
|
+
// array-typed fields (e.g. domains: ["email"]) are indexed.
|
|
3586
|
+
const items = Array.isArray(value) ? value : [value];
|
|
3587
|
+
for (const item of items) {
|
|
3588
|
+
if (!this.isTrackable(item))
|
|
3589
|
+
continue;
|
|
3590
|
+
ruleValues[key] ??= [];
|
|
3591
|
+
const arr = ruleValues[key];
|
|
3592
|
+
if (!arr.includes(item)) {
|
|
3593
|
+
arr.push(item);
|
|
3594
|
+
arr.sort((a, b) => {
|
|
3595
|
+
if (typeof a === typeof b) {
|
|
3596
|
+
return String(a).localeCompare(String(b));
|
|
3597
|
+
}
|
|
3598
|
+
return typeof a < typeof b ? -1 : 1;
|
|
3599
|
+
});
|
|
3600
|
+
}
|
|
3524
3601
|
}
|
|
3525
3602
|
}
|
|
3526
3603
|
this.save();
|
|
@@ -4754,6 +4831,28 @@ async function getCollectionInfo(client, collectionName) {
|
|
|
4754
4831
|
return { pointCount, dimensions, payloadFields };
|
|
4755
4832
|
}
|
|
4756
4833
|
|
|
4834
|
+
/**
|
|
4835
|
+
* @module vectorStore/count
|
|
4836
|
+
* Count utility for Qdrant collection points.
|
|
4837
|
+
*/
|
|
4838
|
+
/**
|
|
4839
|
+
* Count points in a Qdrant collection matching an optional filter.
|
|
4840
|
+
*
|
|
4841
|
+
* Uses exact counting for accurate results.
|
|
4842
|
+
*
|
|
4843
|
+
* @param client - The Qdrant client instance.
|
|
4844
|
+
* @param collectionName - The collection to count.
|
|
4845
|
+
* @param filter - Optional Qdrant filter.
|
|
4846
|
+
* @returns The number of matching points.
|
|
4847
|
+
*/
|
|
4848
|
+
async function countPoints(client, collectionName, filter) {
|
|
4849
|
+
const result = await client.count(collectionName, {
|
|
4850
|
+
...(filter ? { filter } : {}),
|
|
4851
|
+
exact: true,
|
|
4852
|
+
});
|
|
4853
|
+
return result.count;
|
|
4854
|
+
}
|
|
4855
|
+
|
|
4757
4856
|
/**
|
|
4758
4857
|
* @module vectorStore/hybridSearch
|
|
4759
4858
|
* Hybrid search and text index helpers for Qdrant vector store.
|
|
@@ -4853,11 +4952,43 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
|
|
|
4853
4952
|
|
|
4854
4953
|
/**
|
|
4855
4954
|
* @module vectorStore/scroll
|
|
4856
|
-
*
|
|
4955
|
+
* Scroll utilities for paginating through Qdrant collection points.
|
|
4956
|
+
*/
|
|
4957
|
+
/**
|
|
4958
|
+
* Scroll one page of points matching a filter.
|
|
4959
|
+
*
|
|
4960
|
+
* @param client - The Qdrant client instance.
|
|
4961
|
+
* @param collectionName - The collection to scroll.
|
|
4962
|
+
* @param filter - Optional Qdrant filter.
|
|
4963
|
+
* @param limit - Page size.
|
|
4964
|
+
* @param offset - Cursor offset from previous page.
|
|
4965
|
+
* @param fields - Optional payload field projection (array of field names).
|
|
4966
|
+
* @returns Page of points and next cursor.
|
|
4857
4967
|
*/
|
|
4968
|
+
async function scrollPage(client, collectionName, filter, limit = 100, offset, fields) {
|
|
4969
|
+
const result = await client.scroll(collectionName, {
|
|
4970
|
+
limit,
|
|
4971
|
+
with_payload: fields ? fields : true,
|
|
4972
|
+
with_vector: false,
|
|
4973
|
+
...(filter ? { filter } : {}),
|
|
4974
|
+
...(offset !== undefined ? { offset } : {}),
|
|
4975
|
+
});
|
|
4976
|
+
return {
|
|
4977
|
+
points: result.points.map((p) => ({
|
|
4978
|
+
id: String(p.id),
|
|
4979
|
+
payload: p.payload,
|
|
4980
|
+
})),
|
|
4981
|
+
nextCursor: typeof result.next_page_offset === 'string' ||
|
|
4982
|
+
typeof result.next_page_offset === 'number'
|
|
4983
|
+
? result.next_page_offset
|
|
4984
|
+
: undefined,
|
|
4985
|
+
};
|
|
4986
|
+
}
|
|
4858
4987
|
/**
|
|
4859
4988
|
* Scroll through all points in a Qdrant collection matching a filter.
|
|
4860
4989
|
*
|
|
4990
|
+
* Iterates over pages using {@link scrollPage}.
|
|
4991
|
+
*
|
|
4861
4992
|
* @param client - The Qdrant client instance.
|
|
4862
4993
|
* @param collectionName - The collection to scroll.
|
|
4863
4994
|
* @param filter - Optional Qdrant filter.
|
|
@@ -4865,32 +4996,14 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
|
|
|
4865
4996
|
* @yields Scrolled points.
|
|
4866
4997
|
*/
|
|
4867
4998
|
async function* scrollCollection(client, collectionName, filter, limit = 100) {
|
|
4868
|
-
let
|
|
4869
|
-
|
|
4870
|
-
const
|
|
4871
|
-
|
|
4872
|
-
|
|
4873
|
-
with_vector: false,
|
|
4874
|
-
...(filter ? { filter } : {}),
|
|
4875
|
-
...(offset !== undefined ? { offset } : {}),
|
|
4876
|
-
});
|
|
4877
|
-
for (const point of result.points) {
|
|
4878
|
-
yield {
|
|
4879
|
-
id: String(point.id),
|
|
4880
|
-
payload: point.payload,
|
|
4881
|
-
};
|
|
4882
|
-
}
|
|
4883
|
-
const nextOffset = result.next_page_offset;
|
|
4884
|
-
if (nextOffset === null || nextOffset === undefined) {
|
|
4885
|
-
break;
|
|
4999
|
+
let cursor;
|
|
5000
|
+
do {
|
|
5001
|
+
const page = await scrollPage(client, collectionName, filter, limit, cursor);
|
|
5002
|
+
for (const point of page.points) {
|
|
5003
|
+
yield point;
|
|
4886
5004
|
}
|
|
4887
|
-
|
|
4888
|
-
|
|
4889
|
-
}
|
|
4890
|
-
else {
|
|
4891
|
-
break;
|
|
4892
|
-
}
|
|
4893
|
-
}
|
|
5005
|
+
cursor = page.nextCursor;
|
|
5006
|
+
} while (cursor !== undefined);
|
|
4894
5007
|
}
|
|
4895
5008
|
|
|
4896
5009
|
/**
|
|
@@ -4934,6 +5047,15 @@ class VectorStoreClient {
|
|
|
4934
5047
|
checkCompatibility: false,
|
|
4935
5048
|
});
|
|
4936
5049
|
}
|
|
5050
|
+
/**
|
|
5051
|
+
* Count points matching a filter.
|
|
5052
|
+
*
|
|
5053
|
+
* @param filter - Optional Qdrant filter.
|
|
5054
|
+
* @returns The number of matching points.
|
|
5055
|
+
*/
|
|
5056
|
+
async count(filter) {
|
|
5057
|
+
return countPoints(this.client, this.collectionName, filter);
|
|
5058
|
+
}
|
|
4937
5059
|
/**
|
|
4938
5060
|
* Ensure the collection exists with correct dimensions and Cosine distance.
|
|
4939
5061
|
*/
|
|
@@ -5114,6 +5236,18 @@ class VectorStoreClient {
|
|
|
5114
5236
|
async hybridSearch(vector, queryText, limit, textWeight, filter) {
|
|
5115
5237
|
return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
|
|
5116
5238
|
}
|
|
5239
|
+
/**
|
|
5240
|
+
* Scroll one page of points matching a filter.
|
|
5241
|
+
*
|
|
5242
|
+
* @param filter - Optional Qdrant filter.
|
|
5243
|
+
* @param limit - Page size.
|
|
5244
|
+
* @param offset - Cursor offset from previous page.
|
|
5245
|
+
* @param fields - Optional field projection.
|
|
5246
|
+
* @returns Page of points and next cursor.
|
|
5247
|
+
*/
|
|
5248
|
+
async scrollPage(filter, limit = 100, offset, fields) {
|
|
5249
|
+
return scrollPage(this.client, this.collectionName, filter, limit, offset, fields);
|
|
5250
|
+
}
|
|
5117
5251
|
/**
|
|
5118
5252
|
* Scroll through all points matching a filter.
|
|
5119
5253
|
*
|
|
@@ -5965,6 +6099,9 @@ class JeevesWatcher {
|
|
|
5965
6099
|
const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
|
|
5966
6100
|
this.helperIntrospection = await introspectHelpers(this.config, configDir);
|
|
5967
6101
|
const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
|
|
6102
|
+
const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
|
|
6103
|
+
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
6104
|
+
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
5968
6105
|
const processor = this.factories.createDocumentProcessor({
|
|
5969
6106
|
config: processorConfig,
|
|
5970
6107
|
embeddingProvider,
|
|
@@ -5972,6 +6109,8 @@ class JeevesWatcher {
|
|
|
5972
6109
|
compiledRules,
|
|
5973
6110
|
logger,
|
|
5974
6111
|
templateEngine,
|
|
6112
|
+
issuesManager: this.issuesManager,
|
|
6113
|
+
valuesManager: this.valuesManager,
|
|
5975
6114
|
});
|
|
5976
6115
|
this.processor = processor;
|
|
5977
6116
|
this.queue = this.factories.createEventQueue({
|
|
@@ -5980,9 +6119,6 @@ class JeevesWatcher {
|
|
|
5980
6119
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
5981
6120
|
});
|
|
5982
6121
|
this.watcher = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions);
|
|
5983
|
-
const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
|
|
5984
|
-
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
5985
|
-
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
5986
6122
|
this.server = await this.startApiServer();
|
|
5987
6123
|
this.watcher.start();
|
|
5988
6124
|
this.startConfigWatch();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@karmaniverous/jeeves-watcher",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.9.0-0",
|
|
4
4
|
"author": "Jason Williscroft",
|
|
5
5
|
"description": "Filesystem watcher that keeps a Qdrant vector store in sync with document changes",
|
|
6
6
|
"license": "BSD-3-Clause",
|
|
@@ -84,22 +84,22 @@
|
|
|
84
84
|
"zod": "^4.3.6"
|
|
85
85
|
},
|
|
86
86
|
"devDependencies": {
|
|
87
|
-
"@dotenvx/dotenvx": "^1.
|
|
87
|
+
"@dotenvx/dotenvx": "^1.54.1",
|
|
88
88
|
"@rollup/plugin-alias": "^6.0.0",
|
|
89
|
-
"@rollup/plugin-commonjs": "^29.0.
|
|
89
|
+
"@rollup/plugin-commonjs": "^29.0.2",
|
|
90
90
|
"@rollup/plugin-json": "^6.1.0",
|
|
91
91
|
"@rollup/plugin-node-resolve": "^16.0.3",
|
|
92
92
|
"@rollup/plugin-typescript": "^12.3.0",
|
|
93
93
|
"@types/fs-extra": "^11.0.4",
|
|
94
94
|
"@types/js-yaml": "*",
|
|
95
|
-
"@types/node": "^25.3.
|
|
95
|
+
"@types/node": "^25.3.5",
|
|
96
96
|
"@types/picomatch": "^4.0.2",
|
|
97
97
|
"@types/uuid": "*",
|
|
98
98
|
"@vitest/coverage-v8": "^4.0.18",
|
|
99
99
|
"auto-changelog": "^2.5.0",
|
|
100
100
|
"cross-env": "^10.1.0",
|
|
101
|
-
"fs-extra": "^11.3.
|
|
102
|
-
"happy-dom": "^20.
|
|
101
|
+
"fs-extra": "^11.3.4",
|
|
102
|
+
"happy-dom": "^20.8.3",
|
|
103
103
|
"knip": "^5.85.0",
|
|
104
104
|
"release-it": "^19.2.4",
|
|
105
105
|
"rollup": "^4.59.0",
|