@karmaniverous/jeeves-watcher 0.8.4 → 0.9.0-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -621,18 +621,23 @@ class ValuesManager extends JsonFileStore {
621
621
  index[ruleName] ??= {};
622
622
  const ruleValues = index[ruleName];
623
623
  for (const [key, value] of Object.entries(metadata)) {
624
- if (!this.isTrackable(value))
625
- continue;
626
- ruleValues[key] ??= [];
627
- const arr = ruleValues[key];
628
- if (!arr.includes(value)) {
629
- arr.push(value);
630
- arr.sort((a, b) => {
631
- if (typeof a === typeof b) {
632
- return String(a).localeCompare(String(b));
633
- }
634
- return typeof a < typeof b ? -1 : 1;
635
- });
624
+ // Decompose arrays into individual trackable elements so that
625
+ // array-typed fields (e.g. domains: ["email"]) are indexed.
626
+ const items = Array.isArray(value) ? value : [value];
627
+ for (const item of items) {
628
+ if (!this.isTrackable(item))
629
+ continue;
630
+ ruleValues[key] ??= [];
631
+ const arr = ruleValues[key];
632
+ if (!arr.includes(item)) {
633
+ arr.push(item);
634
+ arr.sort((a, b) => {
635
+ if (typeof a === typeof b) {
636
+ return String(a).localeCompare(String(b));
637
+ }
638
+ return typeof a < typeof b ? -1 : 1;
639
+ });
640
+ }
636
641
  }
637
642
  }
638
643
  this.save();
@@ -1491,6 +1496,31 @@ function validateSchemaCompleteness(schema, ruleName) {
1491
1496
  }
1492
1497
  }
1493
1498
  }
1499
+ /** Types that can produce trackable facet values. */
1500
+ const FACETABLE_TYPES = new Set([
1501
+ 'string',
1502
+ 'number',
1503
+ 'boolean',
1504
+ 'integer',
1505
+ 'array',
1506
+ ]);
1507
+ /**
1508
+ * Validate that uiHint and enum are only applied to facetable property types.
1509
+ * Throws if a non-facetable type (e.g. object) declares uiHint or enum.
1510
+ *
1511
+ * @param schema - Resolved schema to validate.
1512
+ * @param ruleName - Name of the rule (for error messages).
1513
+ */
1514
+ function validateFacetTypes(schema, ruleName) {
1515
+ for (const [propName, propDef] of Object.entries(schema.properties)) {
1516
+ if (!propDef.type)
1517
+ continue;
1518
+ if (!FACETABLE_TYPES.has(propDef.type) &&
1519
+ (propDef.uiHint !== undefined || propDef.enum !== undefined)) {
1520
+ throw new Error(`Property "${propName}" in rule "${ruleName}" has type "${propDef.type}" with uiHint/enum. Facet hints are only valid on string, number, boolean, integer, or array types.`);
1521
+ }
1522
+ }
1523
+ }
1494
1524
 
1495
1525
  /**
1496
1526
  * @module rules/apply
@@ -1557,6 +1587,7 @@ async function applyRules(compiledRules, attributes, options = {}) {
1557
1587
  });
1558
1588
  // Validate schema completeness
1559
1589
  validateSchemaCompleteness(mergedSchema, rule.name);
1590
+ validateFacetTypes(mergedSchema, rule.name);
1560
1591
  // Resolve and coerce metadata
1561
1592
  const schemaOutput = resolveAndCoerce(mergedSchema, attributes, hbs);
1562
1593
  merged = { ...merged, ...schemaOutput };
@@ -2668,6 +2699,7 @@ function validateInferenceRuleSchemas(parsed) {
2668
2699
  globalSchemas: parsed.schemas,
2669
2700
  });
2670
2701
  validateSchemaCompleteness(merged, rule.name);
2702
+ validateFacetTypes(merged, rule.name);
2671
2703
  }
2672
2704
  catch (error) {
2673
2705
  return [
@@ -2748,6 +2780,9 @@ function computeRulesHash(rules) {
2748
2780
  * A property is facetable if it declares `uiHint` or `enum`.
2749
2781
  */
2750
2782
  function isFacetable(prop) {
2783
+ // Reject non-primitive types that can never produce trackable values.
2784
+ if (prop.type === 'object')
2785
+ return false;
2751
2786
  return prop.uiHint !== undefined || prop.enum !== undefined;
2752
2787
  }
2753
2788
  /**
@@ -3338,6 +3373,44 @@ function createRulesUnregisterParamHandler(deps) {
3338
3373
  }, deps.logger, 'RulesUnregister');
3339
3374
  }
3340
3375
 
3376
+ /**
3377
+ * @module api/handlers/scan
3378
+ * Fastify route handler for POST /scan. Filter-only point query without vector search.
3379
+ */
3380
+ /**
3381
+ * Create handler for POST /scan.
3382
+ *
3383
+ * @param deps - Route dependencies.
3384
+ */
3385
+ function createScanHandler(deps) {
3386
+ return wrapHandler(async (request, reply) => {
3387
+ const { filter, limit = 100, cursor, fields, countOnly } = request.body;
3388
+ if (!filter || typeof filter !== 'object') {
3389
+ deps.logger.warn('Scan rejected: missing or invalid filter');
3390
+ void reply
3391
+ .status(400)
3392
+ .send({ error: 'Missing required field: filter (object)' });
3393
+ return;
3394
+ }
3395
+ if (typeof limit !== 'number' || limit < 1 || limit > 1000) {
3396
+ deps.logger.warn({ limit }, 'Scan rejected: limit out of bounds');
3397
+ void reply
3398
+ .status(400)
3399
+ .send({ error: 'limit must be between 1 and 1000' });
3400
+ return;
3401
+ }
3402
+ if (countOnly) {
3403
+ const count = await deps.vectorStore.count(filter);
3404
+ return { count };
3405
+ }
3406
+ const result = await deps.vectorStore.scrollPage(filter, limit, cursor, fields);
3407
+ return {
3408
+ points: result.points,
3409
+ cursor: result.nextCursor ?? null,
3410
+ };
3411
+ }, deps.logger, 'Scan');
3412
+ }
3413
+
3341
3414
  /**
3342
3415
  * @module api/handlers/search
3343
3416
  * Fastify route handler for POST /search. Embeds a query and performs vector store similarity search.
@@ -3519,6 +3592,10 @@ function createApiServer(options) {
3519
3592
  textWeight: config.search.hybrid.textWeight,
3520
3593
  }
3521
3594
  : undefined;
3595
+ app.post('/scan', createScanHandler({
3596
+ vectorStore,
3597
+ logger,
3598
+ }));
3522
3599
  app.post('/search', createSearchHandler({
3523
3600
  embeddingProvider,
3524
3601
  vectorStore,
@@ -4776,6 +4853,28 @@ async function getCollectionInfo(client, collectionName) {
4776
4853
  return { pointCount, dimensions, payloadFields };
4777
4854
  }
4778
4855
 
4856
+ /**
4857
+ * @module vectorStore/count
4858
+ * Count utility for Qdrant collection points.
4859
+ */
4860
+ /**
4861
+ * Count points in a Qdrant collection matching an optional filter.
4862
+ *
4863
+ * Uses exact counting for accurate results.
4864
+ *
4865
+ * @param client - The Qdrant client instance.
4866
+ * @param collectionName - The collection to count.
4867
+ * @param filter - Optional Qdrant filter.
4868
+ * @returns The number of matching points.
4869
+ */
4870
+ async function countPoints(client, collectionName, filter) {
4871
+ const result = await client.count(collectionName, {
4872
+ ...(filter ? { filter } : {}),
4873
+ exact: true,
4874
+ });
4875
+ return result.count;
4876
+ }
4877
+
4779
4878
  /**
4780
4879
  * @module vectorStore/hybridSearch
4781
4880
  * Hybrid search and text index helpers for Qdrant vector store.
@@ -4875,11 +4974,43 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
4875
4974
 
4876
4975
  /**
4877
4976
  * @module vectorStore/scroll
4878
- * Standalone scroll utility for paginating through Qdrant collection points.
4977
+ * Scroll utilities for paginating through Qdrant collection points.
4978
+ */
4979
+ /**
4980
+ * Scroll one page of points matching a filter.
4981
+ *
4982
+ * @param client - The Qdrant client instance.
4983
+ * @param collectionName - The collection to scroll.
4984
+ * @param filter - Optional Qdrant filter.
4985
+ * @param limit - Page size.
4986
+ * @param offset - Cursor offset from previous page.
4987
+ * @param fields - Optional payload field projection (array of field names).
4988
+ * @returns Page of points and next cursor.
4879
4989
  */
4990
+ async function scrollPage(client, collectionName, filter, limit = 100, offset, fields) {
4991
+ const result = await client.scroll(collectionName, {
4992
+ limit,
4993
+ with_payload: fields ? fields : true,
4994
+ with_vector: false,
4995
+ ...(filter ? { filter } : {}),
4996
+ ...(offset !== undefined ? { offset } : {}),
4997
+ });
4998
+ return {
4999
+ points: result.points.map((p) => ({
5000
+ id: String(p.id),
5001
+ payload: p.payload,
5002
+ })),
5003
+ nextCursor: typeof result.next_page_offset === 'string' ||
5004
+ typeof result.next_page_offset === 'number'
5005
+ ? result.next_page_offset
5006
+ : undefined,
5007
+ };
5008
+ }
4880
5009
  /**
4881
5010
  * Scroll through all points in a Qdrant collection matching a filter.
4882
5011
  *
5012
+ * Iterates over pages using {@link scrollPage}.
5013
+ *
4883
5014
  * @param client - The Qdrant client instance.
4884
5015
  * @param collectionName - The collection to scroll.
4885
5016
  * @param filter - Optional Qdrant filter.
@@ -4887,32 +5018,14 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
4887
5018
  * @yields Scrolled points.
4888
5019
  */
4889
5020
  async function* scrollCollection(client, collectionName, filter, limit = 100) {
4890
- let offset = undefined;
4891
- for (;;) {
4892
- const result = await client.scroll(collectionName, {
4893
- limit,
4894
- with_payload: true,
4895
- with_vector: false,
4896
- ...(filter ? { filter } : {}),
4897
- ...(offset !== undefined ? { offset } : {}),
4898
- });
4899
- for (const point of result.points) {
4900
- yield {
4901
- id: String(point.id),
4902
- payload: point.payload,
4903
- };
4904
- }
4905
- const nextOffset = result.next_page_offset;
4906
- if (nextOffset === null || nextOffset === undefined) {
4907
- break;
4908
- }
4909
- if (typeof nextOffset === 'string' || typeof nextOffset === 'number') {
4910
- offset = nextOffset;
5021
+ let cursor;
5022
+ do {
5023
+ const page = await scrollPage(client, collectionName, filter, limit, cursor);
5024
+ for (const point of page.points) {
5025
+ yield point;
4911
5026
  }
4912
- else {
4913
- break;
4914
- }
4915
- }
5027
+ cursor = page.nextCursor;
5028
+ } while (cursor !== undefined);
4916
5029
  }
4917
5030
 
4918
5031
  /**
@@ -4956,6 +5069,15 @@ class VectorStoreClient {
4956
5069
  checkCompatibility: false,
4957
5070
  });
4958
5071
  }
5072
+ /**
5073
+ * Count points matching a filter.
5074
+ *
5075
+ * @param filter - Optional Qdrant filter.
5076
+ * @returns The number of matching points.
5077
+ */
5078
+ async count(filter) {
5079
+ return countPoints(this.client, this.collectionName, filter);
5080
+ }
4959
5081
  /**
4960
5082
  * Ensure the collection exists with correct dimensions and Cosine distance.
4961
5083
  */
@@ -5136,6 +5258,18 @@ class VectorStoreClient {
5136
5258
  async hybridSearch(vector, queryText, limit, textWeight, filter) {
5137
5259
  return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
5138
5260
  }
5261
+ /**
5262
+ * Scroll one page of points matching a filter.
5263
+ *
5264
+ * @param filter - Optional Qdrant filter.
5265
+ * @param limit - Page size.
5266
+ * @param offset - Cursor offset from previous page.
5267
+ * @param fields - Optional field projection.
5268
+ * @returns Page of points and next cursor.
5269
+ */
5270
+ async scrollPage(filter, limit = 100, offset, fields) {
5271
+ return scrollPage(this.client, this.collectionName, filter, limit, offset, fields);
5272
+ }
5139
5273
  /**
5140
5274
  * Scroll through all points matching a filter.
5141
5275
  *
@@ -5987,6 +6121,9 @@ class JeevesWatcher {
5987
6121
  const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
5988
6122
  this.helperIntrospection = await introspectHelpers(this.config, configDir);
5989
6123
  const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
6124
+ const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
6125
+ this.issuesManager = new IssuesManager(stateDir, logger);
6126
+ this.valuesManager = new ValuesManager(stateDir, logger);
5990
6127
  const processor = this.factories.createDocumentProcessor({
5991
6128
  config: processorConfig,
5992
6129
  embeddingProvider,
@@ -5994,6 +6131,8 @@ class JeevesWatcher {
5994
6131
  compiledRules,
5995
6132
  logger,
5996
6133
  templateEngine,
6134
+ issuesManager: this.issuesManager,
6135
+ valuesManager: this.valuesManager,
5997
6136
  });
5998
6137
  this.processor = processor;
5999
6138
  this.queue = this.factories.createEventQueue({
@@ -6002,9 +6141,6 @@ class JeevesWatcher {
6002
6141
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
6003
6142
  });
6004
6143
  this.watcher = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions);
6005
- const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
6006
- this.issuesManager = new IssuesManager(stateDir, logger);
6007
- this.valuesManager = new ValuesManager(stateDir, logger);
6008
6144
  this.server = await this.startApiServer();
6009
6145
  this.watcher.start();
6010
6146
  this.startConfigWatch();
@@ -6482,6 +6618,50 @@ function registerReindexCommand(cli) {
6482
6618
  });
6483
6619
  }
6484
6620
 
6621
+ /**
6622
+ * @module commands/scan
6623
+ *
6624
+ * CLI command: scan.
6625
+ */
6626
+ function registerScanCommand(cli) {
6627
+ const command = cli
6628
+ .command('scan')
6629
+ .description('Scan the vector store (POST /scan)')
6630
+ .option('-f, --filter <filter>', 'Qdrant filter (JSON string)', '{}')
6631
+ .option('-l, --limit <limit>', 'Max results', '100')
6632
+ .option('-c, --cursor <cursor>', 'Cursor from previous response')
6633
+ .option('--fields <fields>', 'Fields to return (comma-separated)')
6634
+ .option('--count-only', 'Return count only');
6635
+ withApiOptions(command).action(async (options) => {
6636
+ let filterObj = {};
6637
+ try {
6638
+ if (options.filter) {
6639
+ filterObj = JSON.parse(options.filter);
6640
+ }
6641
+ }
6642
+ catch (error) {
6643
+ console.error('Invalid filter JSON:', error);
6644
+ process.exit(1);
6645
+ }
6646
+ const fieldsArray = options.fields
6647
+ ? options.fields.split(',').map((f) => f.trim())
6648
+ : undefined;
6649
+ await runApiCommand({
6650
+ host: options.host,
6651
+ port: options.port,
6652
+ method: 'POST',
6653
+ path: '/scan',
6654
+ body: {
6655
+ filter: filterObj,
6656
+ limit: Number(options.limit),
6657
+ cursor: options.cursor,
6658
+ fields: fieldsArray,
6659
+ countOnly: options.countOnly,
6660
+ },
6661
+ });
6662
+ });
6663
+ }
6664
+
6485
6665
  /**
6486
6666
  * @module commands/search
6487
6667
  *
@@ -6647,6 +6827,7 @@ registerStatusCommand(cli);
6647
6827
  registerReindexCommand(cli);
6648
6828
  registerRebuildMetadataCommand(cli);
6649
6829
  registerSearchCommand(cli);
6830
+ registerScanCommand(cli);
6650
6831
  registerEnrichCommand(cli);
6651
6832
  registerConfigReindexCommand(cli);
6652
6833
  registerServiceCommand(cli);
package/dist/index.d.ts CHANGED
@@ -676,6 +676,13 @@ interface ScrolledPoint {
676
676
  /** The payload metadata. */
677
677
  payload: Record<string, unknown>;
678
678
  }
679
+ /** Result of a single scroll page. */
680
+ interface ScrollPageResult {
681
+ /** Matched points. */
682
+ points: ScrolledPoint[];
683
+ /** Cursor for next page, or `undefined` when no more pages. */
684
+ nextCursor?: string | number;
685
+ }
679
686
  /** Payload field schema information as reported by Qdrant. */
680
687
  interface PayloadFieldSchema {
681
688
  /** Qdrant data type for the field (e.g. `keyword`, `text`, `integer`). */
@@ -702,6 +709,13 @@ interface VectorStore {
702
709
  * Ensure the collection exists with correct configuration.
703
710
  */
704
711
  ensureCollection(): Promise<void>;
712
+ /**
713
+ * Count points matching a filter.
714
+ *
715
+ * @param filter - Optional Qdrant filter.
716
+ * @returns The number of matching points.
717
+ */
718
+ count(filter?: Record<string, unknown>): Promise<number>;
705
719
  /**
706
720
  * Upsert points into the collection.
707
721
  *
@@ -742,6 +756,16 @@ interface VectorStore {
742
756
  * @returns An array of search results.
743
757
  */
744
758
  search(vector: number[], limit: number, filter?: Record<string, unknown>, offset?: number): Promise<SearchResult[]>;
759
+ /**
760
+ * Scroll one page of points matching a filter.
761
+ *
762
+ * @param filter - Optional Qdrant filter.
763
+ * @param limit - Page size.
764
+ * @param offset - Cursor offset from previous page.
765
+ * @param fields - Optional field projection.
766
+ * @returns Page of points and next cursor.
767
+ */
768
+ scrollPage(filter?: Record<string, unknown>, limit?: number, offset?: string | number, fields?: string[]): Promise<ScrollPageResult>;
745
769
  /**
746
770
  * Scroll through all points matching a filter.
747
771
  *
@@ -798,6 +822,13 @@ declare class VectorStoreClient implements VectorStore {
798
822
  * Creating a fresh client for write operations ensures clean TCP connections.
799
823
  */
800
824
  private createClient;
825
+ /**
826
+ * Count points matching a filter.
827
+ *
828
+ * @param filter - Optional Qdrant filter.
829
+ * @returns The number of matching points.
830
+ */
831
+ count(filter?: Record<string, unknown>): Promise<number>;
801
832
  /**
802
833
  * Ensure the collection exists with correct dimensions and Cosine distance.
803
834
  */
@@ -877,6 +908,16 @@ declare class VectorStoreClient implements VectorStore {
877
908
  * @returns An array of search results.
878
909
  */
879
910
  hybridSearch(vector: number[], queryText: string, limit: number, textWeight: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
911
+ /**
912
+ * Scroll one page of points matching a filter.
913
+ *
914
+ * @param filter - Optional Qdrant filter.
915
+ * @param limit - Page size.
916
+ * @param offset - Cursor offset from previous page.
917
+ * @param fields - Optional field projection.
918
+ * @returns Page of points and next cursor.
919
+ */
920
+ scrollPage(filter?: Record<string, unknown>, limit?: number, offset?: string | number, fields?: string[]): Promise<ScrollPageResult>;
880
921
  /**
881
922
  * Scroll through all points matching a filter.
882
923
  *
package/dist/index.js CHANGED
@@ -846,6 +846,31 @@ function validateSchemaCompleteness(schema, ruleName) {
846
846
  }
847
847
  }
848
848
  }
849
+ /** Types that can produce trackable facet values. */
850
+ const FACETABLE_TYPES = new Set([
851
+ 'string',
852
+ 'number',
853
+ 'boolean',
854
+ 'integer',
855
+ 'array',
856
+ ]);
857
+ /**
858
+ * Validate that uiHint and enum are only applied to facetable property types.
859
+ * Throws if a non-facetable type (e.g. object) declares uiHint or enum.
860
+ *
861
+ * @param schema - Resolved schema to validate.
862
+ * @param ruleName - Name of the rule (for error messages).
863
+ */
864
+ function validateFacetTypes(schema, ruleName) {
865
+ for (const [propName, propDef] of Object.entries(schema.properties)) {
866
+ if (!propDef.type)
867
+ continue;
868
+ if (!FACETABLE_TYPES.has(propDef.type) &&
869
+ (propDef.uiHint !== undefined || propDef.enum !== undefined)) {
870
+ throw new Error(`Property "${propName}" in rule "${ruleName}" has type "${propDef.type}" with uiHint/enum. Facet hints are only valid on string, number, boolean, integer, or array types.`);
871
+ }
872
+ }
873
+ }
849
874
 
850
875
  /**
851
876
  * @module rules/apply
@@ -912,6 +937,7 @@ async function applyRules(compiledRules, attributes, options = {}) {
912
937
  });
913
938
  // Validate schema completeness
914
939
  validateSchemaCompleteness(mergedSchema, rule.name);
940
+ validateFacetTypes(mergedSchema, rule.name);
915
941
  // Resolve and coerce metadata
916
942
  const schemaOutput = resolveAndCoerce(mergedSchema, attributes, hbs);
917
943
  merged = { ...merged, ...schemaOutput };
@@ -2359,6 +2385,7 @@ function validateInferenceRuleSchemas(parsed) {
2359
2385
  globalSchemas: parsed.schemas,
2360
2386
  });
2361
2387
  validateSchemaCompleteness(merged, rule.name);
2388
+ validateFacetTypes(merged, rule.name);
2362
2389
  }
2363
2390
  catch (error) {
2364
2391
  return [
@@ -2439,6 +2466,9 @@ function computeRulesHash(rules) {
2439
2466
  * A property is facetable if it declares `uiHint` or `enum`.
2440
2467
  */
2441
2468
  function isFacetable(prop) {
2469
+ // Reject non-primitive types that can never produce trackable values.
2470
+ if (prop.type === 'object')
2471
+ return false;
2442
2472
  return prop.uiHint !== undefined || prop.enum !== undefined;
2443
2473
  }
2444
2474
  /**
@@ -3029,6 +3059,44 @@ function createRulesUnregisterParamHandler(deps) {
3029
3059
  }, deps.logger, 'RulesUnregister');
3030
3060
  }
3031
3061
 
3062
+ /**
3063
+ * @module api/handlers/scan
3064
+ * Fastify route handler for POST /scan. Filter-only point query without vector search.
3065
+ */
3066
+ /**
3067
+ * Create handler for POST /scan.
3068
+ *
3069
+ * @param deps - Route dependencies.
3070
+ */
3071
+ function createScanHandler(deps) {
3072
+ return wrapHandler(async (request, reply) => {
3073
+ const { filter, limit = 100, cursor, fields, countOnly } = request.body;
3074
+ if (!filter || typeof filter !== 'object') {
3075
+ deps.logger.warn('Scan rejected: missing or invalid filter');
3076
+ void reply
3077
+ .status(400)
3078
+ .send({ error: 'Missing required field: filter (object)' });
3079
+ return;
3080
+ }
3081
+ if (typeof limit !== 'number' || limit < 1 || limit > 1000) {
3082
+ deps.logger.warn({ limit }, 'Scan rejected: limit out of bounds');
3083
+ void reply
3084
+ .status(400)
3085
+ .send({ error: 'limit must be between 1 and 1000' });
3086
+ return;
3087
+ }
3088
+ if (countOnly) {
3089
+ const count = await deps.vectorStore.count(filter);
3090
+ return { count };
3091
+ }
3092
+ const result = await deps.vectorStore.scrollPage(filter, limit, cursor, fields);
3093
+ return {
3094
+ points: result.points,
3095
+ cursor: result.nextCursor ?? null,
3096
+ };
3097
+ }, deps.logger, 'Scan');
3098
+ }
3099
+
3032
3100
  /**
3033
3101
  * @module api/handlers/search
3034
3102
  * Fastify route handler for POST /search. Embeds a query and performs vector store similarity search.
@@ -3210,6 +3278,10 @@ function createApiServer(options) {
3210
3278
  textWeight: config.search.hybrid.textWeight,
3211
3279
  }
3212
3280
  : undefined;
3281
+ app.post('/scan', createScanHandler({
3282
+ vectorStore,
3283
+ logger,
3284
+ }));
3213
3285
  app.post('/search', createSearchHandler({
3214
3286
  embeddingProvider,
3215
3287
  vectorStore,
@@ -3509,18 +3581,23 @@ class ValuesManager extends JsonFileStore {
3509
3581
  index[ruleName] ??= {};
3510
3582
  const ruleValues = index[ruleName];
3511
3583
  for (const [key, value] of Object.entries(metadata)) {
3512
- if (!this.isTrackable(value))
3513
- continue;
3514
- ruleValues[key] ??= [];
3515
- const arr = ruleValues[key];
3516
- if (!arr.includes(value)) {
3517
- arr.push(value);
3518
- arr.sort((a, b) => {
3519
- if (typeof a === typeof b) {
3520
- return String(a).localeCompare(String(b));
3521
- }
3522
- return typeof a < typeof b ? -1 : 1;
3523
- });
3584
+ // Decompose arrays into individual trackable elements so that
3585
+ // array-typed fields (e.g. domains: ["email"]) are indexed.
3586
+ const items = Array.isArray(value) ? value : [value];
3587
+ for (const item of items) {
3588
+ if (!this.isTrackable(item))
3589
+ continue;
3590
+ ruleValues[key] ??= [];
3591
+ const arr = ruleValues[key];
3592
+ if (!arr.includes(item)) {
3593
+ arr.push(item);
3594
+ arr.sort((a, b) => {
3595
+ if (typeof a === typeof b) {
3596
+ return String(a).localeCompare(String(b));
3597
+ }
3598
+ return typeof a < typeof b ? -1 : 1;
3599
+ });
3600
+ }
3524
3601
  }
3525
3602
  }
3526
3603
  this.save();
@@ -4754,6 +4831,28 @@ async function getCollectionInfo(client, collectionName) {
4754
4831
  return { pointCount, dimensions, payloadFields };
4755
4832
  }
4756
4833
 
4834
+ /**
4835
+ * @module vectorStore/count
4836
+ * Count utility for Qdrant collection points.
4837
+ */
4838
+ /**
4839
+ * Count points in a Qdrant collection matching an optional filter.
4840
+ *
4841
+ * Uses exact counting for accurate results.
4842
+ *
4843
+ * @param client - The Qdrant client instance.
4844
+ * @param collectionName - The collection to count.
4845
+ * @param filter - Optional Qdrant filter.
4846
+ * @returns The number of matching points.
4847
+ */
4848
+ async function countPoints(client, collectionName, filter) {
4849
+ const result = await client.count(collectionName, {
4850
+ ...(filter ? { filter } : {}),
4851
+ exact: true,
4852
+ });
4853
+ return result.count;
4854
+ }
4855
+
4757
4856
  /**
4758
4857
  * @module vectorStore/hybridSearch
4759
4858
  * Hybrid search and text index helpers for Qdrant vector store.
@@ -4853,11 +4952,43 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
4853
4952
 
4854
4953
  /**
4855
4954
  * @module vectorStore/scroll
4856
- * Standalone scroll utility for paginating through Qdrant collection points.
4955
+ * Scroll utilities for paginating through Qdrant collection points.
4956
+ */
4957
+ /**
4958
+ * Scroll one page of points matching a filter.
4959
+ *
4960
+ * @param client - The Qdrant client instance.
4961
+ * @param collectionName - The collection to scroll.
4962
+ * @param filter - Optional Qdrant filter.
4963
+ * @param limit - Page size.
4964
+ * @param offset - Cursor offset from previous page.
4965
+ * @param fields - Optional payload field projection (array of field names).
4966
+ * @returns Page of points and next cursor.
4857
4967
  */
4968
+ async function scrollPage(client, collectionName, filter, limit = 100, offset, fields) {
4969
+ const result = await client.scroll(collectionName, {
4970
+ limit,
4971
+ with_payload: fields ? fields : true,
4972
+ with_vector: false,
4973
+ ...(filter ? { filter } : {}),
4974
+ ...(offset !== undefined ? { offset } : {}),
4975
+ });
4976
+ return {
4977
+ points: result.points.map((p) => ({
4978
+ id: String(p.id),
4979
+ payload: p.payload,
4980
+ })),
4981
+ nextCursor: typeof result.next_page_offset === 'string' ||
4982
+ typeof result.next_page_offset === 'number'
4983
+ ? result.next_page_offset
4984
+ : undefined,
4985
+ };
4986
+ }
4858
4987
  /**
4859
4988
  * Scroll through all points in a Qdrant collection matching a filter.
4860
4989
  *
4990
+ * Iterates over pages using {@link scrollPage}.
4991
+ *
4861
4992
  * @param client - The Qdrant client instance.
4862
4993
  * @param collectionName - The collection to scroll.
4863
4994
  * @param filter - Optional Qdrant filter.
@@ -4865,32 +4996,14 @@ async function hybridSearch(client, collectionName, vector, queryText, limit, te
4865
4996
  * @yields Scrolled points.
4866
4997
  */
4867
4998
  async function* scrollCollection(client, collectionName, filter, limit = 100) {
4868
- let offset = undefined;
4869
- for (;;) {
4870
- const result = await client.scroll(collectionName, {
4871
- limit,
4872
- with_payload: true,
4873
- with_vector: false,
4874
- ...(filter ? { filter } : {}),
4875
- ...(offset !== undefined ? { offset } : {}),
4876
- });
4877
- for (const point of result.points) {
4878
- yield {
4879
- id: String(point.id),
4880
- payload: point.payload,
4881
- };
4882
- }
4883
- const nextOffset = result.next_page_offset;
4884
- if (nextOffset === null || nextOffset === undefined) {
4885
- break;
4999
+ let cursor;
5000
+ do {
5001
+ const page = await scrollPage(client, collectionName, filter, limit, cursor);
5002
+ for (const point of page.points) {
5003
+ yield point;
4886
5004
  }
4887
- if (typeof nextOffset === 'string' || typeof nextOffset === 'number') {
4888
- offset = nextOffset;
4889
- }
4890
- else {
4891
- break;
4892
- }
4893
- }
5005
+ cursor = page.nextCursor;
5006
+ } while (cursor !== undefined);
4894
5007
  }
4895
5008
 
4896
5009
  /**
@@ -4934,6 +5047,15 @@ class VectorStoreClient {
4934
5047
  checkCompatibility: false,
4935
5048
  });
4936
5049
  }
5050
+ /**
5051
+ * Count points matching a filter.
5052
+ *
5053
+ * @param filter - Optional Qdrant filter.
5054
+ * @returns The number of matching points.
5055
+ */
5056
+ async count(filter) {
5057
+ return countPoints(this.client, this.collectionName, filter);
5058
+ }
4937
5059
  /**
4938
5060
  * Ensure the collection exists with correct dimensions and Cosine distance.
4939
5061
  */
@@ -5114,6 +5236,18 @@ class VectorStoreClient {
5114
5236
  async hybridSearch(vector, queryText, limit, textWeight, filter) {
5115
5237
  return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
5116
5238
  }
5239
+ /**
5240
+ * Scroll one page of points matching a filter.
5241
+ *
5242
+ * @param filter - Optional Qdrant filter.
5243
+ * @param limit - Page size.
5244
+ * @param offset - Cursor offset from previous page.
5245
+ * @param fields - Optional field projection.
5246
+ * @returns Page of points and next cursor.
5247
+ */
5248
+ async scrollPage(filter, limit = 100, offset, fields) {
5249
+ return scrollPage(this.client, this.collectionName, filter, limit, offset, fields);
5250
+ }
5117
5251
  /**
5118
5252
  * Scroll through all points matching a filter.
5119
5253
  *
@@ -5965,6 +6099,9 @@ class JeevesWatcher {
5965
6099
  const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
5966
6100
  this.helperIntrospection = await introspectHelpers(this.config, configDir);
5967
6101
  const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
6102
+ const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
6103
+ this.issuesManager = new IssuesManager(stateDir, logger);
6104
+ this.valuesManager = new ValuesManager(stateDir, logger);
5968
6105
  const processor = this.factories.createDocumentProcessor({
5969
6106
  config: processorConfig,
5970
6107
  embeddingProvider,
@@ -5972,6 +6109,8 @@ class JeevesWatcher {
5972
6109
  compiledRules,
5973
6110
  logger,
5974
6111
  templateEngine,
6112
+ issuesManager: this.issuesManager,
6113
+ valuesManager: this.valuesManager,
5975
6114
  });
5976
6115
  this.processor = processor;
5977
6116
  this.queue = this.factories.createEventQueue({
@@ -5980,9 +6119,6 @@ class JeevesWatcher {
5980
6119
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
5981
6120
  });
5982
6121
  this.watcher = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions);
5983
- const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
5984
- this.issuesManager = new IssuesManager(stateDir, logger);
5985
- this.valuesManager = new ValuesManager(stateDir, logger);
5986
6122
  this.server = await this.startApiServer();
5987
6123
  this.watcher.start();
5988
6124
  this.startConfigWatch();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@karmaniverous/jeeves-watcher",
3
- "version": "0.8.4",
3
+ "version": "0.9.0-0",
4
4
  "author": "Jason Williscroft",
5
5
  "description": "Filesystem watcher that keeps a Qdrant vector store in sync with document changes",
6
6
  "license": "BSD-3-Clause",
@@ -84,22 +84,22 @@
84
84
  "zod": "^4.3.6"
85
85
  },
86
86
  "devDependencies": {
87
- "@dotenvx/dotenvx": "^1.52.0",
87
+ "@dotenvx/dotenvx": "^1.54.1",
88
88
  "@rollup/plugin-alias": "^6.0.0",
89
- "@rollup/plugin-commonjs": "^29.0.0",
89
+ "@rollup/plugin-commonjs": "^29.0.2",
90
90
  "@rollup/plugin-json": "^6.1.0",
91
91
  "@rollup/plugin-node-resolve": "^16.0.3",
92
92
  "@rollup/plugin-typescript": "^12.3.0",
93
93
  "@types/fs-extra": "^11.0.4",
94
94
  "@types/js-yaml": "*",
95
- "@types/node": "^25.3.0",
95
+ "@types/node": "^25.3.5",
96
96
  "@types/picomatch": "^4.0.2",
97
97
  "@types/uuid": "*",
98
98
  "@vitest/coverage-v8": "^4.0.18",
99
99
  "auto-changelog": "^2.5.0",
100
100
  "cross-env": "^10.1.0",
101
- "fs-extra": "^11.3.3",
102
- "happy-dom": "^20.7.0",
101
+ "fs-extra": "^11.3.4",
102
+ "happy-dom": "^20.8.3",
103
103
  "knip": "^5.85.0",
104
104
  "release-it": "^19.2.4",
105
105
  "rollup": "^4.59.0",