@kadoa/node-sdk 0.19.0 → 0.19.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -639,6 +639,22 @@ interface AgenticWorkflow {
639
639
  * Additional data for the workflow (e.g. IDs from your system to link data)
640
640
  */
641
641
  'additionalData'?: any | null;
642
+ /**
643
+ * Maximum pages to crawl (default: 10,000, max: 100,000). Only used when navigationMode is \'all-pages\'
644
+ */
645
+ 'maxPages'?: number;
646
+ /**
647
+ * Maximum crawl depth (default: 50, max: 200). Only used when navigationMode is \'all-pages\'
648
+ */
649
+ 'maxDepth'?: number;
650
+ /**
651
+ * Regex patterns to include specific paths during crawling. Only used when navigationMode is \'all-pages\'
652
+ */
653
+ 'pathsFilterIn'?: Array<string>;
654
+ /**
655
+ * Regex patterns to exclude specific paths during crawling. Only used when navigationMode is \'all-pages\'
656
+ */
657
+ 'pathsFilterOut'?: Array<string>;
642
658
  /**
643
659
  * Natural language instructions for the AI agent (10-5000 characters). Describe what data to extract and how to navigate the site
644
660
  */
@@ -1159,6 +1175,22 @@ interface WorkflowWithEntityAndFields {
1159
1175
  * Additional data for the workflow (e.g. IDs from your system to link data)
1160
1176
  */
1161
1177
  'additionalData'?: any | null;
1178
+ /**
1179
+ * Maximum pages to crawl (default: 10,000, max: 100,000). Only used when navigationMode is \'all-pages\'
1180
+ */
1181
+ 'maxPages'?: number;
1182
+ /**
1183
+ * Maximum crawl depth (default: 50, max: 200). Only used when navigationMode is \'all-pages\'
1184
+ */
1185
+ 'maxDepth'?: number;
1186
+ /**
1187
+ * Regex patterns to include specific paths during crawling. Only used when navigationMode is \'all-pages\'
1188
+ */
1189
+ 'pathsFilterIn'?: Array<string>;
1190
+ /**
1191
+ * Regex patterns to exclude specific paths during crawling. Only used when navigationMode is \'all-pages\'
1192
+ */
1193
+ 'pathsFilterOut'?: Array<string>;
1162
1194
  /**
1163
1195
  * Entity name for extraction (e.g., \'Product\', \'JobListing\')
1164
1196
  */
@@ -1173,6 +1205,7 @@ declare const WorkflowWithEntityAndFieldsNavigationModeEnum: {
1173
1205
  readonly PaginatedPage: "paginated-page";
1174
1206
  readonly PageAndDetail: "page-and-detail";
1175
1207
  readonly AgenticNavigation: "agentic-navigation";
1208
+ readonly AllPages: "all-pages";
1176
1209
  };
1177
1210
  type WorkflowWithEntityAndFieldsNavigationModeEnum = typeof WorkflowWithEntityAndFieldsNavigationModeEnum[keyof typeof WorkflowWithEntityAndFieldsNavigationModeEnum];
1178
1211
  declare const WorkflowWithEntityAndFieldsIntervalEnum: {
@@ -1267,6 +1300,22 @@ interface WorkflowWithExistingSchema {
1267
1300
  * Additional data for the workflow (e.g. IDs from your system to link data)
1268
1301
  */
1269
1302
  'additionalData'?: any | null;
1303
+ /**
1304
+ * Maximum pages to crawl (default: 10,000, max: 100,000). Only used when navigationMode is \'all-pages\'
1305
+ */
1306
+ 'maxPages'?: number;
1307
+ /**
1308
+ * Maximum crawl depth (default: 50, max: 200). Only used when navigationMode is \'all-pages\'
1309
+ */
1310
+ 'maxDepth'?: number;
1311
+ /**
1312
+ * Regex patterns to include specific paths during crawling. Only used when navigationMode is \'all-pages\'
1313
+ */
1314
+ 'pathsFilterIn'?: Array<string>;
1315
+ /**
1316
+ * Regex patterns to exclude specific paths during crawling. Only used when navigationMode is \'all-pages\'
1317
+ */
1318
+ 'pathsFilterOut'?: Array<string>;
1270
1319
  /**
1271
1320
  * ID of an existing schema configuration. Use this to reference a previously defined schema without re-defining extraction fields
1272
1321
  */
@@ -1277,6 +1326,7 @@ declare const WorkflowWithExistingSchemaNavigationModeEnum: {
1277
1326
  readonly PaginatedPage: "paginated-page";
1278
1327
  readonly PageAndDetail: "page-and-detail";
1279
1328
  readonly AgenticNavigation: "agentic-navigation";
1329
+ readonly AllPages: "all-pages";
1280
1330
  };
1281
1331
  type WorkflowWithExistingSchemaNavigationModeEnum = typeof WorkflowWithExistingSchemaNavigationModeEnum[keyof typeof WorkflowWithExistingSchemaNavigationModeEnum];
1282
1332
  declare const WorkflowWithExistingSchemaIntervalEnum: {
@@ -3632,6 +3682,22 @@ interface V4WorkflowsWorkflowIdMetadataPutRequest {
3632
3682
  * Additional static data for the workflow
3633
3683
  */
3634
3684
  'additionalData'?: object;
3685
+ /**
3686
+ * Maximum pages to crawl (only for crawler workflows)
3687
+ */
3688
+ 'maxPages'?: number;
3689
+ /**
3690
+ * Maximum crawl depth (only for crawler workflows)
3691
+ */
3692
+ 'maxDepth'?: number;
3693
+ /**
3694
+ * Regex patterns to include specific paths (only for crawler workflows)
3695
+ */
3696
+ 'pathsFilterIn'?: Array<string>;
3697
+ /**
3698
+ * Regex patterns to exclude specific paths (only for crawler workflows)
3699
+ */
3700
+ 'pathsFilterOut'?: Array<string>;
3635
3701
  }
3636
3702
  declare const V4WorkflowsWorkflowIdMetadataPutRequestUpdateIntervalEnum: {
3637
3703
  readonly OnlyOnce: "ONLY_ONCE";
@@ -6316,7 +6382,7 @@ declare class ExtractionBuilderService {
6316
6382
  get workflowId(): string;
6317
6383
  get jobId(): string;
6318
6384
  constructor(workflowsCoreService: WorkflowsCoreService, entityResolverService: EntityResolverService, dataFetcherService: DataFetcherService, notificationSetupService: NotificationSetupService);
6319
- extract({ urls, name, description, navigationMode, extraction, additionalData, bypassPreview, }: ExtractOptions): PreparedExtraction;
6385
+ extract({ urls, name, description, navigationMode, extraction, additionalData, bypassPreview, userPrompt, interval, schedules, location, }: ExtractOptions): PreparedExtraction;
6320
6386
  withNotifications(options: Omit<NotificationOptions, "workflowId">): PreparedExtraction;
6321
6387
  withMonitoring(options: WorkflowMonitoringConfig): PreparedExtraction;
6322
6388
  bypassPreview(): PreparedExtraction;
package/dist/index.d.ts CHANGED
@@ -639,6 +639,22 @@ interface AgenticWorkflow {
639
639
  * Additional data for the workflow (e.g. IDs from your system to link data)
640
640
  */
641
641
  'additionalData'?: any | null;
642
+ /**
643
+ * Maximum pages to crawl (default: 10,000, max: 100,000). Only used when navigationMode is \'all-pages\'
644
+ */
645
+ 'maxPages'?: number;
646
+ /**
647
+ * Maximum crawl depth (default: 50, max: 200). Only used when navigationMode is \'all-pages\'
648
+ */
649
+ 'maxDepth'?: number;
650
+ /**
651
+ * Regex patterns to include specific paths during crawling. Only used when navigationMode is \'all-pages\'
652
+ */
653
+ 'pathsFilterIn'?: Array<string>;
654
+ /**
655
+ * Regex patterns to exclude specific paths during crawling. Only used when navigationMode is \'all-pages\'
656
+ */
657
+ 'pathsFilterOut'?: Array<string>;
642
658
  /**
643
659
  * Natural language instructions for the AI agent (10-5000 characters). Describe what data to extract and how to navigate the site
644
660
  */
@@ -1159,6 +1175,22 @@ interface WorkflowWithEntityAndFields {
1159
1175
  * Additional data for the workflow (e.g. IDs from your system to link data)
1160
1176
  */
1161
1177
  'additionalData'?: any | null;
1178
+ /**
1179
+ * Maximum pages to crawl (default: 10,000, max: 100,000). Only used when navigationMode is \'all-pages\'
1180
+ */
1181
+ 'maxPages'?: number;
1182
+ /**
1183
+ * Maximum crawl depth (default: 50, max: 200). Only used when navigationMode is \'all-pages\'
1184
+ */
1185
+ 'maxDepth'?: number;
1186
+ /**
1187
+ * Regex patterns to include specific paths during crawling. Only used when navigationMode is \'all-pages\'
1188
+ */
1189
+ 'pathsFilterIn'?: Array<string>;
1190
+ /**
1191
+ * Regex patterns to exclude specific paths during crawling. Only used when navigationMode is \'all-pages\'
1192
+ */
1193
+ 'pathsFilterOut'?: Array<string>;
1162
1194
  /**
1163
1195
  * Entity name for extraction (e.g., \'Product\', \'JobListing\')
1164
1196
  */
@@ -1173,6 +1205,7 @@ declare const WorkflowWithEntityAndFieldsNavigationModeEnum: {
1173
1205
  readonly PaginatedPage: "paginated-page";
1174
1206
  readonly PageAndDetail: "page-and-detail";
1175
1207
  readonly AgenticNavigation: "agentic-navigation";
1208
+ readonly AllPages: "all-pages";
1176
1209
  };
1177
1210
  type WorkflowWithEntityAndFieldsNavigationModeEnum = typeof WorkflowWithEntityAndFieldsNavigationModeEnum[keyof typeof WorkflowWithEntityAndFieldsNavigationModeEnum];
1178
1211
  declare const WorkflowWithEntityAndFieldsIntervalEnum: {
@@ -1267,6 +1300,22 @@ interface WorkflowWithExistingSchema {
1267
1300
  * Additional data for the workflow (e.g. IDs from your system to link data)
1268
1301
  */
1269
1302
  'additionalData'?: any | null;
1303
+ /**
1304
+ * Maximum pages to crawl (default: 10,000, max: 100,000). Only used when navigationMode is \'all-pages\'
1305
+ */
1306
+ 'maxPages'?: number;
1307
+ /**
1308
+ * Maximum crawl depth (default: 50, max: 200). Only used when navigationMode is \'all-pages\'
1309
+ */
1310
+ 'maxDepth'?: number;
1311
+ /**
1312
+ * Regex patterns to include specific paths during crawling. Only used when navigationMode is \'all-pages\'
1313
+ */
1314
+ 'pathsFilterIn'?: Array<string>;
1315
+ /**
1316
+ * Regex patterns to exclude specific paths during crawling. Only used when navigationMode is \'all-pages\'
1317
+ */
1318
+ 'pathsFilterOut'?: Array<string>;
1270
1319
  /**
1271
1320
  * ID of an existing schema configuration. Use this to reference a previously defined schema without re-defining extraction fields
1272
1321
  */
@@ -1277,6 +1326,7 @@ declare const WorkflowWithExistingSchemaNavigationModeEnum: {
1277
1326
  readonly PaginatedPage: "paginated-page";
1278
1327
  readonly PageAndDetail: "page-and-detail";
1279
1328
  readonly AgenticNavigation: "agentic-navigation";
1329
+ readonly AllPages: "all-pages";
1280
1330
  };
1281
1331
  type WorkflowWithExistingSchemaNavigationModeEnum = typeof WorkflowWithExistingSchemaNavigationModeEnum[keyof typeof WorkflowWithExistingSchemaNavigationModeEnum];
1282
1332
  declare const WorkflowWithExistingSchemaIntervalEnum: {
@@ -3632,6 +3682,22 @@ interface V4WorkflowsWorkflowIdMetadataPutRequest {
3632
3682
  * Additional static data for the workflow
3633
3683
  */
3634
3684
  'additionalData'?: object;
3685
+ /**
3686
+ * Maximum pages to crawl (only for crawler workflows)
3687
+ */
3688
+ 'maxPages'?: number;
3689
+ /**
3690
+ * Maximum crawl depth (only for crawler workflows)
3691
+ */
3692
+ 'maxDepth'?: number;
3693
+ /**
3694
+ * Regex patterns to include specific paths (only for crawler workflows)
3695
+ */
3696
+ 'pathsFilterIn'?: Array<string>;
3697
+ /**
3698
+ * Regex patterns to exclude specific paths (only for crawler workflows)
3699
+ */
3700
+ 'pathsFilterOut'?: Array<string>;
3635
3701
  }
3636
3702
  declare const V4WorkflowsWorkflowIdMetadataPutRequestUpdateIntervalEnum: {
3637
3703
  readonly OnlyOnce: "ONLY_ONCE";
@@ -6316,7 +6382,7 @@ declare class ExtractionBuilderService {
6316
6382
  get workflowId(): string;
6317
6383
  get jobId(): string;
6318
6384
  constructor(workflowsCoreService: WorkflowsCoreService, entityResolverService: EntityResolverService, dataFetcherService: DataFetcherService, notificationSetupService: NotificationSetupService);
6319
- extract({ urls, name, description, navigationMode, extraction, additionalData, bypassPreview, }: ExtractOptions): PreparedExtraction;
6385
+ extract({ urls, name, description, navigationMode, extraction, additionalData, bypassPreview, userPrompt, interval, schedules, location, }: ExtractOptions): PreparedExtraction;
6320
6386
  withNotifications(options: Omit<NotificationOptions, "workflowId">): PreparedExtraction;
6321
6387
  withMonitoring(options: WorkflowMonitoringConfig): PreparedExtraction;
6322
6388
  bypassPreview(): PreparedExtraction;
package/dist/index.js CHANGED
@@ -4598,7 +4598,11 @@ var ExtractionBuilderService = class {
4598
4598
  navigationMode,
4599
4599
  extraction,
4600
4600
  additionalData,
4601
- bypassPreview
4601
+ bypassPreview,
4602
+ userPrompt,
4603
+ interval,
4604
+ schedules,
4605
+ location
4602
4606
  }) {
4603
4607
  let entity = "ai-detection";
4604
4608
  if (extraction) {
@@ -4610,6 +4614,9 @@ var ExtractionBuilderService = class {
4610
4614
  entity = builtSchema.entityName ? { name: builtSchema.entityName, fields: builtSchema.fields } : { fields: builtSchema.fields };
4611
4615
  }
4612
4616
  }
4617
+ if (userPrompt) {
4618
+ this._userPrompt = userPrompt;
4619
+ }
4613
4620
  this._options = {
4614
4621
  urls,
4615
4622
  name,
@@ -4617,7 +4624,11 @@ var ExtractionBuilderService = class {
4617
4624
  navigationMode: navigationMode || "single-page",
4618
4625
  entity,
4619
4626
  bypassPreview: bypassPreview ?? false,
4620
- additionalData
4627
+ additionalData,
4628
+ userPrompt,
4629
+ interval,
4630
+ schedules,
4631
+ location
4621
4632
  };
4622
4633
  return this;
4623
4634
  }
@@ -4679,15 +4690,12 @@ var ExtractionBuilderService = class {
4679
4690
  fields: typeof entity === "object" && "fields" in entity ? entity.fields : []
4680
4691
  };
4681
4692
  } else {
4682
- resolvedEntity = await this.entityResolverService.resolveEntity(
4683
- entity,
4684
- {
4685
- link: urls[0],
4686
- location: this._options.location,
4687
- navigationMode,
4688
- selectorMode: useSelectorMode
4689
- }
4690
- );
4693
+ resolvedEntity = await this.entityResolverService.resolveEntity(entity, {
4694
+ link: urls[0],
4695
+ location: this._options.location,
4696
+ navigationMode,
4697
+ selectorMode: useSelectorMode
4698
+ });
4691
4699
  }
4692
4700
  const workflow = await this.workflowsCoreService.create({
4693
4701
  urls,
@@ -5227,7 +5235,7 @@ var WSS_API_URI = process.env.KADOA_WSS_API_URI ?? "wss://realtime.kadoa.com";
5227
5235
  var REALTIME_API_URI = process.env.KADOA_REALTIME_API_URI ?? "https://realtime.kadoa.com";
5228
5236
 
5229
5237
  // src/version.ts
5230
- var SDK_VERSION = "0.19.0";
5238
+ var SDK_VERSION = "0.19.2";
5231
5239
  var SDK_NAME = "kadoa-node-sdk";
5232
5240
  var SDK_LANGUAGE = "node";
5233
5241