@botpress/api 0.50.0 → 0.50.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/gen/state.ts CHANGED
@@ -10541,38 +10541,50 @@ export const state = {
10541
10541
  "type": "object",
10542
10542
  "properties": {
10543
10543
  "parsing": {
10544
- "default": {},
10545
10544
  "type": "object",
10546
10545
  "properties": {
10547
10546
  "minimumParagraphLength": {
10548
- "default": 500,
10549
10547
  "type": "integer",
10550
10548
  "minimum": 50,
10551
- "maximum": 2000
10549
+ "maximum": 2000,
10550
+ "description": "The minimum length a standalone paragraph should have. If a paragraph is shorter than this, it will be merged with the next immediate paragraph."
10551
+ },
10552
+ "smartCleanup": {
10553
+ "type": "boolean",
10554
+ "description": "(Team/Enterprise plan only, charged as AI Spend) Enabling this will use a lightweight/inexpensive LLM to clean up the extracted content of PDF files before indexing them to increase the quality of the stored vectors, as PDFs often store raw text in unusual ways which when extracted may result in formatting issues (e.g. broken sentences/paragraphs, unexpected headings, garbled characters, etc.) that can affect retrieval performance for certain user queries if left untouched.\n\nNotes:\n- This feature is only available in Team and Enterprise plans.\n- This feature is only available for PDF files. If the file isn't a PDF, this setting will be ignored and no AI Spend will be incurred.\n- We recommend using this feature for PDFs that have custom layouts or design. For simple text-based PDFs like documents and books, this feature is usually not necessary.\n- The smart cleanup takes some time to perform due to the LLM calls involved, so enabling it will increase the total time it takes to index the file.\n- We take steps to prevent the original text from being fundamentally changed but due to the nature of LLMs this could theoretically still happen so it's recommended to review the passages generated for the file after indexing to ensure the content is still accurate.\n- This feature is limited to the first 30 pages or 20 KB of text in the PDF file (whichever comes first). If the file has more content than these limits then the rest of the file will be indexed as-is without any cleanup. If you need to clean up the content of the entire file, consider splitting it into smaller files."
10552
10555
  }
10553
10556
  },
10554
10557
  "additionalProperties": false
10555
10558
  },
10556
10559
  "chunking": {
10557
- "default": {},
10558
10560
  "type": "object",
10559
10561
  "properties": {
10560
10562
  "maximumChunkLength": {
10561
- "default": 1250,
10562
10563
  "type": "integer",
10563
10564
  "minimum": 100,
10564
- "maximum": 5000
10565
+ "maximum": 5000,
10566
+ "description": "The maximum length of a chunk in characters."
10567
+ },
10568
+ "embeddedContextLevels": {
10569
+ "type": "integer",
10570
+ "minimum": 0,
10571
+ "maximum": 3,
10572
+ "description": "The number of surrounding context levels to include in the vector embedding of the chunk."
10573
+ },
10574
+ "embedBreadcrumb": {
10575
+ "type": "boolean",
10576
+ "description": "Include the breadcrumb of the chunk in the vector embedding."
10565
10577
  }
10566
10578
  },
10567
10579
  "additionalProperties": false
10568
10580
  },
10569
10581
  "summarization": {
10570
- "default": {},
10571
10582
  "type": "object",
10572
10583
  "properties": {
10573
10584
  "enable": {
10574
10585
  "default": false,
10575
- "type": "boolean"
10586
+ "type": "boolean",
10587
+ "description": "(Team/Enterprise plan only, charged as AI Spend) Create summaries for this file and index them as standalone vectors. Enabling this option will incur in AI Spend cost (charged to the workspace of the bot) to generate the summaries based on the amount of content in the file and the summarization model used.\n\nPlease note that this feature is only available in Team and Enterprise plans."
10576
10588
  },
10577
10589
  "modelType": {
10578
10590
  "default": "balanced",
@@ -10581,23 +10593,24 @@ export const state = {
10581
10593
  "inexpensive",
10582
10594
  "balanced",
10583
10595
  "accurate"
10584
- ]
10596
+ ],
10597
+ "description": "The model type to use for summarization."
10585
10598
  },
10586
10599
  "minimumInputLength": {
10587
- "default": 3000,
10588
10600
  "type": "integer",
10589
10601
  "minimum": 1000,
10590
- "maximum": 10000
10602
+ "maximum": 10000,
10603
+ "description": "The minimum length a section of the file should have to create a summary of it."
10591
10604
  },
10592
10605
  "outputTokenLimit": {
10593
- "default": 2000,
10594
10606
  "type": "integer",
10595
10607
  "minimum": 1000,
10596
- "maximum": 10000
10608
+ "maximum": 10000,
10609
+ "description": "The maximum length of a summary (in tokens)."
10597
10610
  },
10598
10611
  "generateMasterSummary": {
10599
- "default": true,
10600
- "type": "boolean"
10612
+ "type": "boolean",
10613
+ "description": "Generate a summary of the entire file and index it as a standalone vector."
10601
10614
  }
10602
10615
  },
10603
10616
  "additionalProperties": false
@@ -12418,7 +12431,7 @@ export const state = {
12418
12431
  "title": "Botpress API",
12419
12432
  "description": "API for Botpress Cloud",
12420
12433
  "server": "https://api.botpress.cloud",
12421
- "version": "0.50.0",
12434
+ "version": "0.50.2",
12422
12435
  "prefix": "v1"
12423
12436
  },
12424
12437
  "errors": [