@knowledge-stack/ksapi 1.106.2 → 1.107.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @knowledge-stack/ksapi@1.106.2
1
+ # @knowledge-stack/ksapi@1.107.0
2
2
 
3
3
  A TypeScript SDK client for the localhost API.
4
4
 
@@ -577,7 +577,7 @@ and is automatically generated by the
577
577
  [OpenAPI Generator](https://openapi-generator.tech) project:
578
578
 
579
579
  - API version: `0.1.0`
580
- - Package version: `1.106.2`
580
+ - Package version: `1.107.0`
581
581
  - Generator version: `7.21.0`
582
582
  - Build package: `org.openapitools.codegen.languages.TypeScriptFetchClientCodegen`
583
583
 
@@ -30,11 +30,17 @@ export interface ChunkMetadata {
30
30
  */
31
31
  s3Urls?: Array<string>;
32
32
  /**
33
- * LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text.
33
+ * LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text, and for JSON/YAML chunks (with summarize_for_embedding) as the sole dense embedding text.
34
34
  * @type {string}
35
35
  * @memberof ChunkMetadata
36
36
  */
37
37
  summary?: string | null;
38
+ /**
39
+ * When true, this chunk's dense embedding is built from its LLM-generated summary (see summary) instead of its raw content. Set for parsed JSON/YAML single chunks so noisy structured text does not dominate the vector; the raw content is still kept for display and sparse (keyword) retrieval. Enrichment generates the summary when this is set and summary is empty.
40
+ * @type {boolean}
41
+ * @memberof ChunkMetadata
42
+ */
43
+ summarizeForEmbedding?: boolean;
38
44
  /**
39
45
  * S3 URI to extracted PDF text used for LLM grounding during enrichment
40
46
  * @type {string}
@@ -31,6 +31,7 @@ export function ChunkMetadataFromJSONTyped(json, ignoreDiscriminator) {
31
31
  'polygons': json['polygons'] == null ? undefined : (json['polygons'].map(PolygonReferenceFromJSON)),
32
32
  's3Urls': json['s3_urls'] == null ? undefined : json['s3_urls'],
33
33
  'summary': json['summary'] == null ? undefined : json['summary'],
34
+ 'summarizeForEmbedding': json['summarize_for_embedding'] == null ? undefined : json['summarize_for_embedding'],
34
35
  'extractedTextS3Uri': json['extracted_text_s3_uri'] == null ? undefined : json['extracted_text_s3_uri'],
35
36
  'secondaryTaxonomy': json['secondary_taxonomy'] == null ? undefined : ImageTaxonomyFromJSON(json['secondary_taxonomy']),
36
37
  'sheetName': json['sheet_name'] == null ? undefined : json['sheet_name'],
@@ -55,6 +56,7 @@ export function ChunkMetadataToJSONTyped(value, ignoreDiscriminator = false) {
55
56
  'polygons': value['polygons'] == null ? undefined : (value['polygons'].map(PolygonReferenceToJSON)),
56
57
  's3_urls': value['s3Urls'],
57
58
  'summary': value['summary'],
59
+ 'summarize_for_embedding': value['summarizeForEmbedding'],
58
60
  'extracted_text_s3_uri': value['extractedTextS3Uri'],
59
61
  'secondary_taxonomy': ImageTaxonomyToJSON(value['secondaryTaxonomy']),
60
62
  'sheet_name': value['sheetName'],
@@ -21,6 +21,8 @@ export declare const DocumentType: {
21
21
  readonly Xlsx: "XLSX";
22
22
  readonly Csv: "CSV";
23
23
  readonly Pptx: "PPTX";
24
+ readonly Json: "JSON";
25
+ readonly Yaml: "YAML";
24
26
  readonly Unknown: "UNKNOWN";
25
27
  };
26
28
  export type DocumentType = typeof DocumentType[keyof typeof DocumentType];
@@ -23,6 +23,8 @@ export const DocumentType = {
23
23
  Xlsx: 'XLSX',
24
24
  Csv: 'CSV',
25
25
  Pptx: 'PPTX',
26
+ Json: 'JSON',
27
+ Yaml: 'YAML',
26
28
  Unknown: 'UNKNOWN'
27
29
  };
28
30
  export function instanceOfDocumentType(value) {
@@ -30,11 +30,17 @@ export interface ChunkMetadata {
30
30
  */
31
31
  s3Urls?: Array<string>;
32
32
  /**
33
- * LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text.
33
+ * LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text, and for JSON/YAML chunks (with summarize_for_embedding) as the sole dense embedding text.
34
34
  * @type {string}
35
35
  * @memberof ChunkMetadata
36
36
  */
37
37
  summary?: string | null;
38
+ /**
39
+ * When true, this chunk's dense embedding is built from its LLM-generated summary (see summary) instead of its raw content. Set for parsed JSON/YAML single chunks so noisy structured text does not dominate the vector; the raw content is still kept for display and sparse (keyword) retrieval. Enrichment generates the summary when this is set and summary is empty.
40
+ * @type {boolean}
41
+ * @memberof ChunkMetadata
42
+ */
43
+ summarizeForEmbedding?: boolean;
38
44
  /**
39
45
  * S3 URI to extracted PDF text used for LLM grounding during enrichment
40
46
  * @type {string}
@@ -39,6 +39,7 @@ function ChunkMetadataFromJSONTyped(json, ignoreDiscriminator) {
39
39
  'polygons': json['polygons'] == null ? undefined : (json['polygons'].map(PolygonReference_1.PolygonReferenceFromJSON)),
40
40
  's3Urls': json['s3_urls'] == null ? undefined : json['s3_urls'],
41
41
  'summary': json['summary'] == null ? undefined : json['summary'],
42
+ 'summarizeForEmbedding': json['summarize_for_embedding'] == null ? undefined : json['summarize_for_embedding'],
42
43
  'extractedTextS3Uri': json['extracted_text_s3_uri'] == null ? undefined : json['extracted_text_s3_uri'],
43
44
  'secondaryTaxonomy': json['secondary_taxonomy'] == null ? undefined : (0, ImageTaxonomy_1.ImageTaxonomyFromJSON)(json['secondary_taxonomy']),
44
45
  'sheetName': json['sheet_name'] == null ? undefined : json['sheet_name'],
@@ -63,6 +64,7 @@ function ChunkMetadataToJSONTyped(value, ignoreDiscriminator = false) {
63
64
  'polygons': value['polygons'] == null ? undefined : (value['polygons'].map(PolygonReference_1.PolygonReferenceToJSON)),
64
65
  's3_urls': value['s3Urls'],
65
66
  'summary': value['summary'],
67
+ 'summarize_for_embedding': value['summarizeForEmbedding'],
66
68
  'extracted_text_s3_uri': value['extractedTextS3Uri'],
67
69
  'secondary_taxonomy': (0, ImageTaxonomy_1.ImageTaxonomyToJSON)(value['secondaryTaxonomy']),
68
70
  'sheet_name': value['sheetName'],
@@ -21,6 +21,8 @@ export declare const DocumentType: {
21
21
  readonly Xlsx: "XLSX";
22
22
  readonly Csv: "CSV";
23
23
  readonly Pptx: "PPTX";
24
+ readonly Json: "JSON";
25
+ readonly Yaml: "YAML";
24
26
  readonly Unknown: "UNKNOWN";
25
27
  };
26
28
  export type DocumentType = typeof DocumentType[keyof typeof DocumentType];
@@ -31,6 +31,8 @@ exports.DocumentType = {
31
31
  Xlsx: 'XLSX',
32
32
  Csv: 'CSV',
33
33
  Pptx: 'PPTX',
34
+ Json: 'JSON',
35
+ Yaml: 'YAML',
34
36
  Unknown: 'UNKNOWN'
35
37
  };
36
38
  function instanceOfDocumentType(value) {
@@ -10,6 +10,7 @@ Name | Type
10
10
  `polygons` | [Array&lt;PolygonReference&gt;](PolygonReference.md)
11
11
  `s3Urls` | Array&lt;string&gt;
12
12
  `summary` | string
13
+ `summarizeForEmbedding` | boolean
13
14
  `extractedTextS3Uri` | string
14
15
  `secondaryTaxonomy` | [ImageTaxonomy](ImageTaxonomy.md)
15
16
  `sheetName` | string
@@ -32,6 +33,7 @@ const example = {
32
33
  "polygons": null,
33
34
  "s3Urls": null,
34
35
  "summary": null,
36
+ "summarizeForEmbedding": null,
35
37
  "extractedTextS3Uri": null,
36
38
  "secondaryTaxonomy": null,
37
39
  "sheetName": null,
@@ -589,7 +589,7 @@ example().catch(console.error);
589
589
  | **sortOrder** | `PathOrder` | Sort order for results (default: LOGICAL) | [Optional] [Defaults to `undefined`] [Enum: LOGICAL, NAME, UPDATED_AT, CREATED_AT] |
590
590
  | **sortDir** | `SortDirection` | Sort direction; overrides the column\&#39;s natural default | [Optional] [Defaults to `undefined`] [Enum: ASC, DESC] |
591
591
  | **ownerId** | `string` | Filter to documents owned by this user | [Optional] [Defaults to `undefined`] |
592
- | **documentType** | `DocumentType` | Filter to documents of this type | [Optional] [Defaults to `undefined`] [Enum: PDF, DOCX, PLAINTEXT, IMAGE, XLSX, CSV, PPTX, UNKNOWN] |
592
+ | **documentType** | `DocumentType` | Filter to documents of this type | [Optional] [Defaults to `undefined`] [Enum: PDF, DOCX, PLAINTEXT, IMAGE, XLSX, CSV, PPTX, JSON, YAML, UNKNOWN] |
593
593
  | **withTags** | `boolean` | Include tags in the response (default: false) | [Optional] [Defaults to `false`] |
594
594
  | **limit** | `number` | Number of items per page | [Optional] [Defaults to `20`] |
595
595
  | **offset** | `number` | Number of items to skip | [Optional] [Defaults to `0`] |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@knowledge-stack/ksapi",
3
- "version": "1.106.2",
3
+ "version": "1.107.0",
4
4
  "description": "OpenAPI client for @knowledge-stack/ksapi",
5
5
  "author": "OpenAPI-Generator",
6
6
  "repository": {
@@ -47,11 +47,17 @@ export interface ChunkMetadata {
47
47
  */
48
48
  s3Urls?: Array<string>;
49
49
  /**
50
- * LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text.
50
+ * LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text, and for JSON/YAML chunks (with summarize_for_embedding) as the sole dense embedding text.
51
51
  * @type {string}
52
52
  * @memberof ChunkMetadata
53
53
  */
54
54
  summary?: string | null;
55
+ /**
56
+ * When true, this chunk's dense embedding is built from its LLM-generated summary (see summary) instead of its raw content. Set for parsed JSON/YAML single chunks so noisy structured text does not dominate the vector; the raw content is still kept for display and sparse (keyword) retrieval. Enrichment generates the summary when this is set and summary is empty.
57
+ * @type {boolean}
58
+ * @memberof ChunkMetadata
59
+ */
60
+ summarizeForEmbedding?: boolean;
55
61
  /**
56
62
  * S3 URI to extracted PDF text used for LLM grounding during enrichment
57
63
  * @type {string}
@@ -159,6 +165,7 @@ export function ChunkMetadataFromJSONTyped(json: any, ignoreDiscriminator: boole
159
165
  'polygons': json['polygons'] == null ? undefined : ((json['polygons'] as Array<any>).map(PolygonReferenceFromJSON)),
160
166
  's3Urls': json['s3_urls'] == null ? undefined : json['s3_urls'],
161
167
  'summary': json['summary'] == null ? undefined : json['summary'],
168
+ 'summarizeForEmbedding': json['summarize_for_embedding'] == null ? undefined : json['summarize_for_embedding'],
162
169
  'extractedTextS3Uri': json['extracted_text_s3_uri'] == null ? undefined : json['extracted_text_s3_uri'],
163
170
  'secondaryTaxonomy': json['secondary_taxonomy'] == null ? undefined : ImageTaxonomyFromJSON(json['secondary_taxonomy']),
164
171
  'sheetName': json['sheet_name'] == null ? undefined : json['sheet_name'],
@@ -187,6 +194,7 @@ export function ChunkMetadataToJSONTyped(value?: ChunkMetadata | null, ignoreDis
187
194
  'polygons': value['polygons'] == null ? undefined : ((value['polygons'] as Array<any>).map(PolygonReferenceToJSON)),
188
195
  's3_urls': value['s3Urls'],
189
196
  'summary': value['summary'],
197
+ 'summarize_for_embedding': value['summarizeForEmbedding'],
190
198
  'extracted_text_s3_uri': value['extractedTextS3Uri'],
191
199
  'secondary_taxonomy': ImageTaxonomyToJSON(value['secondaryTaxonomy']),
192
200
  'sheet_name': value['sheetName'],
@@ -25,6 +25,8 @@ export const DocumentType = {
25
25
  Xlsx: 'XLSX',
26
26
  Csv: 'CSV',
27
27
  Pptx: 'PPTX',
28
+ Json: 'JSON',
29
+ Yaml: 'YAML',
28
30
  Unknown: 'UNKNOWN'
29
31
  } as const;
30
32
  export type DocumentType = typeof DocumentType[keyof typeof DocumentType];