@knowledge-stack/ksapi 1.106.2 → 1.107.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/esm/models/ChunkMetadata.d.ts +7 -1
- package/dist/esm/models/ChunkMetadata.js +2 -0
- package/dist/esm/models/DocumentType.d.ts +2 -0
- package/dist/esm/models/DocumentType.js +2 -0
- package/dist/models/ChunkMetadata.d.ts +7 -1
- package/dist/models/ChunkMetadata.js +2 -0
- package/dist/models/DocumentType.d.ts +2 -0
- package/dist/models/DocumentType.js +2 -0
- package/docs/ChunkMetadata.md +2 -0
- package/docs/DocumentsApi.md +1 -1
- package/package.json +1 -1
- package/src/models/ChunkMetadata.ts +9 -1
- package/src/models/DocumentType.ts +2 -0
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @knowledge-stack/ksapi@1.106.2
|
|
1
|
+
# @knowledge-stack/ksapi@1.107.0
|
|
2
2
|
|
|
3
3
|
A TypeScript SDK client for the localhost API.
|
|
4
4
|
|
|
@@ -577,7 +577,7 @@ and is automatically generated by the
|
|
|
577
577
|
[OpenAPI Generator](https://openapi-generator.tech) project:
|
|
578
578
|
|
|
579
579
|
- API version: `0.1.0`
|
|
580
|
-
- Package version: `1.106.2`
|
|
580
|
+
- Package version: `1.107.0`
|
|
581
581
|
- Generator version: `7.21.0`
|
|
582
582
|
- Build package: `org.openapitools.codegen.languages.TypeScriptFetchClientCodegen`
|
|
583
583
|
|
|
@@ -30,11 +30,17 @@ export interface ChunkMetadata {
|
|
|
30
30
|
*/
|
|
31
31
|
s3Urls?: Array<string>;
|
|
32
32
|
/**
|
|
33
|
-
* LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text.
|
|
33
|
+
* LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text, and for JSON/YAML chunks (with summarize_for_embedding) as the sole dense embedding text.
|
|
34
34
|
* @type {string}
|
|
35
35
|
* @memberof ChunkMetadata
|
|
36
36
|
*/
|
|
37
37
|
summary?: string | null;
|
|
38
|
+
/**
|
|
39
|
+
* When True, this chunk's dense embedding is built from its LLM-generated summary (see summary) instead of its raw content. Set for parsed JSON/YAML single chunks so noisy structured text does not dominate the vector; the raw content is still kept for display and sparse (keyword) retrieval. Enrichment generates the summary when this is set and summary is empty.
|
|
40
|
+
* @type {boolean}
|
|
41
|
+
* @memberof ChunkMetadata
|
|
42
|
+
*/
|
|
43
|
+
summarizeForEmbedding?: boolean;
|
|
38
44
|
/**
|
|
39
45
|
* S3 URI to extracted PDF text used for LLM grounding during enrichment
|
|
40
46
|
* @type {string}
|
|
@@ -31,6 +31,7 @@ export function ChunkMetadataFromJSONTyped(json, ignoreDiscriminator) {
|
|
|
31
31
|
'polygons': json['polygons'] == null ? undefined : (json['polygons'].map(PolygonReferenceFromJSON)),
|
|
32
32
|
's3Urls': json['s3_urls'] == null ? undefined : json['s3_urls'],
|
|
33
33
|
'summary': json['summary'] == null ? undefined : json['summary'],
|
|
34
|
+
'summarizeForEmbedding': json['summarize_for_embedding'] == null ? undefined : json['summarize_for_embedding'],
|
|
34
35
|
'extractedTextS3Uri': json['extracted_text_s3_uri'] == null ? undefined : json['extracted_text_s3_uri'],
|
|
35
36
|
'secondaryTaxonomy': json['secondary_taxonomy'] == null ? undefined : ImageTaxonomyFromJSON(json['secondary_taxonomy']),
|
|
36
37
|
'sheetName': json['sheet_name'] == null ? undefined : json['sheet_name'],
|
|
@@ -55,6 +56,7 @@ export function ChunkMetadataToJSONTyped(value, ignoreDiscriminator = false) {
|
|
|
55
56
|
'polygons': value['polygons'] == null ? undefined : (value['polygons'].map(PolygonReferenceToJSON)),
|
|
56
57
|
's3_urls': value['s3Urls'],
|
|
57
58
|
'summary': value['summary'],
|
|
59
|
+
'summarize_for_embedding': value['summarizeForEmbedding'],
|
|
58
60
|
'extracted_text_s3_uri': value['extractedTextS3Uri'],
|
|
59
61
|
'secondary_taxonomy': ImageTaxonomyToJSON(value['secondaryTaxonomy']),
|
|
60
62
|
'sheet_name': value['sheetName'],
|
|
@@ -21,6 +21,8 @@ export declare const DocumentType: {
|
|
|
21
21
|
readonly Xlsx: "XLSX";
|
|
22
22
|
readonly Csv: "CSV";
|
|
23
23
|
readonly Pptx: "PPTX";
|
|
24
|
+
readonly Json: "JSON";
|
|
25
|
+
readonly Yaml: "YAML";
|
|
24
26
|
readonly Unknown: "UNKNOWN";
|
|
25
27
|
};
|
|
26
28
|
export type DocumentType = typeof DocumentType[keyof typeof DocumentType];
|
|
@@ -30,11 +30,17 @@ export interface ChunkMetadata {
|
|
|
30
30
|
*/
|
|
31
31
|
s3Urls?: Array<string>;
|
|
32
32
|
/**
|
|
33
|
-
* LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text.
|
|
33
|
+
* LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text, and for JSON/YAML chunks (with summarize_for_embedding) as the sole dense embedding text.
|
|
34
34
|
* @type {string}
|
|
35
35
|
* @memberof ChunkMetadata
|
|
36
36
|
*/
|
|
37
37
|
summary?: string | null;
|
|
38
|
+
/**
|
|
39
|
+
* When True, this chunk's dense embedding is built from its LLM-generated summary (see summary) instead of its raw content. Set for parsed JSON/YAML single chunks so noisy structured text does not dominate the vector; the raw content is still kept for display and sparse (keyword) retrieval. Enrichment generates the summary when this is set and summary is empty.
|
|
40
|
+
* @type {boolean}
|
|
41
|
+
* @memberof ChunkMetadata
|
|
42
|
+
*/
|
|
43
|
+
summarizeForEmbedding?: boolean;
|
|
38
44
|
/**
|
|
39
45
|
* S3 URI to extracted PDF text used for LLM grounding during enrichment
|
|
40
46
|
* @type {string}
|
|
@@ -39,6 +39,7 @@ function ChunkMetadataFromJSONTyped(json, ignoreDiscriminator) {
|
|
|
39
39
|
'polygons': json['polygons'] == null ? undefined : (json['polygons'].map(PolygonReference_1.PolygonReferenceFromJSON)),
|
|
40
40
|
's3Urls': json['s3_urls'] == null ? undefined : json['s3_urls'],
|
|
41
41
|
'summary': json['summary'] == null ? undefined : json['summary'],
|
|
42
|
+
'summarizeForEmbedding': json['summarize_for_embedding'] == null ? undefined : json['summarize_for_embedding'],
|
|
42
43
|
'extractedTextS3Uri': json['extracted_text_s3_uri'] == null ? undefined : json['extracted_text_s3_uri'],
|
|
43
44
|
'secondaryTaxonomy': json['secondary_taxonomy'] == null ? undefined : (0, ImageTaxonomy_1.ImageTaxonomyFromJSON)(json['secondary_taxonomy']),
|
|
44
45
|
'sheetName': json['sheet_name'] == null ? undefined : json['sheet_name'],
|
|
@@ -63,6 +64,7 @@ function ChunkMetadataToJSONTyped(value, ignoreDiscriminator = false) {
|
|
|
63
64
|
'polygons': value['polygons'] == null ? undefined : (value['polygons'].map(PolygonReference_1.PolygonReferenceToJSON)),
|
|
64
65
|
's3_urls': value['s3Urls'],
|
|
65
66
|
'summary': value['summary'],
|
|
67
|
+
'summarize_for_embedding': value['summarizeForEmbedding'],
|
|
66
68
|
'extracted_text_s3_uri': value['extractedTextS3Uri'],
|
|
67
69
|
'secondary_taxonomy': (0, ImageTaxonomy_1.ImageTaxonomyToJSON)(value['secondaryTaxonomy']),
|
|
68
70
|
'sheet_name': value['sheetName'],
|
|
@@ -21,6 +21,8 @@ export declare const DocumentType: {
|
|
|
21
21
|
readonly Xlsx: "XLSX";
|
|
22
22
|
readonly Csv: "CSV";
|
|
23
23
|
readonly Pptx: "PPTX";
|
|
24
|
+
readonly Json: "JSON";
|
|
25
|
+
readonly Yaml: "YAML";
|
|
24
26
|
readonly Unknown: "UNKNOWN";
|
|
25
27
|
};
|
|
26
28
|
export type DocumentType = typeof DocumentType[keyof typeof DocumentType];
|
package/docs/ChunkMetadata.md
CHANGED
|
@@ -10,6 +10,7 @@ Name | Type
|
|
|
10
10
|
`polygons` | [Array<PolygonReference>](PolygonReference.md)
|
|
11
11
|
`s3Urls` | Array<string>
|
|
12
12
|
`summary` | string
|
|
13
|
+
`summarizeForEmbedding` | boolean
|
|
13
14
|
`extractedTextS3Uri` | string
|
|
14
15
|
`secondaryTaxonomy` | [ImageTaxonomy](ImageTaxonomy.md)
|
|
15
16
|
`sheetName` | string
|
|
@@ -32,6 +33,7 @@ const example = {
|
|
|
32
33
|
"polygons": null,
|
|
33
34
|
"s3Urls": null,
|
|
34
35
|
"summary": null,
|
|
36
|
+
"summarizeForEmbedding": null,
|
|
35
37
|
"extractedTextS3Uri": null,
|
|
36
38
|
"secondaryTaxonomy": null,
|
|
37
39
|
"sheetName": null,
|
package/docs/DocumentsApi.md
CHANGED
|
@@ -589,7 +589,7 @@ example().catch(console.error);
|
|
|
589
589
|
| **sortOrder** | `PathOrder` | Sort order for results (default: LOGICAL) | [Optional] [Defaults to `undefined`] [Enum: LOGICAL, NAME, UPDATED_AT, CREATED_AT] |
|
|
590
590
|
| **sortDir** | `SortDirection` | Sort direction; overrides the column\'s natural default | [Optional] [Defaults to `undefined`] [Enum: ASC, DESC] |
|
|
591
591
|
| **ownerId** | `string` | Filter to documents owned by this user | [Optional] [Defaults to `undefined`] |
|
|
592
|
-
| **documentType** | `DocumentType` | Filter to documents of this type | [Optional] [Defaults to `undefined`] [Enum: PDF, DOCX, PLAINTEXT, IMAGE, XLSX, CSV, PPTX, UNKNOWN] |
|
|
592
|
+
| **documentType** | `DocumentType` | Filter to documents of this type | [Optional] [Defaults to `undefined`] [Enum: PDF, DOCX, PLAINTEXT, IMAGE, XLSX, CSV, PPTX, JSON, YAML, UNKNOWN] |
|
|
593
593
|
| **withTags** | `boolean` | Include tags in the response (default: false) | [Optional] [Defaults to `false`] |
|
|
594
594
|
| **limit** | `number` | Number of items per page | [Optional] [Defaults to `20`] |
|
|
595
595
|
| **offset** | `number` | Number of items to skip | [Optional] [Defaults to `0`] |
|
package/package.json
CHANGED
|
@@ -47,11 +47,17 @@ export interface ChunkMetadata {
|
|
|
47
47
|
*/
|
|
48
48
|
s3Urls?: Array<string>;
|
|
49
49
|
/**
|
|
50
|
-
* LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text.
|
|
50
|
+
* LLM-generated summary of the chunk content. Used for TABLE and HTML chunks to enrich embedding text, and for JSON/YAML chunks (with summarize_for_embedding) as the sole dense embedding text.
|
|
51
51
|
* @type {string}
|
|
52
52
|
* @memberof ChunkMetadata
|
|
53
53
|
*/
|
|
54
54
|
summary?: string | null;
|
|
55
|
+
/**
|
|
56
|
+
* When True, this chunk's dense embedding is built from its LLM-generated summary (see summary) instead of its raw content. Set for parsed JSON/YAML single chunks so noisy structured text does not dominate the vector; the raw content is still kept for display and sparse (keyword) retrieval. Enrichment generates the summary when this is set and summary is empty.
|
|
57
|
+
* @type {boolean}
|
|
58
|
+
* @memberof ChunkMetadata
|
|
59
|
+
*/
|
|
60
|
+
summarizeForEmbedding?: boolean;
|
|
55
61
|
/**
|
|
56
62
|
* S3 URI to extracted PDF text used for LLM grounding during enrichment
|
|
57
63
|
* @type {string}
|
|
@@ -159,6 +165,7 @@ export function ChunkMetadataFromJSONTyped(json: any, ignoreDiscriminator: boole
|
|
|
159
165
|
'polygons': json['polygons'] == null ? undefined : ((json['polygons'] as Array<any>).map(PolygonReferenceFromJSON)),
|
|
160
166
|
's3Urls': json['s3_urls'] == null ? undefined : json['s3_urls'],
|
|
161
167
|
'summary': json['summary'] == null ? undefined : json['summary'],
|
|
168
|
+
'summarizeForEmbedding': json['summarize_for_embedding'] == null ? undefined : json['summarize_for_embedding'],
|
|
162
169
|
'extractedTextS3Uri': json['extracted_text_s3_uri'] == null ? undefined : json['extracted_text_s3_uri'],
|
|
163
170
|
'secondaryTaxonomy': json['secondary_taxonomy'] == null ? undefined : ImageTaxonomyFromJSON(json['secondary_taxonomy']),
|
|
164
171
|
'sheetName': json['sheet_name'] == null ? undefined : json['sheet_name'],
|
|
@@ -187,6 +194,7 @@ export function ChunkMetadataToJSONTyped(value?: ChunkMetadata | null, ignoreDis
|
|
|
187
194
|
'polygons': value['polygons'] == null ? undefined : ((value['polygons'] as Array<any>).map(PolygonReferenceToJSON)),
|
|
188
195
|
's3_urls': value['s3Urls'],
|
|
189
196
|
'summary': value['summary'],
|
|
197
|
+
'summarize_for_embedding': value['summarizeForEmbedding'],
|
|
190
198
|
'extracted_text_s3_uri': value['extractedTextS3Uri'],
|
|
191
199
|
'secondary_taxonomy': ImageTaxonomyToJSON(value['secondaryTaxonomy']),
|
|
192
200
|
'sheet_name': value['sheetName'],
|