node-type-registry 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/data/data-chunks.d.ts +18 -0
- package/data/data-chunks.js +101 -0
- package/data/data-file-embedding.js +17 -9
- package/data/index.d.ts +1 -0
- package/data/index.js +3 -1
- package/esm/data/data-chunks.d.ts +18 -0
- package/esm/data/data-chunks.js +98 -0
- package/esm/data/data-file-embedding.js +17 -9
- package/esm/data/index.d.ts +1 -0
- package/esm/data/index.js +1 -0
- package/package.json +2 -2
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { NodeTypeDefinition } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Standalone chunking node type.
|
|
4
|
+
*
|
|
5
|
+
* Creates an embedding_chunks record that provisions a chunks table with:
|
|
6
|
+
* - FK to parent table (CASCADE delete)
|
|
7
|
+
* - content text field
|
|
8
|
+
* - chunk_index integer field
|
|
9
|
+
* - embedding vector(N) field with HNSW index
|
|
10
|
+
* - metadata jsonb field
|
|
11
|
+
* - RLS policies inherited from parent
|
|
12
|
+
* - Optional job trigger for automatic chunking on INSERT/UPDATE
|
|
13
|
+
*
|
|
14
|
+
* This node is also composed internally by DataFileEmbedding (enabled by
|
|
15
|
+
* default in extract mode). Use it standalone when you want a chunks table
|
|
16
|
+
* without the full file-embedding pipeline.
|
|
17
|
+
*/
|
|
18
|
+
export declare const DataChunks: NodeTypeDefinition;
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DataChunks = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Standalone chunking node type.
|
|
6
|
+
*
|
|
7
|
+
* Creates an embedding_chunks record that provisions a chunks table with:
|
|
8
|
+
* - FK to parent table (CASCADE delete)
|
|
9
|
+
* - content text field
|
|
10
|
+
* - chunk_index integer field
|
|
11
|
+
* - embedding vector(N) field with HNSW index
|
|
12
|
+
* - metadata jsonb field
|
|
13
|
+
* - RLS policies inherited from parent
|
|
14
|
+
* - Optional job trigger for automatic chunking on INSERT/UPDATE
|
|
15
|
+
*
|
|
16
|
+
* This node is also composed internally by DataFileEmbedding (enabled by
|
|
17
|
+
* default in extract mode). Use it standalone when you want a chunks table
|
|
18
|
+
* without the full file-embedding pipeline.
|
|
19
|
+
*/
|
|
20
|
+
exports.DataChunks = {
|
|
21
|
+
name: 'DataChunks',
|
|
22
|
+
slug: 'data_chunks',
|
|
23
|
+
category: 'data',
|
|
24
|
+
display_name: 'Chunks',
|
|
25
|
+
description: 'Creates a chunked-embedding child table for any parent table. ' +
|
|
26
|
+
'Provisions the chunks table with content, chunk_index, embedding vector, ' +
|
|
27
|
+
'metadata, HNSW index, inherited RLS, and optional job trigger for ' +
|
|
28
|
+
'automatic text splitting. Composed internally by DataFileEmbedding ' +
|
|
29
|
+
'(enabled by default in extract mode) but can also be used standalone.',
|
|
30
|
+
parameter_schema: {
|
|
31
|
+
type: 'object',
|
|
32
|
+
properties: {
|
|
33
|
+
// ── Content config ─────────────────────────────────────────────
|
|
34
|
+
content_field_name: {
|
|
35
|
+
type: 'string',
|
|
36
|
+
format: 'column-ref',
|
|
37
|
+
description: 'Name of the text content column in the chunks table',
|
|
38
|
+
default: 'content'
|
|
39
|
+
},
|
|
40
|
+
// ── Chunking strategy ──────────────────────────────────────────
|
|
41
|
+
chunk_size: {
|
|
42
|
+
type: 'integer',
|
|
43
|
+
description: 'Maximum number of characters per chunk',
|
|
44
|
+
default: 1000
|
|
45
|
+
},
|
|
46
|
+
chunk_overlap: {
|
|
47
|
+
type: 'integer',
|
|
48
|
+
description: 'Number of overlapping characters between consecutive chunks',
|
|
49
|
+
default: 200
|
|
50
|
+
},
|
|
51
|
+
chunk_strategy: {
|
|
52
|
+
type: 'string',
|
|
53
|
+
enum: ['fixed', 'sentence', 'paragraph', 'semantic'],
|
|
54
|
+
description: 'Strategy for splitting text into chunks',
|
|
55
|
+
default: 'paragraph'
|
|
56
|
+
},
|
|
57
|
+
// ── Embedding config ───────────────────────────────────────────
|
|
58
|
+
dimensions: {
|
|
59
|
+
type: 'integer',
|
|
60
|
+
description: 'Vector dimensions for per-chunk embeddings',
|
|
61
|
+
default: 768
|
|
62
|
+
},
|
|
63
|
+
metric: {
|
|
64
|
+
type: 'string',
|
|
65
|
+
enum: ['cosine', 'l2', 'ip'],
|
|
66
|
+
description: 'Distance metric for the HNSW index on chunk embeddings',
|
|
67
|
+
default: 'cosine'
|
|
68
|
+
},
|
|
69
|
+
// ── Table naming ───────────────────────────────────────────────
|
|
70
|
+
chunks_table_name: {
|
|
71
|
+
type: 'string',
|
|
72
|
+
description: 'Override the chunks table name. Defaults to {parent_table}_chunks.',
|
|
73
|
+
},
|
|
74
|
+
// ── Metadata ───────────────────────────────────────────────────
|
|
75
|
+
metadata_fields: {
|
|
76
|
+
type: 'array',
|
|
77
|
+
items: { type: 'string' },
|
|
78
|
+
description: 'Field names from the parent table to copy into chunk metadata'
|
|
79
|
+
},
|
|
80
|
+
// ── Job trigger ────────────────────────────────────────────────
|
|
81
|
+
enqueue_chunking_job: {
|
|
82
|
+
type: 'boolean',
|
|
83
|
+
description: 'Whether to create a job trigger that auto-enqueues chunking ' +
|
|
84
|
+
'on parent INSERT/UPDATE',
|
|
85
|
+
default: true
|
|
86
|
+
},
|
|
87
|
+
chunking_task_name: {
|
|
88
|
+
type: 'string',
|
|
89
|
+
description: 'Task identifier for the chunking job queue',
|
|
90
|
+
default: 'generate_chunks'
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
},
|
|
94
|
+
tags: [
|
|
95
|
+
'embedding',
|
|
96
|
+
'chunks',
|
|
97
|
+
'vector',
|
|
98
|
+
'ai',
|
|
99
|
+
'rag'
|
|
100
|
+
]
|
|
101
|
+
};
|
|
@@ -9,9 +9,10 @@ exports.DataFileEmbedding = {
|
|
|
9
9
|
description: 'Generic, MIME-scoped embedding node for file tables. Supports two modes: ' +
|
|
10
10
|
'direct (whole-file to single vector, e.g. CLIP for images) when extraction ' +
|
|
11
11
|
'is omitted, or extract (file to text to chunks to per-chunk vectors) when ' +
|
|
12
|
-
'extraction config is provided. Composes SearchVector + DataJobTrigger ' +
|
|
13
|
-
'
|
|
14
|
-
'MIME scopes, field
|
|
12
|
+
'extraction config is provided. Composes SearchVector + DataJobTrigger + ' +
|
|
13
|
+
'DataChunks (enabled by default in extract mode) internally. Multiple ' +
|
|
14
|
+
'instances can coexist on the same table with different MIME scopes, field ' +
|
|
15
|
+
'names, and embedding strategies.',
|
|
15
16
|
parameter_schema: {
|
|
16
17
|
type: 'object',
|
|
17
18
|
properties: {
|
|
@@ -114,12 +115,18 @@ exports.DataFileEmbedding = {
|
|
|
114
115
|
}
|
|
115
116
|
}
|
|
116
117
|
},
|
|
117
|
-
// ── Chunking
|
|
118
|
+
// ── Chunking (enabled by default in extract mode) ──────────────
|
|
119
|
+
include_chunks: {
|
|
120
|
+
type: 'boolean',
|
|
121
|
+
description: 'Whether to create a chunks table via DataChunks. Defaults to true ' +
|
|
122
|
+
'when extraction is provided, false in direct mode. Set explicitly ' +
|
|
123
|
+
'to override.',
|
|
124
|
+
},
|
|
118
125
|
chunks: {
|
|
119
126
|
type: 'object',
|
|
120
|
-
description: 'Chunking configuration
|
|
121
|
-
'
|
|
122
|
-
'
|
|
127
|
+
description: 'Chunking configuration passed through to DataChunks. When ' +
|
|
128
|
+
'include_chunks is true (or defaults to true in extract mode), these ' +
|
|
129
|
+
'params configure the chunks table, embedding dimensions, strategy, etc.',
|
|
123
130
|
properties: {
|
|
124
131
|
content_field_name: {
|
|
125
132
|
type: 'string',
|
|
@@ -144,8 +151,9 @@ exports.DataFileEmbedding = {
|
|
|
144
151
|
default: 'paragraph'
|
|
145
152
|
},
|
|
146
153
|
metadata_fields: {
|
|
147
|
-
type: '
|
|
148
|
-
|
|
154
|
+
type: 'array',
|
|
155
|
+
items: { type: 'string' },
|
|
156
|
+
description: 'Field names from parent to copy into chunk metadata'
|
|
149
157
|
},
|
|
150
158
|
enqueue_chunking_job: {
|
|
151
159
|
type: 'boolean',
|
package/data/index.d.ts
CHANGED
package/data/index.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.TableUserSettings = exports.TableUserProfiles = exports.TableOrganizationSettings = exports.SearchVector = exports.SearchUnified = exports.SearchTrgm = exports.SearchSpatialAggregate = exports.SearchSpatial = exports.SearchFullText = exports.SearchBm25 = exports.DataTimestamps = exports.DataTags = exports.DataStatusField = exports.DataSoftDelete = exports.DataSlug = exports.DataPublishable = exports.DataPeoplestamps = exports.DataOwnershipInEntity = exports.DataOwnedFields = exports.DataJsonb = exports.DataLimitCounter = exports.DataJobTrigger = exports.DataInheritFromParent = exports.DataInflection = exports.DataImmutableFields = exports.DataImageEmbedding = exports.DataId = exports.DataForceCurrentUser = exports.DataFeatureFlag = exports.DataFileEmbedding = exports.DataEntityMembership = exports.DataDirectOwner = exports.DataCompositeField = void 0;
|
|
3
|
+
exports.TableUserSettings = exports.TableUserProfiles = exports.TableOrganizationSettings = exports.SearchVector = exports.SearchUnified = exports.SearchTrgm = exports.SearchSpatialAggregate = exports.SearchSpatial = exports.SearchFullText = exports.SearchBm25 = exports.DataTimestamps = exports.DataTags = exports.DataStatusField = exports.DataSoftDelete = exports.DataSlug = exports.DataPublishable = exports.DataPeoplestamps = exports.DataOwnershipInEntity = exports.DataOwnedFields = exports.DataJsonb = exports.DataLimitCounter = exports.DataJobTrigger = exports.DataInheritFromParent = exports.DataInflection = exports.DataImmutableFields = exports.DataImageEmbedding = exports.DataId = exports.DataForceCurrentUser = exports.DataFeatureFlag = exports.DataFileEmbedding = exports.DataEntityMembership = exports.DataDirectOwner = exports.DataCompositeField = exports.DataChunks = void 0;
|
|
4
|
+
var data_chunks_1 = require("./data-chunks");
|
|
5
|
+
Object.defineProperty(exports, "DataChunks", { enumerable: true, get: function () { return data_chunks_1.DataChunks; } });
|
|
4
6
|
var data_composite_field_1 = require("./data-composite-field");
|
|
5
7
|
Object.defineProperty(exports, "DataCompositeField", { enumerable: true, get: function () { return data_composite_field_1.DataCompositeField; } });
|
|
6
8
|
var data_direct_owner_1 = require("./data-direct-owner");
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { NodeTypeDefinition } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Standalone chunking node type.
|
|
4
|
+
*
|
|
5
|
+
* Creates an embedding_chunks record that provisions a chunks table with:
|
|
6
|
+
* - FK to parent table (CASCADE delete)
|
|
7
|
+
* - content text field
|
|
8
|
+
* - chunk_index integer field
|
|
9
|
+
* - embedding vector(N) field with HNSW index
|
|
10
|
+
* - metadata jsonb field
|
|
11
|
+
* - RLS policies inherited from parent
|
|
12
|
+
* - Optional job trigger for automatic chunking on INSERT/UPDATE
|
|
13
|
+
*
|
|
14
|
+
* This node is also composed internally by DataFileEmbedding (enabled by
|
|
15
|
+
* default in extract mode). Use it standalone when you want a chunks table
|
|
16
|
+
* without the full file-embedding pipeline.
|
|
17
|
+
*/
|
|
18
|
+
export declare const DataChunks: NodeTypeDefinition;
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Standalone chunking node type.
|
|
3
|
+
*
|
|
4
|
+
* Creates an embedding_chunks record that provisions a chunks table with:
|
|
5
|
+
* - FK to parent table (CASCADE delete)
|
|
6
|
+
* - content text field
|
|
7
|
+
* - chunk_index integer field
|
|
8
|
+
* - embedding vector(N) field with HNSW index
|
|
9
|
+
* - metadata jsonb field
|
|
10
|
+
* - RLS policies inherited from parent
|
|
11
|
+
* - Optional job trigger for automatic chunking on INSERT/UPDATE
|
|
12
|
+
*
|
|
13
|
+
* This node is also composed internally by DataFileEmbedding (enabled by
|
|
14
|
+
* default in extract mode). Use it standalone when you want a chunks table
|
|
15
|
+
* without the full file-embedding pipeline.
|
|
16
|
+
*/
|
|
17
|
+
export const DataChunks = {
|
|
18
|
+
name: 'DataChunks',
|
|
19
|
+
slug: 'data_chunks',
|
|
20
|
+
category: 'data',
|
|
21
|
+
display_name: 'Chunks',
|
|
22
|
+
description: 'Creates a chunked-embedding child table for any parent table. ' +
|
|
23
|
+
'Provisions the chunks table with content, chunk_index, embedding vector, ' +
|
|
24
|
+
'metadata, HNSW index, inherited RLS, and optional job trigger for ' +
|
|
25
|
+
'automatic text splitting. Composed internally by DataFileEmbedding ' +
|
|
26
|
+
'(enabled by default in extract mode) but can also be used standalone.',
|
|
27
|
+
parameter_schema: {
|
|
28
|
+
type: 'object',
|
|
29
|
+
properties: {
|
|
30
|
+
// ── Content config ─────────────────────────────────────────────
|
|
31
|
+
content_field_name: {
|
|
32
|
+
type: 'string',
|
|
33
|
+
format: 'column-ref',
|
|
34
|
+
description: 'Name of the text content column in the chunks table',
|
|
35
|
+
default: 'content'
|
|
36
|
+
},
|
|
37
|
+
// ── Chunking strategy ──────────────────────────────────────────
|
|
38
|
+
chunk_size: {
|
|
39
|
+
type: 'integer',
|
|
40
|
+
description: 'Maximum number of characters per chunk',
|
|
41
|
+
default: 1000
|
|
42
|
+
},
|
|
43
|
+
chunk_overlap: {
|
|
44
|
+
type: 'integer',
|
|
45
|
+
description: 'Number of overlapping characters between consecutive chunks',
|
|
46
|
+
default: 200
|
|
47
|
+
},
|
|
48
|
+
chunk_strategy: {
|
|
49
|
+
type: 'string',
|
|
50
|
+
enum: ['fixed', 'sentence', 'paragraph', 'semantic'],
|
|
51
|
+
description: 'Strategy for splitting text into chunks',
|
|
52
|
+
default: 'paragraph'
|
|
53
|
+
},
|
|
54
|
+
// ── Embedding config ───────────────────────────────────────────
|
|
55
|
+
dimensions: {
|
|
56
|
+
type: 'integer',
|
|
57
|
+
description: 'Vector dimensions for per-chunk embeddings',
|
|
58
|
+
default: 768
|
|
59
|
+
},
|
|
60
|
+
metric: {
|
|
61
|
+
type: 'string',
|
|
62
|
+
enum: ['cosine', 'l2', 'ip'],
|
|
63
|
+
description: 'Distance metric for the HNSW index on chunk embeddings',
|
|
64
|
+
default: 'cosine'
|
|
65
|
+
},
|
|
66
|
+
// ── Table naming ───────────────────────────────────────────────
|
|
67
|
+
chunks_table_name: {
|
|
68
|
+
type: 'string',
|
|
69
|
+
description: 'Override the chunks table name. Defaults to {parent_table}_chunks.',
|
|
70
|
+
},
|
|
71
|
+
// ── Metadata ───────────────────────────────────────────────────
|
|
72
|
+
metadata_fields: {
|
|
73
|
+
type: 'array',
|
|
74
|
+
items: { type: 'string' },
|
|
75
|
+
description: 'Field names from the parent table to copy into chunk metadata'
|
|
76
|
+
},
|
|
77
|
+
// ── Job trigger ────────────────────────────────────────────────
|
|
78
|
+
enqueue_chunking_job: {
|
|
79
|
+
type: 'boolean',
|
|
80
|
+
description: 'Whether to create a job trigger that auto-enqueues chunking ' +
|
|
81
|
+
'on parent INSERT/UPDATE',
|
|
82
|
+
default: true
|
|
83
|
+
},
|
|
84
|
+
chunking_task_name: {
|
|
85
|
+
type: 'string',
|
|
86
|
+
description: 'Task identifier for the chunking job queue',
|
|
87
|
+
default: 'generate_chunks'
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
tags: [
|
|
92
|
+
'embedding',
|
|
93
|
+
'chunks',
|
|
94
|
+
'vector',
|
|
95
|
+
'ai',
|
|
96
|
+
'rag'
|
|
97
|
+
]
|
|
98
|
+
};
|
|
@@ -6,9 +6,10 @@ export const DataFileEmbedding = {
|
|
|
6
6
|
description: 'Generic, MIME-scoped embedding node for file tables. Supports two modes: ' +
|
|
7
7
|
'direct (whole-file to single vector, e.g. CLIP for images) when extraction ' +
|
|
8
8
|
'is omitted, or extract (file to text to chunks to per-chunk vectors) when ' +
|
|
9
|
-
'extraction config is provided. Composes SearchVector + DataJobTrigger ' +
|
|
10
|
-
'
|
|
11
|
-
'MIME scopes, field
|
|
9
|
+
'extraction config is provided. Composes SearchVector + DataJobTrigger + ' +
|
|
10
|
+
'DataChunks (enabled by default in extract mode) internally. Multiple ' +
|
|
11
|
+
'instances can coexist on the same table with different MIME scopes, field ' +
|
|
12
|
+
'names, and embedding strategies.',
|
|
12
13
|
parameter_schema: {
|
|
13
14
|
type: 'object',
|
|
14
15
|
properties: {
|
|
@@ -111,12 +112,18 @@ export const DataFileEmbedding = {
|
|
|
111
112
|
}
|
|
112
113
|
}
|
|
113
114
|
},
|
|
114
|
-
// ── Chunking
|
|
115
|
+
// ── Chunking (enabled by default in extract mode) ──────────────
|
|
116
|
+
include_chunks: {
|
|
117
|
+
type: 'boolean',
|
|
118
|
+
description: 'Whether to create a chunks table via DataChunks. Defaults to true ' +
|
|
119
|
+
'when extraction is provided, false in direct mode. Set explicitly ' +
|
|
120
|
+
'to override.',
|
|
121
|
+
},
|
|
115
122
|
chunks: {
|
|
116
123
|
type: 'object',
|
|
117
|
-
description: 'Chunking configuration
|
|
118
|
-
'
|
|
119
|
-
'
|
|
124
|
+
description: 'Chunking configuration passed through to DataChunks. When ' +
|
|
125
|
+
'include_chunks is true (or defaults to true in extract mode), these ' +
|
|
126
|
+
'params configure the chunks table, embedding dimensions, strategy, etc.',
|
|
120
127
|
properties: {
|
|
121
128
|
content_field_name: {
|
|
122
129
|
type: 'string',
|
|
@@ -141,8 +148,9 @@ export const DataFileEmbedding = {
|
|
|
141
148
|
default: 'paragraph'
|
|
142
149
|
},
|
|
143
150
|
metadata_fields: {
|
|
144
|
-
type: '
|
|
145
|
-
|
|
151
|
+
type: 'array',
|
|
152
|
+
items: { type: 'string' },
|
|
153
|
+
description: 'Field names from parent to copy into chunk metadata'
|
|
146
154
|
},
|
|
147
155
|
enqueue_chunking_job: {
|
|
148
156
|
type: 'boolean',
|
package/esm/data/index.d.ts
CHANGED
package/esm/data/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-type-registry",
|
|
3
|
-
"version": "0.28.0",
|
|
3
|
+
"version": "0.29.0",
|
|
4
4
|
"description": "Node type definitions for the Constructive blueprint system. Single source of truth for all Authz*, Data*, Relation*, and View* node types.",
|
|
5
5
|
"author": "Constructive <developers@constructive.io>",
|
|
6
6
|
"main": "index.js",
|
|
@@ -47,5 +47,5 @@
|
|
|
47
47
|
"registry",
|
|
48
48
|
"graphile"
|
|
49
49
|
],
|
|
50
|
-
"gitHead": "
|
|
50
|
+
"gitHead": "44e6712bd8a37e2089418a69d801d67651c89350"
|
|
51
51
|
}
|