node-type-registry 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/data/data-file-embedding.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { NodeTypeDefinition } from '../types';
2
+ export declare const DataFileEmbedding: NodeTypeDefinition;
package/data/data-file-embedding.js ADDED
@@ -0,0 +1,186 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DataFileEmbedding = void 0;
4
+ exports.DataFileEmbedding = {
5
+ name: 'DataFileEmbedding',
6
+ slug: 'data_file_embedding',
7
+ category: 'data',
8
+ display_name: 'File Embedding',
9
+ description: 'Generic, MIME-scoped embedding node for file tables. Supports two modes: ' +
10
+ 'direct (whole-file to single vector, e.g. CLIP for images) when extraction ' +
11
+ 'is omitted, or extract (file to text to chunks to per-chunk vectors) when ' +
12
+ 'extraction config is provided. Composes SearchVector + DataJobTrigger ' +
13
+ 'internally. Multiple instances can coexist on the same table with different ' +
14
+ 'MIME scopes, field names, and embedding strategies.',
15
+ parameter_schema: {
16
+ type: 'object',
17
+ properties: {
18
+ // ── Vector config (passed through to SearchVector) ─────────────
19
+ field_name: {
20
+ type: 'string',
21
+ format: 'column-ref',
22
+ description: 'Name of the vector embedding column',
23
+ default: 'embedding'
24
+ },
25
+ dimensions: {
26
+ type: 'integer',
27
+ description: 'Vector dimensions (e.g. 512 for CLIP, 768 for nomic, 1536 for ada-002)',
28
+ default: 768
29
+ },
30
+ index_method: {
31
+ type: 'string',
32
+ enum: ['hnsw', 'ivfflat'],
33
+ description: 'Index type for similarity search',
34
+ default: 'hnsw'
35
+ },
36
+ metric: {
37
+ type: 'string',
38
+ enum: ['cosine', 'l2', 'ip'],
39
+ description: 'Distance metric',
40
+ default: 'cosine'
41
+ },
42
+ index_options: {
43
+ type: 'object',
44
+ description: 'Index-specific options. HNSW: {m, ef_construction}. IVFFlat: {lists}.',
45
+ default: {}
46
+ },
47
+ // ── MIME scoping ───────────────────────────────────────────────
48
+ mime_patterns: {
49
+ type: 'array',
50
+ items: { type: 'string' },
51
+ description: 'MIME type LIKE patterns to match. Multiple patterns are OR\'d together. ' +
52
+ 'Examples: [\'image/%\'], [\'application/pdf\', \'text/%\'], [\'audio/%\'].',
53
+ default: ['image/%']
54
+ },
55
+ // ── Job routing ────────────────────────────────────────────────
56
+ task_identifier: {
57
+ type: 'string',
58
+ description: 'Job task identifier for the worker. In direct mode this is the ' +
59
+ 'embedding worker; in extract mode this is the extraction worker.',
60
+ default: 'process_file_embedding'
61
+ },
62
+ events: {
63
+ type: 'array',
64
+ items: { type: 'string', enum: ['INSERT', 'UPDATE'] },
65
+ description: 'Trigger events that fire the job',
66
+ default: ['INSERT']
67
+ },
68
+ payload_custom: {
69
+ type: 'object',
70
+ additionalProperties: { type: 'string', format: 'column-ref' },
71
+ description: 'Custom payload key-to-column mapping for the job trigger',
72
+ default: {
73
+ file_id: 'id',
74
+ key: 'key',
75
+ mime_type: 'mime_type',
76
+ bucket_id: 'bucket_id'
77
+ }
78
+ },
79
+ trigger_conditions: {
80
+ description: 'Additional compound conditions beyond MIME filtering. ' +
81
+ 'Merged with the auto-generated MIME conditions via AND. ' +
82
+ 'Use this to add status checks, field guards, etc.',
83
+ 'x-codegen-type': 'TriggerCondition | TriggerCondition[]',
84
+ oneOf: [
85
+ { $ref: '#/$defs/triggerCondition' },
86
+ { type: 'array', items: { $ref: '#/$defs/triggerCondition' } }
87
+ ]
88
+ },
89
+ // ── Extraction config (optional — enables extract mode) ────────
90
+ extraction: {
91
+ type: 'object',
92
+ description: 'Text extraction configuration. When present, the generator creates ' +
93
+ 'extraction output fields on the table and configures SearchVector with ' +
94
+ 'source_fields + stale tracking. When absent, the node operates in direct ' +
95
+ 'mode (single vector per file, no text extraction).',
96
+ properties: {
97
+ text_field: {
98
+ type: 'string',
99
+ format: 'column-ref',
100
+ description: 'Field to store extracted text/markdown',
101
+ default: 'extracted_text'
102
+ },
103
+ metadata_field: {
104
+ type: 'string',
105
+ format: 'column-ref',
106
+ description: 'JSONB field for extraction metadata (page count, language, etc.)',
107
+ default: 'extracted_metadata'
108
+ },
109
+ status_field: {
110
+ type: 'string',
111
+ format: 'column-ref',
112
+ description: 'Extraction lifecycle status field',
113
+ default: 'extraction_status'
114
+ }
115
+ }
116
+ },
117
+ // ── Chunking config (optional — creates embedding_chunks) ──────
118
+ chunks: {
119
+ type: 'object',
120
+ description: 'Chunking configuration. Creates an embedding_chunks record that drives ' +
121
+ 'automatic text splitting and per-chunk embedding. Only meaningful when ' +
122
+ 'extraction is also provided.',
123
+ properties: {
124
+ content_field_name: {
125
+ type: 'string',
126
+ format: 'column-ref',
127
+ description: 'Name of the text content column in the chunks table',
128
+ default: 'content'
129
+ },
130
+ chunk_size: {
131
+ type: 'integer',
132
+ description: 'Maximum number of characters per chunk',
133
+ default: 1000
134
+ },
135
+ chunk_overlap: {
136
+ type: 'integer',
137
+ description: 'Number of overlapping characters between consecutive chunks',
138
+ default: 200
139
+ },
140
+ chunk_strategy: {
141
+ type: 'string',
142
+ enum: ['fixed', 'sentence', 'paragraph', 'semantic'],
143
+ description: 'Strategy for splitting text into chunks',
144
+ default: 'paragraph'
145
+ },
146
+ metadata_fields: {
147
+ type: 'object',
148
+ description: 'Metadata fields from parent to copy into chunks'
149
+ },
150
+ enqueue_chunking_job: {
151
+ type: 'boolean',
152
+ description: 'Whether to auto-enqueue a chunking job on insert/update',
153
+ default: true
154
+ },
155
+ chunking_task_name: {
156
+ type: 'string',
157
+ description: 'Task identifier for the chunking job queue',
158
+ default: 'generate_chunks'
159
+ }
160
+ }
161
+ },
162
+ // ── Stale tracking (meaningful in extract mode) ────────────────
163
+ stale_strategy: {
164
+ type: 'string',
165
+ enum: ['column', 'null', 'hash'],
166
+ description: 'Strategy for tracking embedding staleness when extraction is enabled. ' +
167
+ 'column: embedding_stale boolean. null: set embedding to NULL. hash: md5 hash.',
168
+ default: 'column'
169
+ },
170
+ include_stale_field: {
171
+ type: 'boolean',
172
+ description: 'Whether to include the embedding_stale boolean field (extract mode)',
173
+ default: true
174
+ }
175
+ }
176
+ },
177
+ tags: [
178
+ 'embedding',
179
+ 'vector',
180
+ 'ai',
181
+ 'composition',
182
+ 'jobs',
183
+ 'multimodal',
184
+ 'files'
185
+ ]
186
+ };
package/data/index.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export { DataCompositeField } from './data-composite-field';
2
2
  export { DataDirectOwner } from './data-direct-owner';
3
3
  export { DataEntityMembership } from './data-entity-membership';
4
+ export { DataFileEmbedding } from './data-file-embedding';
4
5
  export { DataFeatureFlag } from './data-feature-flag';
5
6
  export { DataForceCurrentUser } from './data-force-current-user';
6
7
  export { DataId } from './data-id';
package/data/index.js CHANGED
@@ -1,12 +1,14 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.TableUserSettings = exports.TableUserProfiles = exports.TableOrganizationSettings = exports.SearchVector = exports.SearchUnified = exports.SearchTrgm = exports.SearchSpatialAggregate = exports.SearchSpatial = exports.SearchFullText = exports.SearchBm25 = exports.DataTimestamps = exports.DataTags = exports.DataStatusField = exports.DataSoftDelete = exports.DataSlug = exports.DataPublishable = exports.DataPeoplestamps = exports.DataOwnershipInEntity = exports.DataOwnedFields = exports.DataJsonb = exports.DataLimitCounter = exports.DataJobTrigger = exports.DataInheritFromParent = exports.DataInflection = exports.DataImmutableFields = exports.DataImageEmbedding = exports.DataId = exports.DataForceCurrentUser = exports.DataFeatureFlag = exports.DataEntityMembership = exports.DataDirectOwner = exports.DataCompositeField = void 0;
3
+ exports.TableUserSettings = exports.TableUserProfiles = exports.TableOrganizationSettings = exports.SearchVector = exports.SearchUnified = exports.SearchTrgm = exports.SearchSpatialAggregate = exports.SearchSpatial = exports.SearchFullText = exports.SearchBm25 = exports.DataTimestamps = exports.DataTags = exports.DataStatusField = exports.DataSoftDelete = exports.DataSlug = exports.DataPublishable = exports.DataPeoplestamps = exports.DataOwnershipInEntity = exports.DataOwnedFields = exports.DataJsonb = exports.DataLimitCounter = exports.DataJobTrigger = exports.DataInheritFromParent = exports.DataInflection = exports.DataImmutableFields = exports.DataImageEmbedding = exports.DataId = exports.DataForceCurrentUser = exports.DataFeatureFlag = exports.DataFileEmbedding = exports.DataEntityMembership = exports.DataDirectOwner = exports.DataCompositeField = void 0;
4
4
  var data_composite_field_1 = require("./data-composite-field");
5
5
  Object.defineProperty(exports, "DataCompositeField", { enumerable: true, get: function () { return data_composite_field_1.DataCompositeField; } });
6
6
  var data_direct_owner_1 = require("./data-direct-owner");
7
7
  Object.defineProperty(exports, "DataDirectOwner", { enumerable: true, get: function () { return data_direct_owner_1.DataDirectOwner; } });
8
8
  var data_entity_membership_1 = require("./data-entity-membership");
9
9
  Object.defineProperty(exports, "DataEntityMembership", { enumerable: true, get: function () { return data_entity_membership_1.DataEntityMembership; } });
10
+ var data_file_embedding_1 = require("./data-file-embedding");
11
+ Object.defineProperty(exports, "DataFileEmbedding", { enumerable: true, get: function () { return data_file_embedding_1.DataFileEmbedding; } });
10
12
  var data_feature_flag_1 = require("./data-feature-flag");
11
13
  Object.defineProperty(exports, "DataFeatureFlag", { enumerable: true, get: function () { return data_feature_flag_1.DataFeatureFlag; } });
12
14
  var data_force_current_user_1 = require("./data-force-current-user");
package/esm/data/data-file-embedding.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { NodeTypeDefinition } from '../types';
2
+ export declare const DataFileEmbedding: NodeTypeDefinition;
package/esm/data/data-file-embedding.js ADDED
@@ -0,0 +1,183 @@
1
+ export const DataFileEmbedding = {
2
+ name: 'DataFileEmbedding',
3
+ slug: 'data_file_embedding',
4
+ category: 'data',
5
+ display_name: 'File Embedding',
6
+ description: 'Generic, MIME-scoped embedding node for file tables. Supports two modes: ' +
7
+ 'direct (whole-file to single vector, e.g. CLIP for images) when extraction ' +
8
+ 'is omitted, or extract (file to text to chunks to per-chunk vectors) when ' +
9
+ 'extraction config is provided. Composes SearchVector + DataJobTrigger ' +
10
+ 'internally. Multiple instances can coexist on the same table with different ' +
11
+ 'MIME scopes, field names, and embedding strategies.',
12
+ parameter_schema: {
13
+ type: 'object',
14
+ properties: {
15
+ // ── Vector config (passed through to SearchVector) ─────────────
16
+ field_name: {
17
+ type: 'string',
18
+ format: 'column-ref',
19
+ description: 'Name of the vector embedding column',
20
+ default: 'embedding'
21
+ },
22
+ dimensions: {
23
+ type: 'integer',
24
+ description: 'Vector dimensions (e.g. 512 for CLIP, 768 for nomic, 1536 for ada-002)',
25
+ default: 768
26
+ },
27
+ index_method: {
28
+ type: 'string',
29
+ enum: ['hnsw', 'ivfflat'],
30
+ description: 'Index type for similarity search',
31
+ default: 'hnsw'
32
+ },
33
+ metric: {
34
+ type: 'string',
35
+ enum: ['cosine', 'l2', 'ip'],
36
+ description: 'Distance metric',
37
+ default: 'cosine'
38
+ },
39
+ index_options: {
40
+ type: 'object',
41
+ description: 'Index-specific options. HNSW: {m, ef_construction}. IVFFlat: {lists}.',
42
+ default: {}
43
+ },
44
+ // ── MIME scoping ───────────────────────────────────────────────
45
+ mime_patterns: {
46
+ type: 'array',
47
+ items: { type: 'string' },
48
+ description: 'MIME type LIKE patterns to match. Multiple patterns are OR\'d together. ' +
49
+ 'Examples: [\'image/%\'], [\'application/pdf\', \'text/%\'], [\'audio/%\'].',
50
+ default: ['image/%']
51
+ },
52
+ // ── Job routing ────────────────────────────────────────────────
53
+ task_identifier: {
54
+ type: 'string',
55
+ description: 'Job task identifier for the worker. In direct mode this is the ' +
56
+ 'embedding worker; in extract mode this is the extraction worker.',
57
+ default: 'process_file_embedding'
58
+ },
59
+ events: {
60
+ type: 'array',
61
+ items: { type: 'string', enum: ['INSERT', 'UPDATE'] },
62
+ description: 'Trigger events that fire the job',
63
+ default: ['INSERT']
64
+ },
65
+ payload_custom: {
66
+ type: 'object',
67
+ additionalProperties: { type: 'string', format: 'column-ref' },
68
+ description: 'Custom payload key-to-column mapping for the job trigger',
69
+ default: {
70
+ file_id: 'id',
71
+ key: 'key',
72
+ mime_type: 'mime_type',
73
+ bucket_id: 'bucket_id'
74
+ }
75
+ },
76
+ trigger_conditions: {
77
+ description: 'Additional compound conditions beyond MIME filtering. ' +
78
+ 'Merged with the auto-generated MIME conditions via AND. ' +
79
+ 'Use this to add status checks, field guards, etc.',
80
+ 'x-codegen-type': 'TriggerCondition | TriggerCondition[]',
81
+ oneOf: [
82
+ { $ref: '#/$defs/triggerCondition' },
83
+ { type: 'array', items: { $ref: '#/$defs/triggerCondition' } }
84
+ ]
85
+ },
86
+ // ── Extraction config (optional — enables extract mode) ────────
87
+ extraction: {
88
+ type: 'object',
89
+ description: 'Text extraction configuration. When present, the generator creates ' +
90
+ 'extraction output fields on the table and configures SearchVector with ' +
91
+ 'source_fields + stale tracking. When absent, the node operates in direct ' +
92
+ 'mode (single vector per file, no text extraction).',
93
+ properties: {
94
+ text_field: {
95
+ type: 'string',
96
+ format: 'column-ref',
97
+ description: 'Field to store extracted text/markdown',
98
+ default: 'extracted_text'
99
+ },
100
+ metadata_field: {
101
+ type: 'string',
102
+ format: 'column-ref',
103
+ description: 'JSONB field for extraction metadata (page count, language, etc.)',
104
+ default: 'extracted_metadata'
105
+ },
106
+ status_field: {
107
+ type: 'string',
108
+ format: 'column-ref',
109
+ description: 'Extraction lifecycle status field',
110
+ default: 'extraction_status'
111
+ }
112
+ }
113
+ },
114
+ // ── Chunking config (optional — creates embedding_chunks) ──────
115
+ chunks: {
116
+ type: 'object',
117
+ description: 'Chunking configuration. Creates an embedding_chunks record that drives ' +
118
+ 'automatic text splitting and per-chunk embedding. Only meaningful when ' +
119
+ 'extraction is also provided.',
120
+ properties: {
121
+ content_field_name: {
122
+ type: 'string',
123
+ format: 'column-ref',
124
+ description: 'Name of the text content column in the chunks table',
125
+ default: 'content'
126
+ },
127
+ chunk_size: {
128
+ type: 'integer',
129
+ description: 'Maximum number of characters per chunk',
130
+ default: 1000
131
+ },
132
+ chunk_overlap: {
133
+ type: 'integer',
134
+ description: 'Number of overlapping characters between consecutive chunks',
135
+ default: 200
136
+ },
137
+ chunk_strategy: {
138
+ type: 'string',
139
+ enum: ['fixed', 'sentence', 'paragraph', 'semantic'],
140
+ description: 'Strategy for splitting text into chunks',
141
+ default: 'paragraph'
142
+ },
143
+ metadata_fields: {
144
+ type: 'object',
145
+ description: 'Metadata fields from parent to copy into chunks'
146
+ },
147
+ enqueue_chunking_job: {
148
+ type: 'boolean',
149
+ description: 'Whether to auto-enqueue a chunking job on insert/update',
150
+ default: true
151
+ },
152
+ chunking_task_name: {
153
+ type: 'string',
154
+ description: 'Task identifier for the chunking job queue',
155
+ default: 'generate_chunks'
156
+ }
157
+ }
158
+ },
159
+ // ── Stale tracking (meaningful in extract mode) ────────────────
160
+ stale_strategy: {
161
+ type: 'string',
162
+ enum: ['column', 'null', 'hash'],
163
+ description: 'Strategy for tracking embedding staleness when extraction is enabled. ' +
164
+ 'column: embedding_stale boolean. null: set embedding to NULL. hash: md5 hash.',
165
+ default: 'column'
166
+ },
167
+ include_stale_field: {
168
+ type: 'boolean',
169
+ description: 'Whether to include the embedding_stale boolean field (extract mode)',
170
+ default: true
171
+ }
172
+ }
173
+ },
174
+ tags: [
175
+ 'embedding',
176
+ 'vector',
177
+ 'ai',
178
+ 'composition',
179
+ 'jobs',
180
+ 'multimodal',
181
+ 'files'
182
+ ]
183
+ };
package/esm/data/index.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export { DataCompositeField } from './data-composite-field';
2
2
  export { DataDirectOwner } from './data-direct-owner';
3
3
  export { DataEntityMembership } from './data-entity-membership';
4
+ export { DataFileEmbedding } from './data-file-embedding';
4
5
  export { DataFeatureFlag } from './data-feature-flag';
5
6
  export { DataForceCurrentUser } from './data-force-current-user';
6
7
  export { DataId } from './data-id';
package/esm/data/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  export { DataCompositeField } from './data-composite-field';
2
2
  export { DataDirectOwner } from './data-direct-owner';
3
3
  export { DataEntityMembership } from './data-entity-membership';
4
+ export { DataFileEmbedding } from './data-file-embedding';
4
5
  export { DataFeatureFlag } from './data-feature-flag';
5
6
  export { DataForceCurrentUser } from './data-force-current-user';
6
7
  export { DataId } from './data-id';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-type-registry",
3
- "version": "0.24.0",
3
+ "version": "0.25.0",
4
4
  "description": "Node type definitions for the Constructive blueprint system. Single source of truth for all Authz*, Data*, Relation*, and View* node types.",
5
5
  "author": "Constructive <developers@constructive.io>",
6
6
  "main": "index.js",
@@ -47,5 +47,5 @@
47
47
  "registry",
48
48
  "graphile"
49
49
  ],
50
- "gitHead": "04633dc47399ffc9a5cefacfb0a90451acee988d"
50
+ "gitHead": "7409479a981ff63a0937b5406a1c206a07b264ad"
51
51
  }