@forzalabs/remora 1.0.21 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/actions/automap.js +26 -42
- package/actions/compile.js +27 -43
- package/actions/create_consumer.js +24 -40
- package/actions/create_producer.js +16 -32
- package/actions/debug.js +18 -34
- package/actions/deploy.js +30 -46
- package/actions/discover.js +13 -29
- package/actions/init.js +29 -45
- package/actions/mock.js +16 -32
- package/actions/run.js +34 -52
- package/actions/sample.js +42 -58
- package/index.js +38 -43
- package/package.json +4 -4
- package/workers/ExecutorWorker.js +18 -32
- package/Constants.js +0 -34
- package/core/Affirm.js +0 -42
- package/core/Algo.js +0 -160
- package/core/dste/DSTE.js +0 -113
- package/core/logger/DebugLogService.js +0 -48
- package/core/logger/DevelopmentLogService.js +0 -70
- package/core/logger/LocalLogService.js +0 -70
- package/core/logger/Logger.js +0 -54
- package/database/DatabaseEngine.js +0 -149
- package/database/DatabaseStructure.js +0 -27
- package/definitions/DatasetDefinitions.js +0 -2
- package/definitions/ExecutorDefinitions.js +0 -2
- package/definitions/ProcessENV.js +0 -2
- package/definitions/agents/DestinationDriver.js +0 -2
- package/definitions/agents/SourceDriver.js +0 -2
- package/definitions/cli.js +0 -2
- package/definitions/database/ApiKeys.js +0 -2
- package/definitions/database/Stored.js +0 -7
- package/definitions/database/UsageStat.js +0 -2
- package/definitions/database/User.js +0 -2
- package/definitions/json_schemas/consumer-schema.json +0 -1226
- package/definitions/json_schemas/producer-schema.json +0 -308
- package/definitions/json_schemas/project-schema.json +0 -100
- package/definitions/json_schemas/source-schema.json +0 -249
- package/definitions/requests/ConsumerRequest.js +0 -2
- package/definitions/requests/Developer.js +0 -2
- package/definitions/requests/Mapping.js +0 -2
- package/definitions/requests/ProducerRequest.js +0 -2
- package/definitions/requests/Request.js +0 -2
- package/definitions/resources/Compiled.js +0 -2
- package/definitions/resources/Consumer.js +0 -2
- package/definitions/resources/Environment.js +0 -2
- package/definitions/resources/Library.js +0 -2
- package/definitions/resources/Producer.js +0 -2
- package/definitions/resources/Project.js +0 -2
- package/definitions/resources/Schema.js +0 -2
- package/definitions/resources/Source.js +0 -2
- package/definitions/temp.js +0 -2
- package/definitions/transform/Transformations.js +0 -2
- package/drivers/DeltaShareDriver.js +0 -186
- package/drivers/DriverFactory.js +0 -72
- package/drivers/DriverHelper.js +0 -248
- package/drivers/HttpApiDriver.js +0 -208
- package/drivers/RedshiftDriver.js +0 -184
- package/drivers/files/LocalDestinationDriver.js +0 -146
- package/drivers/files/LocalSourceDriver.js +0 -405
- package/drivers/s3/S3DestinationDriver.js +0 -197
- package/drivers/s3/S3SourceDriver.js +0 -495
- package/engines/CryptoEngine.js +0 -75
- package/engines/Environment.js +0 -170
- package/engines/ProcessENVManager.js +0 -83
- package/engines/RandomEngine.js +0 -47
- package/engines/SecretManager.js +0 -23
- package/engines/UserManager.js +0 -66
- package/engines/ai/AutoMapperEngine.js +0 -37
- package/engines/ai/DeveloperEngine.js +0 -497
- package/engines/ai/LLM.js +0 -255
- package/engines/consumer/ConsumerManager.js +0 -218
- package/engines/consumer/ConsumerOnFinishManager.js +0 -202
- package/engines/dataset/Dataset.js +0 -824
- package/engines/dataset/DatasetManager.js +0 -211
- package/engines/dataset/DatasetRecord.js +0 -120
- package/engines/dataset/DatasetRecordPool.js +0 -77
- package/engines/execution/RequestExecutor.js +0 -67
- package/engines/parsing/CSVParser.js +0 -60
- package/engines/parsing/LineParser.js +0 -71
- package/engines/parsing/ParseCompression.js +0 -101
- package/engines/parsing/ParseHelper.js +0 -18
- package/engines/parsing/ParseManager.js +0 -54
- package/engines/parsing/XLSParser.js +0 -87
- package/engines/parsing/XMLParser.js +0 -115
- package/engines/producer/ProducerEngine.js +0 -127
- package/engines/producer/ProducerManager.js +0 -43
- package/engines/scheduler/CronScheduler.js +0 -222
- package/engines/scheduler/QueueManager.js +0 -314
- package/engines/schema/SchemaValidator.js +0 -67
- package/engines/transform/JoinEngine.js +0 -232
- package/engines/transform/TransformationEngine.js +0 -277
- package/engines/transform/TypeCaster.js +0 -59
- package/engines/usage/DataframeManager.js +0 -55
- package/engines/usage/UsageDataManager.js +0 -151
- package/engines/usage/UsageManager.js +0 -65
- package/engines/validation/Validator.js +0 -216
- package/executors/ConsumerExecutor.js +0 -280
- package/executors/Executor.js +0 -177
- package/executors/ExecutorOrchestrator.js +0 -331
- package/executors/ExecutorPerformance.js +0 -17
- package/executors/ExecutorProgress.js +0 -54
- package/executors/ExecutorScope.js +0 -52
- package/executors/OutputExecutor.js +0 -118
- package/executors/ProducerExecutor.js +0 -108
- package/helper/Helper.js +0 -149
- package/helper/Logger.js +0 -84
- package/helper/Runtime.js +0 -20
- package/helper/Settings.js +0 -13
- package/licencing/LicenceManager.js +0 -64
- package/settings.js +0 -12
|
@@ -1,308 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"title": "Producer Schema",
|
|
4
|
-
"description": "Schema for defining data producers",
|
|
5
|
-
"type": "object",
|
|
6
|
-
"properties": {
|
|
7
|
-
"$schema": {
|
|
8
|
-
"type": "string",
|
|
9
|
-
"format": "uri"
|
|
10
|
-
},
|
|
11
|
-
"name": {
|
|
12
|
-
"type": "string",
|
|
13
|
-
"description": "The name of the producer"
|
|
14
|
-
},
|
|
15
|
-
"description": {
|
|
16
|
-
"type": "string",
|
|
17
|
-
"description": "Optional description of the producer"
|
|
18
|
-
},
|
|
19
|
-
"source": {
|
|
20
|
-
"type": "string",
|
|
21
|
-
"description": "The name of the Source"
|
|
22
|
-
},
|
|
23
|
-
"dimensions": {
|
|
24
|
-
"type": "array",
|
|
25
|
-
"description": "Dimensions for the producer",
|
|
26
|
-
"items": {
|
|
27
|
-
"type": "object",
|
|
28
|
-
"properties": {
|
|
29
|
-
"name": {
|
|
30
|
-
"type": "string",
|
|
31
|
-
"description": "The name of the dimension. This is the output name of this dimension."
|
|
32
|
-
},
|
|
33
|
-
"description": {
|
|
34
|
-
"type": "string",
|
|
35
|
-
"description": "Optional description of the dimension"
|
|
36
|
-
},
|
|
37
|
-
"type": {
|
|
38
|
-
"type": "string",
|
|
39
|
-
"enum": [
|
|
40
|
-
"string",
|
|
41
|
-
"number",
|
|
42
|
-
"datetime",
|
|
43
|
-
"boolean"
|
|
44
|
-
],
|
|
45
|
-
"description": "The data type of the dimension"
|
|
46
|
-
},
|
|
47
|
-
"alias": {
|
|
48
|
-
"type": "string",
|
|
49
|
-
"description": "The SQL column or field key that corresponds to this dimension. If left empty, the column name is assumed to be the same as the dimension name."
|
|
50
|
-
},
|
|
51
|
-
"pk": {
|
|
52
|
-
"type": "boolean",
|
|
53
|
-
"description": "Whether this is a primary key column or not"
|
|
54
|
-
},
|
|
55
|
-
"classification": {
|
|
56
|
-
"type": "array",
|
|
57
|
-
"items": {
|
|
58
|
-
"type": "string",
|
|
59
|
-
"enum": [
|
|
60
|
-
"PHI",
|
|
61
|
-
"PII",
|
|
62
|
-
"GDPR"
|
|
63
|
-
],
|
|
64
|
-
"description": "Classification categories for this dimension"
|
|
65
|
-
}
|
|
66
|
-
},
|
|
67
|
-
"mask": {
|
|
68
|
-
"type": "string",
|
|
69
|
-
"description": "Masking type to apply to this dimension. Predefined values: 'hash' (replaces with a hashed value), 'mask' (replaces characters with a mask character), 'crypt' (encrypts the value), 'random' (replaces with a random value), 'seeded-random' (replaces with a random value generated from a seed), 'none' (no masking). You can also use environment variables by using the {your-env-var} notation or any custom string value.",
|
|
70
|
-
"examples": [
|
|
71
|
-
"hash",
|
|
72
|
-
"mask",
|
|
73
|
-
"crypt",
|
|
74
|
-
"random",
|
|
75
|
-
"seeded-random",
|
|
76
|
-
"none",
|
|
77
|
-
"{REMORA_MASK_IN_DEV}"
|
|
78
|
-
]
|
|
79
|
-
},
|
|
80
|
-
"sourceFilename": {
|
|
81
|
-
"type": "boolean",
|
|
82
|
-
"description": "When true, this dimension will be populated with the source filename. Only valid for file-based producers (local, aws-s3) and only one dimension per producer can have this set to true. Useful when reading multiple files with wildcard patterns to track which file each row came from."
|
|
83
|
-
}
|
|
84
|
-
},
|
|
85
|
-
"required": [
|
|
86
|
-
"name",
|
|
87
|
-
"type"
|
|
88
|
-
],
|
|
89
|
-
"additionalProperties": false
|
|
90
|
-
},
|
|
91
|
-
"minItems": 1
|
|
92
|
-
},
|
|
93
|
-
"measures": {
|
|
94
|
-
"type": "array",
|
|
95
|
-
"description": "Optional measures for the producer",
|
|
96
|
-
"items": {
|
|
97
|
-
"type": "object",
|
|
98
|
-
"properties": {
|
|
99
|
-
"name": {
|
|
100
|
-
"type": "string",
|
|
101
|
-
"description": "The name of the measure"
|
|
102
|
-
},
|
|
103
|
-
"description": {
|
|
104
|
-
"type": "string",
|
|
105
|
-
"description": "Optional description of the measure"
|
|
106
|
-
},
|
|
107
|
-
"sql": {
|
|
108
|
-
"type": "string",
|
|
109
|
-
"description": "SQL command used to create a metric"
|
|
110
|
-
}
|
|
111
|
-
},
|
|
112
|
-
"required": [
|
|
113
|
-
"name",
|
|
114
|
-
"sql"
|
|
115
|
-
],
|
|
116
|
-
"additionalProperties": false
|
|
117
|
-
}
|
|
118
|
-
},
|
|
119
|
-
"settings": {
|
|
120
|
-
"type": "object",
|
|
121
|
-
"compressionType": {
|
|
122
|
-
"type": "string",
|
|
123
|
-
"enum": [
|
|
124
|
-
"GZ",
|
|
125
|
-
"TAR",
|
|
126
|
-
"ZIP"
|
|
127
|
-
],
|
|
128
|
-
"description": "The compression type used from the file to read"
|
|
129
|
-
},
|
|
130
|
-
"description": "Settings for the producer",
|
|
131
|
-
"properties": {
|
|
132
|
-
"sqlTable": {
|
|
133
|
-
"type": "string",
|
|
134
|
-
"description": "The SQL table name, which is a concise way of setting the sql property to 'SELECT * FROM <sql table name>'"
|
|
135
|
-
},
|
|
136
|
-
"direct": {
|
|
137
|
-
"type": "boolean",
|
|
138
|
-
"description": "If true, no view is created on the producer side due to permission limitations"
|
|
139
|
-
},
|
|
140
|
-
"sql": {
|
|
141
|
-
"type": "string",
|
|
142
|
-
"description": "The name of the file that has the SQL statement to run to create this producer"
|
|
143
|
-
},
|
|
144
|
-
"fileKey": {
|
|
145
|
-
"type": "string",
|
|
146
|
-
"description": "For S3/local sources: the file key/path that identifies the file to read. For HTTP API sources: the API endpoint path (e.g., '/api/v1/users')"
|
|
147
|
-
},
|
|
148
|
-
"fileType": {
|
|
149
|
-
"type": "string",
|
|
150
|
-
"enum": [
|
|
151
|
-
"JSON",
|
|
152
|
-
"JSONL",
|
|
153
|
-
"CSV",
|
|
154
|
-
"TXT",
|
|
155
|
-
"XLS",
|
|
156
|
-
"XLSX",
|
|
157
|
-
"XML",
|
|
158
|
-
"PARQUET"
|
|
159
|
-
],
|
|
160
|
-
"description": "The type of file to read."
|
|
161
|
-
},
|
|
162
|
-
"delimiter": {
|
|
163
|
-
"type": "string",
|
|
164
|
-
"description": "The column delimiter for CSV or TXT files if different from the default (,)."
|
|
165
|
-
},
|
|
166
|
-
"hasHeaderRow": {
|
|
167
|
-
"type": "boolean",
|
|
168
|
-
"description": "For TXT files, specifies whether the file has a header row containing column names. Defaults to true."
|
|
169
|
-
},
|
|
170
|
-
"sheetName": {
|
|
171
|
-
"type": "string",
|
|
172
|
-
"description": "For Excel files (.xls/.xlsx), specifies the name of the sheet to read data from. If not specified, the first sheet will be used."
|
|
173
|
-
}
|
|
174
|
-
},
|
|
175
|
-
"additionalProperties": false
|
|
176
|
-
},
|
|
177
|
-
"_version": {
|
|
178
|
-
"type": "number",
|
|
179
|
-
"description": "Version number of the producer configuration"
|
|
180
|
-
}
|
|
181
|
-
},
|
|
182
|
-
"required": [
|
|
183
|
-
"name",
|
|
184
|
-
"source",
|
|
185
|
-
"dimensions",
|
|
186
|
-
"settings"
|
|
187
|
-
],
|
|
188
|
-
"additionalProperties": false,
|
|
189
|
-
"examples": [
|
|
190
|
-
{
|
|
191
|
-
"name": "CustomerData",
|
|
192
|
-
"description": "Producer for customer data from the main database",
|
|
193
|
-
"source": "Production PostgreSQL Database",
|
|
194
|
-
"dimensions": [
|
|
195
|
-
{
|
|
196
|
-
"name": "customer_id",
|
|
197
|
-
"type": "string",
|
|
198
|
-
"pk": true
|
|
199
|
-
},
|
|
200
|
-
{
|
|
201
|
-
"name": "full_name",
|
|
202
|
-
"type": "string",
|
|
203
|
-
"classification": [
|
|
204
|
-
"PII",
|
|
205
|
-
"GDPR"
|
|
206
|
-
],
|
|
207
|
-
"mask": "hash"
|
|
208
|
-
},
|
|
209
|
-
{
|
|
210
|
-
"name": "email",
|
|
211
|
-
"type": "string",
|
|
212
|
-
"format": {
|
|
213
|
-
"type": "string",
|
|
214
|
-
"description": "Format for datetype of the source ('YYYYMMDD', 'DD-MM-YYYY')"
|
|
215
|
-
},
|
|
216
|
-
"classification": [
|
|
217
|
-
"PII",
|
|
218
|
-
"GDPR"
|
|
219
|
-
],
|
|
220
|
-
"mask": "mask"
|
|
221
|
-
},
|
|
222
|
-
{
|
|
223
|
-
"name": "signup_date",
|
|
224
|
-
"type": "datetime"
|
|
225
|
-
}
|
|
226
|
-
],
|
|
227
|
-
"measures": [
|
|
228
|
-
{
|
|
229
|
-
"name": "total_orders",
|
|
230
|
-
"description": "Total number of orders by this customer",
|
|
231
|
-
"sql": "COUNT(order_id)"
|
|
232
|
-
},
|
|
233
|
-
{
|
|
234
|
-
"name": "total_spent",
|
|
235
|
-
"description": "Total amount spent by this customer",
|
|
236
|
-
"sql": "SUM(order_amount)"
|
|
237
|
-
}
|
|
238
|
-
],
|
|
239
|
-
"settings": {
|
|
240
|
-
"sqlTable": "customers"
|
|
241
|
-
},
|
|
242
|
-
"_version": 1
|
|
243
|
-
},
|
|
244
|
-
{
|
|
245
|
-
"name": "SalesData",
|
|
246
|
-
"description": "Producer for sales data from S3 bucket",
|
|
247
|
-
"source": "Data Lake",
|
|
248
|
-
"dimensions": [
|
|
249
|
-
{
|
|
250
|
-
"name": "transaction_id",
|
|
251
|
-
"type": "string",
|
|
252
|
-
"pk": true
|
|
253
|
-
},
|
|
254
|
-
{
|
|
255
|
-
"name": "product_id",
|
|
256
|
-
"type": "string"
|
|
257
|
-
},
|
|
258
|
-
{
|
|
259
|
-
"name": "sale_amount",
|
|
260
|
-
"type": "number"
|
|
261
|
-
},
|
|
262
|
-
{
|
|
263
|
-
"name": "sale_date",
|
|
264
|
-
"type": "datetime"
|
|
265
|
-
}
|
|
266
|
-
],
|
|
267
|
-
"settings": {
|
|
268
|
-
"fileKey": "sales/daily_sales.csv",
|
|
269
|
-
"fileType": "CSV"
|
|
270
|
-
},
|
|
271
|
-
"_version": 2
|
|
272
|
-
},
|
|
273
|
-
{
|
|
274
|
-
"name": "APIUsers",
|
|
275
|
-
"description": "Producer for user data from REST API",
|
|
276
|
-
"source": "REST API with Bearer Token",
|
|
277
|
-
"dimensions": [
|
|
278
|
-
{
|
|
279
|
-
"name": "user_id",
|
|
280
|
-
"type": "string",
|
|
281
|
-
"pk": true
|
|
282
|
-
},
|
|
283
|
-
{
|
|
284
|
-
"name": "username",
|
|
285
|
-
"type": "string"
|
|
286
|
-
},
|
|
287
|
-
{
|
|
288
|
-
"name": "email",
|
|
289
|
-
"type": "string",
|
|
290
|
-
"classification": [
|
|
291
|
-
"PII",
|
|
292
|
-
"GDPR"
|
|
293
|
-
],
|
|
294
|
-
"mask": "mask"
|
|
295
|
-
},
|
|
296
|
-
{
|
|
297
|
-
"name": "created_at",
|
|
298
|
-
"type": "datetime"
|
|
299
|
-
}
|
|
300
|
-
],
|
|
301
|
-
"settings": {
|
|
302
|
-
"fileKey": "/api/v1/users",
|
|
303
|
-
"fileType": "JSON"
|
|
304
|
-
},
|
|
305
|
-
"_version": 1
|
|
306
|
-
}
|
|
307
|
-
]
|
|
308
|
-
}
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"title": "Project Schema",
|
|
4
|
-
"description": "Schema for defining remora project configuration",
|
|
5
|
-
"type": "object",
|
|
6
|
-
"required": ["name", "version", "consumers", "producers", "sources", "schemas", "settings"],
|
|
7
|
-
"properties": {
|
|
8
|
-
"$schema": {
|
|
9
|
-
"type": "string",
|
|
10
|
-
"format": "uri"
|
|
11
|
-
},
|
|
12
|
-
"name": {
|
|
13
|
-
"type": "string",
|
|
14
|
-
"description": "Name of the remora project"
|
|
15
|
-
},
|
|
16
|
-
"version": {
|
|
17
|
-
"type": "string",
|
|
18
|
-
"pattern": "^\\d+\\.\\d+\\.\\d+$",
|
|
19
|
-
"description": "Version of the project in semver format"
|
|
20
|
-
},
|
|
21
|
-
"description": {
|
|
22
|
-
"type": "string",
|
|
23
|
-
"description": "Optional description of the project"
|
|
24
|
-
},
|
|
25
|
-
"consumers": {
|
|
26
|
-
"type": "array",
|
|
27
|
-
"items": {
|
|
28
|
-
"type": "string",
|
|
29
|
-
"pattern": "^/"
|
|
30
|
-
},
|
|
31
|
-
"description": "Array of consumer paths"
|
|
32
|
-
},
|
|
33
|
-
"producers": {
|
|
34
|
-
"type": "array",
|
|
35
|
-
"items": {
|
|
36
|
-
"type": "string",
|
|
37
|
-
"pattern": "^/"
|
|
38
|
-
},
|
|
39
|
-
"description": "Array of producer paths"
|
|
40
|
-
},
|
|
41
|
-
"sources": {
|
|
42
|
-
"type": "array",
|
|
43
|
-
"items": {
|
|
44
|
-
"type": "string",
|
|
45
|
-
"pattern": "^/"
|
|
46
|
-
},
|
|
47
|
-
"description": "Array of source paths"
|
|
48
|
-
},
|
|
49
|
-
"schemas": {
|
|
50
|
-
"type": "array",
|
|
51
|
-
"items": {
|
|
52
|
-
"type": "string",
|
|
53
|
-
"pattern": "^/"
|
|
54
|
-
},
|
|
55
|
-
"description": "Array of schema paths"
|
|
56
|
-
},
|
|
57
|
-
"settings": {
|
|
58
|
-
"type": "object",
|
|
59
|
-
"required": ["SQL_MAX_QUERY_ROWS"],
|
|
60
|
-
"properties": {
|
|
61
|
-
"SQL_MAX_QUERY_ROWS": {
|
|
62
|
-
"type": "integer",
|
|
63
|
-
"minimum": 1,
|
|
64
|
-
"description": "Maximum number of rows for SQL queries"
|
|
65
|
-
},
|
|
66
|
-
"STRING_MAX_CHARACTERS_LENGTH": {
|
|
67
|
-
"type": "integer",
|
|
68
|
-
"minimum": 1,
|
|
69
|
-
"description": "Maximum length for string fields"
|
|
70
|
-
},
|
|
71
|
-
"MAX_ITEMS_IN_MEMORY": {
|
|
72
|
-
"type": "integer",
|
|
73
|
-
"minimum": 1,
|
|
74
|
-
"description": "Maximum number of items to keep in memory"
|
|
75
|
-
},
|
|
76
|
-
"DEBUG_MODE": {
|
|
77
|
-
"type": "boolean",
|
|
78
|
-
"description": "Enable logging of internal steps"
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
},
|
|
83
|
-
"additionalProperties": false,
|
|
84
|
-
"examples": [
|
|
85
|
-
{
|
|
86
|
-
"$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/project-schema.json",
|
|
87
|
-
"name": "analytics-project",
|
|
88
|
-
"version": "1.0.0",
|
|
89
|
-
"description": "Analytics data processing project",
|
|
90
|
-
"consumers": ["/consumers"],
|
|
91
|
-
"producers": ["/producers"],
|
|
92
|
-
"sources": ["/sources"],
|
|
93
|
-
"schemas": ["/schemas"],
|
|
94
|
-
"settings": {
|
|
95
|
-
"SQL_MAX_QUERY_ROWS": 10000,
|
|
96
|
-
"DEBUG_MODE": true
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
]
|
|
100
|
-
}
|
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"title": "Source Schema",
|
|
4
|
-
"description": "Schema for defining data sources and their authentication methods",
|
|
5
|
-
"type": "object",
|
|
6
|
-
"properties": {
|
|
7
|
-
"$schema": {
|
|
8
|
-
"type": "string",
|
|
9
|
-
"format": "uri"
|
|
10
|
-
},
|
|
11
|
-
"name": {
|
|
12
|
-
"type": "string",
|
|
13
|
-
"description": "The name of the data source"
|
|
14
|
-
},
|
|
15
|
-
"description": {
|
|
16
|
-
"type": "string",
|
|
17
|
-
"description": "Optional description of the data source"
|
|
18
|
-
},
|
|
19
|
-
"engine": {
|
|
20
|
-
"type": "string",
|
|
21
|
-
"enum": [
|
|
22
|
-
"aws-redshift",
|
|
23
|
-
"aws-dynamodb",
|
|
24
|
-
"aws-s3",
|
|
25
|
-
"postgres",
|
|
26
|
-
"local",
|
|
27
|
-
"delta-share",
|
|
28
|
-
"http-api"
|
|
29
|
-
],
|
|
30
|
-
"description": "The type of data engine"
|
|
31
|
-
},
|
|
32
|
-
"authentication": {
|
|
33
|
-
"type": "object",
|
|
34
|
-
"description": "Authentication details for connecting to the data source. You can use environment variables by using the {your-env-var} notation",
|
|
35
|
-
"properties": {
|
|
36
|
-
"method": {
|
|
37
|
-
"type": "string",
|
|
38
|
-
"enum": [
|
|
39
|
-
"iam",
|
|
40
|
-
"username-password",
|
|
41
|
-
"access-secret-key",
|
|
42
|
-
"arn",
|
|
43
|
-
"implicit",
|
|
44
|
-
"bearer-token",
|
|
45
|
-
"api-key",
|
|
46
|
-
"none"
|
|
47
|
-
],
|
|
48
|
-
"description": "The authentication method to use"
|
|
49
|
-
},
|
|
50
|
-
"host": {
|
|
51
|
-
"type": "string",
|
|
52
|
-
"description": "Hostname or endpoint of the data source"
|
|
53
|
-
},
|
|
54
|
-
"user": {
|
|
55
|
-
"type": "string",
|
|
56
|
-
"description": "Username for authentication"
|
|
57
|
-
},
|
|
58
|
-
"password": {
|
|
59
|
-
"type": "string",
|
|
60
|
-
"description": "Password for authentication"
|
|
61
|
-
},
|
|
62
|
-
"database": {
|
|
63
|
-
"type": "string",
|
|
64
|
-
"description": "Database name"
|
|
65
|
-
},
|
|
66
|
-
"workgroup": {
|
|
67
|
-
"type": "string",
|
|
68
|
-
"description": "Workgroup name"
|
|
69
|
-
},
|
|
70
|
-
"schema": {
|
|
71
|
-
"type": "string",
|
|
72
|
-
"description": "Database schema name"
|
|
73
|
-
},
|
|
74
|
-
"table": {
|
|
75
|
-
"type": "string",
|
|
76
|
-
"description": "Table name (used by some engines like delta-share)"
|
|
77
|
-
},
|
|
78
|
-
"port": {
|
|
79
|
-
"type": "string",
|
|
80
|
-
"description": "Port number for the connection"
|
|
81
|
-
},
|
|
82
|
-
"accessKey": {
|
|
83
|
-
"type": "string",
|
|
84
|
-
"description": "AWS access key ID"
|
|
85
|
-
},
|
|
86
|
-
"secretKey": {
|
|
87
|
-
"type": "string",
|
|
88
|
-
"description": "AWS secret access key"
|
|
89
|
-
},
|
|
90
|
-
"sessionToken": {
|
|
91
|
-
"type": "string",
|
|
92
|
-
"description": "AWS session token (if required)"
|
|
93
|
-
},
|
|
94
|
-
"region": {
|
|
95
|
-
"type": "string",
|
|
96
|
-
"description": "AWS region"
|
|
97
|
-
},
|
|
98
|
-
"bucket": {
|
|
99
|
-
"type": "string",
|
|
100
|
-
"description": "S3 bucket name"
|
|
101
|
-
},
|
|
102
|
-
"iamProfile": {
|
|
103
|
-
"type": "string",
|
|
104
|
-
"description": "IAM role or profile name"
|
|
105
|
-
},
|
|
106
|
-
"clusterId": {
|
|
107
|
-
"type": "string",
|
|
108
|
-
"description": "Redshift cluster identifier"
|
|
109
|
-
},
|
|
110
|
-
"path": {
|
|
111
|
-
"type": "string",
|
|
112
|
-
"description": "The folder path"
|
|
113
|
-
},
|
|
114
|
-
"share": {
|
|
115
|
-
"type": "string",
|
|
116
|
-
"description": "Delta Sharing share name"
|
|
117
|
-
},
|
|
118
|
-
"bearerToken": {
|
|
119
|
-
"type": "string",
|
|
120
|
-
"description": "Bearer token used for authentication (Delta Sharing or HTTP API)"
|
|
121
|
-
},
|
|
122
|
-
"url": {
|
|
123
|
-
"type": "string",
|
|
124
|
-
"format": "uri",
|
|
125
|
-
"description": "Base URL for HTTP API sources"
|
|
126
|
-
},
|
|
127
|
-
"headers": {
|
|
128
|
-
"type": "object",
|
|
129
|
-
"description": "Custom HTTP headers for API requests",
|
|
130
|
-
"additionalProperties": {
|
|
131
|
-
"type": "string"
|
|
132
|
-
}
|
|
133
|
-
},
|
|
134
|
-
"queryParams": {
|
|
135
|
-
"type": "object",
|
|
136
|
-
"description": "Default query parameters for API requests",
|
|
137
|
-
"additionalProperties": {
|
|
138
|
-
"type": "string"
|
|
139
|
-
}
|
|
140
|
-
},
|
|
141
|
-
"httpMethod": {
|
|
142
|
-
"type": "string",
|
|
143
|
-
"enum": ["GET", "POST", "PUT", "PATCH", "DELETE"],
|
|
144
|
-
"description": "HTTP method to use for API requests",
|
|
145
|
-
"default": "GET"
|
|
146
|
-
},
|
|
147
|
-
"apiKey": {
|
|
148
|
-
"type": "string",
|
|
149
|
-
"description": "API key for api-key authentication method"
|
|
150
|
-
},
|
|
151
|
-
"apiKeyHeader": {
|
|
152
|
-
"type": "string",
|
|
153
|
-
"description": "Header name for API key (defaults to X-API-Key)",
|
|
154
|
-
"default": "X-API-Key"
|
|
155
|
-
},
|
|
156
|
-
"timeout": {
|
|
157
|
-
"type": "number",
|
|
158
|
-
"description": "Request timeout in milliseconds",
|
|
159
|
-
"default": 30000,
|
|
160
|
-
"minimum": 1000
|
|
161
|
-
}
|
|
162
|
-
},
|
|
163
|
-
"required": ["method"]
|
|
164
|
-
},
|
|
165
|
-
"_version": {
|
|
166
|
-
"type": "number",
|
|
167
|
-
"description": "Version number of the source configuration"
|
|
168
|
-
}
|
|
169
|
-
},
|
|
170
|
-
"required": [
|
|
171
|
-
"name",
|
|
172
|
-
"engine",
|
|
173
|
-
"authentication"
|
|
174
|
-
],
|
|
175
|
-
"additionalProperties": false,
|
|
176
|
-
"examples": [
|
|
177
|
-
{
|
|
178
|
-
"name": "Production PostgreSQL Database",
|
|
179
|
-
"description": "Main production database for customer data",
|
|
180
|
-
"engine": "postgres",
|
|
181
|
-
"authentication": {
|
|
182
|
-
"method": "username-password",
|
|
183
|
-
"host": "prod-db.example.com",
|
|
184
|
-
"user": "app_user",
|
|
185
|
-
"password": "password123",
|
|
186
|
-
"database": "customers",
|
|
187
|
-
"schema": "public",
|
|
188
|
-
"port": "5432"
|
|
189
|
-
},
|
|
190
|
-
"_version": 1
|
|
191
|
-
},
|
|
192
|
-
{
|
|
193
|
-
"name": "Data Lake",
|
|
194
|
-
"description": "AWS S3 bucket containing analytics data",
|
|
195
|
-
"engine": "aws-s3",
|
|
196
|
-
"authentication": {
|
|
197
|
-
"method": "access-secret-key",
|
|
198
|
-
"accessKey": "AKIAIOSFODNN7EXAMPLE",
|
|
199
|
-
"secretKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
|
|
200
|
-
"region": "us-east-1",
|
|
201
|
-
"bucket": "analytics-data"
|
|
202
|
-
},
|
|
203
|
-
"_version": 2
|
|
204
|
-
},
|
|
205
|
-
{
|
|
206
|
-
"name": "Redshift Data Warehouse",
|
|
207
|
-
"engine": "aws-redshift",
|
|
208
|
-
"authentication": {
|
|
209
|
-
"method": "iam",
|
|
210
|
-
"host": "redshift-cluster.region.redshift.amazonaws.com",
|
|
211
|
-
"database": "analytics",
|
|
212
|
-
"port": "5439",
|
|
213
|
-
"iamProfile": "redshift-access-role",
|
|
214
|
-
"region": "us-west-2",
|
|
215
|
-
"clusterId": "analytics-cluster"
|
|
216
|
-
},
|
|
217
|
-
"_version": 1
|
|
218
|
-
},
|
|
219
|
-
{
|
|
220
|
-
"name": "REST API with Bearer Token",
|
|
221
|
-
"description": "HTTP API source with bearer token authentication",
|
|
222
|
-
"engine": "http-api",
|
|
223
|
-
"authentication": {
|
|
224
|
-
"method": "bearer-token",
|
|
225
|
-
"url": "https://api.example.com",
|
|
226
|
-
"bearerToken": "{API_BEARER_TOKEN}",
|
|
227
|
-
"headers": {
|
|
228
|
-
"Accept": "application/json"
|
|
229
|
-
},
|
|
230
|
-
"timeout": 30000
|
|
231
|
-
},
|
|
232
|
-
"_version": 1
|
|
233
|
-
},
|
|
234
|
-
{
|
|
235
|
-
"name": "Public REST API",
|
|
236
|
-
"description": "Public HTTP API with no authentication",
|
|
237
|
-
"engine": "http-api",
|
|
238
|
-
"authentication": {
|
|
239
|
-
"method": "none",
|
|
240
|
-
"url": "https://api.publicapis.org",
|
|
241
|
-
"headers": {
|
|
242
|
-
"Accept": "application/json"
|
|
243
|
-
},
|
|
244
|
-
"httpMethod": "GET"
|
|
245
|
-
},
|
|
246
|
-
"_version": 1
|
|
247
|
-
}
|
|
248
|
-
]
|
|
249
|
-
}
|