@tmdc-solutions/mcp-beta 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stdio.cjs +100 -45
- package/dist/stdio.js +100 -45
- package/package.json +1 -2
package/dist/stdio.cjs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
"use strict"; function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function
|
|
2
|
+
"use strict"; function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }var _mcpjs = require('@modelcontextprotocol/sdk/server/mcp.js');var _stdiojs = require('@modelcontextprotocol/sdk/server/stdio.js');var _zod = require('zod'); var _zod2 = _interopRequireDefault(_zod);var _faker = require('@faker-js/faker');var _zodtojsonschema = require('zod-to-json-schema');var at=_zod2.default.object({fields:_zod2.default.array(_zod2.default.object({fieldName:_zod2.default.string(),defaultValue:_zod2.default.string().or(_zod2.default.number()).optional(),unique:_zod2.default.boolean().optional(),prefix:_zod2.default.string().optional(),suffix:_zod2.default.string().optional(),fakerConfig:_zod2.default.object({module:_zod2.default.enum(Object.keys(_faker.faker)),method:_zod2.default.string(),options:_zod2.default.any().optional(),dependencies:_zod2.default.array(_zod2.default.object({optionName:_zod2.default.string(),fieldName:_zod2.default.string()}),{description:`This is to pass inter related fields to the faker method for producing value that is based on other fields.
|
|
3
3
|
Make sure to pass the fields with dependencies as the last fields in the fields array.
|
|
4
4
|
Example:
|
|
5
|
-
If you want to generate email that is based on the first name and last name, you can pass the original fieldName of first name and last name fields as fieldName of dependencies and optionName as firstName and lastName.`}).optional()})}),{description:"The fields to generate"}),path:_zod2.default.string({description:"Absolute full path where file needs to be created. Always send full absolute path"}).default("~/tmp/mock-data/"),filename:_zod2.default.string().optional().default("mock-data.csv"),parentMapping:_zod2.default.array(_zod2.default.object({fieldName:_zod2.default.string(),parentFieldName:_zod2.default.string(),parentFieldValue:_zod2.default.string()})).optional().default([]),count:_zod2.default.number({description:"The number of items to generate"}).min(1).max(5e6).default(10)}),ye=at.extend({children:_zod2.default.lazy(()=>ye.array()).optional().default([])}),ct={file:ye,type:_zod2.default.enum(["csv","json","text"],{description:"The output type of the generated data"}).optional().default("csv"),append:_zod2.default.boolean().optional().default(!1),bufferSize:_zod2.default.number().optional().default(1e4),count:_zod2.default.number({description:"The number of items to generate"}).min(1).max(5e6).default(10)};function be(
|
|
5
|
+
If you want to generate email that is based on the first name and last name, you can pass the original fieldName of first name and last name fields as fieldName of dependencies and optionName as firstName and lastName.`}).optional()})}),{description:"The fields to generate"}),path:_zod2.default.string({description:"Absolute full path where file needs to be created. Always send full absolute path"}).default("~/tmp/mock-data/"),filename:_zod2.default.string().optional().default("mock-data.csv"),parentMapping:_zod2.default.array(_zod2.default.object({fieldName:_zod2.default.string(),parentFieldName:_zod2.default.string(),parentFieldValue:_zod2.default.string()})).optional().default([]),count:_zod2.default.number({description:"The number of items to generate"}).min(1).max(5e6).default(10)}),ye=at.extend({children:_zod2.default.lazy(()=>ye.array()).optional().default([])}),ct={file:ye,type:_zod2.default.enum(["csv","json","text"],{description:"The output type of the generated data"}).optional().default("csv"),append:_zod2.default.boolean().optional().default(!1),bufferSize:_zod2.default.number().optional().default(1e4),count:_zod2.default.number({description:"The number of items to generate"}).min(1).max(5e6).default(10)};function be(s){s.tool("mock-data-pattern-schema","Schema for producing mock data patterns",{},()=>({isError:!1,content:[{type:"text",text:JSON.stringify(_zodtojsonschema.zodToJsonSchema.call(void 0, _zod2.default.object(ct)),null,2)}]}))}var _yaml = require('yaml');var _fs = require('fs');var _path = require('path'); var _path2 = _interopRequireDefault(_path);var ft=`Depot in DataOS is a Resource used to connect different data sources to DataOS by abstracting the complexities associated with the underlying source system (including protocols, credentials, and connection schemas).
|
|
6
6
|
It enables users to establish connections and retrieve data from various data sources, such as file systems (e.g., AWS S3, Google GCS, Azure Blob Storage), data lake systems, database systems (e.g., Redshift, SnowflakeDB, Bigquery, Postgres), and event systems (e.g., Kafka, Pulsar) without moving the data.
|
|
7
|
-
`,gt=_zod.z.object({acl:_zod.z.enum(["r","rw"],{description:"Access control level - 'r' for read-only, 'rw' for read-write"}),type:_zod.z.literal("key-value-properties").describe("Type of secret storage"),data:_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Key-value pairs for authentication credentials (e.g., username, password, access keys)"}),files:_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"File paths for credential files (e.g., JSON key files for GCP)"}).optional()}),ht={description:_zod.z.string({description:"Human-readable description of the depot"}),external:_zod.z.boolean({description:"Whether the depot connects to an external data source"}).default(!0),connectionSecret:_zod.z.array(gt).optional()},yt=n=>{let{type:p,spec:e,s3:s,abfss:r,wasbs:l,redshift:v,elasticsearch:w,opensearch:b,eventhub:g,pulsar:d,bigquery:h,gcs:c,kafka:S,mongodb:m,mysql:f,oracle:t,postgresql:a,snowflake:y,mssql:x}=n;switch(p){case"JDBC":if(!e)return"Error: 'JDBC' type requires 'spec' property with subprotocol, host, port, and database.";if(s||r||l||v||w||b||g||d||h||c||S||m||f||t||a||y||x)return"Error: 'JDBC' type should only have 'spec' property, not other configuration properties.";break;case"S3":if(!s)return"Error: 'S3' type requires 's3' property with scheme, bucket, and relativePath.";if(e||r||l||v||w||b||g||d||h||c||S||m||f||t||a||y||x)return"Error: 'S3' type should only have 's3' property, not other configuration properties.";break;case"ABFSS":if(!r)return"Error: 'ABFSS' type requires 'abfss' property with account, container, and relativePath.";if(e||s||l||v||w||b||g||d||h||c||S||m||f||t||a||y||x)return"Error: 'ABFSS' type should only have 'abfss' property, not other configuration properties.";break;case"WASBS":if(!l)return"Error: 'WASBS' type requires 'wasbs' property with account, container, and relativePath.";if(e||s||r||v||w||b||g||d||h||c||S||m||f||t||a||y||x)return"Error: 'WASBS' type should only have 'wasbs' property, not other configuration properties.";break;case"REDSHIFT":if(!v)return"Error: 'REDSHIFT' type requires 'redshift' property with host, database, bucket, and relativePath.";if(e||s||r||l||w||b||g||d||h||c||S||m||f||t||a||y||x)return"Error: 'REDSHIFT' type should only have 'redshift' property, not other configuration properties.";break;case"ELASTICSEARCH":if(!w)return"Error: 'ELASTICSEARCH' type requires 'elasticsearch' property with nodes.";if(e||s||r||l||v||b||g||d||h||c||S||m||f||t||a||y||x)return"Error: 'ELASTICSEARCH' type should only have 'elasticsearch' property, not other configuration properties.";break;case"OPENSEARCH":if(!b)return"Error: 'OPENSEARCH' type requires 'opensearch' property with nodes.";if(e||s||r||l||v||w||g||d||h||c||S||m||f||t||a||y||x)return"Error: 'OPENSEARCH' type should only have 'opensearch' property, not other configuration properties.";break;case"EVENTHUB":if(!g)return"Error: 'EVENTHUB' type requires 'eventhub' property with endpoint.";if(e||s||r||l||v||w||b||d||h||c||S||m||f||t||a||y||x)return"Error: 'EVENTHUB' type should only have 'eventhub' property, not other configuration properties.";break;case"PULSAR":if(!d)return"Error: 'PULSAR' type requires 'pulsar' property with adminUrl and serviceUrl.";if(e||s||r||l||v||w||b||g||h||c||S||m||f||t||a||y||x)return"Error: 'PULSAR' type should only have 'pulsar' property, not other configuration properties.";break;case"BIGQUERY":if(!h)return"Error: 'BIGQUERY' type requires 'bigquery' property with project.";if(e||s||r||l||v||w||b||g||d||c||S||m||f||t||a||y||x)return"Error: 'BIGQUERY' type should only have 'bigquery' property, not other configuration properties.";break;case"GCS":if(!c)return"Error: 'GCS' type requires 'gcs' property with bucket and relativePath.";if(e||s||r||l||v||w||b||g||d||h||S||m||f||t||a||y||x)return"Error: 'GCS' type should only have 'gcs' property, not other configuration properties.";break;case"KAFKA":if(!S)return"Error: 'KAFKA' type requires 'kafka' property with brokers.";if(e||s||r||l||v||w||b||g||d||h||c||m||f||t||a||y||x)return"Error: 'KAFKA' type should only have 'kafka' property, not other configuration properties.";break;case"MONGODB":if(!m)return"Error: 'MONGODB' type requires 'mongodb' property with subprotocol and nodes.";if(e||s||r||l||v||w||b||g||d||h||c||S||f||t||a||y||x)return"Error: 'MONGODB' type should only have 'mongodb' property, not other configuration properties.";break;case"MYSQL":if(!f)return"Error: 'MYSQL' type requires 'mysql' property with host and port.";if(e||s||r||l||v||w||b||g||d||h||c||S||m||t||a||y||x)return"Error: 'MYSQL' type should only have 'mysql' property, not other configuration properties.";break;case"ORACLE":if(!t)return"Error: 'ORACLE' type requires 'oracle' property with subprotocol, host, port, and service.";if(e||s||r||l||v||w||b||g||d||h||c||S||m||f||a||y||x)return"Error: 'ORACLE' type should only have 'oracle' property, not other configuration properties.";break;case"POSTGRESQL":if(!a)return"Error: 'POSTGRESQL' type requires 'postgresql' property with host, port, and database.";if(e||s||r||l||v||w||b||g||d||h||c||S||m||f||t||y||x)return"Error: 'POSTGRESQL' type should only have 'postgresql' property, not other configuration properties.";break;case"SNOWFLAKE":if(!y)return"Error: 'SNOWFLAKE' type requires 'snowflake' property with warehouse, url, and database.";if(e||s||r||l||v||w||b||g||d||h||c||S||m||f||t||a||x)return"Error: 'SNOWFLAKE' type should only have 'snowflake' property, not other configuration properties.";break;case"MSSQL":if(!x)return"Error: 'MSSQL' type requires 'mssql' property with host, port, and database.";if(e||s||r||l||v||w||b||g||d||h||c||S||m||f||t||a||y)return"Error: 'MSSQL' type should only have 'mssql' property, not other configuration properties.";break;default:return`Error: Invalid depot type '${p}'. Supported types are: JDBC, S3, ABFSS, WASBS, REDSHIFT, ELASTICSEARCH, OPENSEARCH, EVENTHUB, PULSAR, BIGQUERY, GCS, KAFKA, MONGODB, MYSQL, ORACLE, POSTGRESQL, SNOWFLAKE, MSSQL.`}return null},bt={name:_zod.z.string({description:"Depot name - alphanumeric with hyphens, max 48 chars, pattern: [a-z0-9]([-a-z0-9]*[a-z0-9])"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the depot"}),secrets:_zod.z.array(_zod.z.object({name:_zod.z.string({description:"Reference name for the secret"}),keys:_zod.z.array(_zod.z.string(),{description:"Specific keys to extract from the secret"}).optional(),allkeys:_zod.z.boolean({description:"Whether to use all keys from the secret"})})).optional(),owner:_zod.z.string({description:"Owner of the depot resource"}).optional(),compute:_zod.z.string({description:"Compute resource for the depot (default: runnable-default)"}).optional(),depot:_zod.z.object({type:_zod.z.enum(["JDBC","S3","ABFSS","WASBS","REDSHIFT","ELASTICSEARCH","OPENSEARCH","EVENTHUB","PULSAR","BIGQUERY","GCS","KAFKA","MONGODB","MYSQL","ORACLE","POSTGRESQL","SNOWFLAKE","MSSQL"],{description:"Type of depot connection"}),spec:_zod.z.object({subprotocol:_zod.z.string({description:"JDBC subprotocol (e.g., 'postgresql', 'mysql', 'sqlserver', 'oracle:thin')"}),host:_zod.z.string({description:"Database server hostname or IP address"}),port:_zod.z.number({description:"Database server port number"}),database:_zod.z.string({description:"Database name to connect to"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., SSL settings)"}).optional()}).optional(),s3:_zod.z.object({scheme:_zod.z.enum(["s3a","s3"],{description:"S3 protocol scheme"}),bucket:_zod.z.string({description:"S3 bucket name"}),relativePath:_zod.z.string({description:"Path within the bucket"}),format:_zod.z.string({description:"Data format (e.g., 'ICEBERG', 'PARQUET', 'JSON', 'CSV')"}).optional()}).optional(),abfss:_zod.z.object({account:_zod.z.string({description:"Azure storage account name"}),container:_zod.z.string({description:"Container name within the storage account"}),relativePath:_zod.z.string({description:"Path within the container"}),format:_zod.z.string({description:"Data format (e.g., 'PARQUET', 'JSON', 'CSV')"}).optional()}).optional(),wasbs:_zod.z.object({account:_zod.z.string({description:"Azure storage account name"}),container:_zod.z.string({description:"Container name within the storage account"}),relativePath:_zod.z.string({description:"Path within the container"}),format:_zod.z.string({description:"Data format (e.g., 'PARQUET', 'JSON', 'CSV')"}).optional()}).optional(),redshift:_zod.z.object({host:_zod.z.string({description:"Redshift cluster endpoint"}),subprotocol:_zod.z.string({description:"JDBC subprotocol for Redshift"}).optional(),port:_zod.z.number({description:"Redshift port (default: 5439)"}).default(5439),database:_zod.z.string({description:"Redshift database name"}),bucket:_zod.z.string({description:"S3 bucket for data staging"}),relativePath:_zod.z.string({description:"Path within the S3 bucket"})}).optional(),elasticsearch:_zod.z.object({nodes:_zod.z.array(_zod.z.string(),{description:"List of Elasticsearch nodes (format: 'host:port')"})}).optional(),opensearch:_zod.z.object({nodes:_zod.z.array(_zod.z.string(),{description:"List of OpenSearch nodes (format: 'host:port')"})}).optional(),eventhub:_zod.z.object({endpoint:_zod.z.string({description:"Event Hub namespace endpoint (format: 'sb://namespace.servicebus.windows.net/')"})}).optional(),pulsar:_zod.z.object({adminUrl:_zod.z.string({description:"Pulsar admin API URL"}),serviceUrl:_zod.z.string({description:"Pulsar broker service URL"}),tenant:_zod.z.string({description:"Pulsar tenant name"}).optional()}).optional(),bigquery:_zod.z.object({project:_zod.z.string({description:"Google Cloud project ID"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional BigQuery connection parameters"}).optional()}).optional(),gcs:_zod.z.object({bucket:_zod.z.string({description:"GCS bucket name"}),relativePath:_zod.z.string({description:"Path within the bucket"})}).optional(),kafka:_zod.z.object({brokers:_zod.z.array(_zod.z.string(),{description:"List of Kafka broker addresses"}),schemaRegistryUrl:_zod.z.string({description:"Schema Registry URL for Avro/JSON schemas"}).optional()}).optional(),mongodb:_zod.z.object({subprotocol:_zod.z.string({description:"MongoDB connection protocol (e.g., 'mongodb+srv')"}),nodes:_zod.z.array(_zod.z.string(),{description:"MongoDB cluster nodes"})}).optional(),mysql:_zod.z.object({host:_zod.z.string({description:"MySQL server hostname"}),port:_zod.z.number({description:"MySQL server port (default: 3306)"}).default(3306),database:_zod.z.string({description:"MySQL database name"}).optional(),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., SSL settings)"}).optional()}).optional(),oracle:_zod.z.object({subprotocol:_zod.z.string({description:"Oracle JDBC subprotocol (e.g., 'oracle:thin')"}),host:_zod.z.string({description:"Oracle server hostname"}),port:_zod.z.number({description:"Oracle server port (default: 1521)"}).default(1521),service:_zod.z.string({description:"Oracle service name"})}).optional(),postgresql:_zod.z.object({host:_zod.z.string({description:"PostgreSQL server hostname"}),port:_zod.z.number({description:"PostgreSQL server port (default: 5432)"}).default(5432),database:_zod.z.string({description:"PostgreSQL database name"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., sslmode)"}).optional()}).optional(),snowflake:_zod.z.object({warehouse:_zod.z.string({description:"Snowflake warehouse name"}),url:_zod.z.string({description:"Snowflake account URL"}),database:_zod.z.string({description:"Snowflake database name"})}).optional(),mssql:_zod.z.object({host:_zod.z.string({description:"SQL Server hostname"}),port:_zod.z.number({description:"SQL Server port (default: 1433)"}).default(1433),database:_zod.z.string({description:"SQL Server database name"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., encrypt)"}).optional()}).optional()}),...ht,source:_zod.z.string({description:"Maps the depot to the metadata source name in Metis. Running a scanner job on this depot will save the metadata in Metis DB under the specified 'source' name. If this key-value property is not mentioned, the metadata will surface under the depot name on Metis UI."}).optional(),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-depot.yaml"}),fileName:_zod.z.string({description:"File name for the depot file. example: my-depot.yaml"})},ve=n=>{n.tool("create-depot-source",ft,bt,async({name:p,tags:e,depot:s,owner:r,compute:l,source:v,description:w,external:b,connectionSecret:g,path:d,fileName:h})=>{let c=yt(s);if(c)return{content:[{type:"text",text:c}]};let S=_yaml.stringify.call(void 0, {name:p,tags:e,type:"depot",version:"v2alpha",layer:"user",...r&&{owner:r},...l&&{compute:l},...v&&{source:v},description:w,external:b,...g&&{connectionSecret:g},depot:s});try{let m=d.includes(".yaml")||d.includes(".yml")?d:d.endsWith("/")?d+h:`${d}/${h}`,f=_path2.default.dirname(m);return _fs.existsSync.call(void 0, f)||_fs.mkdirSync.call(void 0, f,{recursive:!0}),_fs.writeFileSync.call(void 0, m,S),{content:[{type:"text",text:`Depot source created successfully at ${m}`},{type:"text",text:`File content: ${S}`}]}}catch(m){return{content:[{type:"text",text:`Error creating depot source: ${m}`}]}}})};var xt=`Flare Workflow in DataOS is a declarative stack for large-scale data processing using Apache Spark. It provides comprehensive solutions for data ingestion, transformation, enrichment, profiling, quality assessment, and syndication on both batch and incremental data.
|
|
7
|
+
`,gt=_zod.z.object({acl:_zod.z.enum(["r","rw"],{description:"Access control level - 'r' for read-only, 'rw' for read-write"}),type:_zod.z.literal("key-value-properties").describe("Type of secret storage"),data:_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Key-value pairs for authentication credentials (e.g., username, password, access keys)"}),files:_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"File paths for credential files (e.g., JSON key files for GCP)"}).optional()}),ht={description:_zod.z.string({description:"Human-readable description of the depot"}),external:_zod.z.boolean({description:"Whether the depot connects to an external data source"}).default(!0),connectionSecret:_zod.z.array(gt).optional()},yt=s=>{let{type:l,spec:e,s3:a,abfss:c,wasbs:r,redshift:v,elasticsearch:S,opensearch:y,eventhub:g,pulsar:d,bigquery:m,gcs:p,kafka:w,mongodb:f,mysql:h,oracle:t,postgresql:n,snowflake:b,mssql:x}=s;switch(l){case"JDBC":if(!e)return"Error: 'JDBC' type requires 'spec' property with subprotocol, host, port, and database.";if(a||c||r||v||S||y||g||d||m||p||w||f||h||t||n||b||x)return"Error: 'JDBC' type should only have 'spec' property, not other configuration properties.";break;case"S3":if(!a)return"Error: 'S3' type requires 's3' property with scheme, bucket, and relativePath.";if(e||c||r||v||S||y||g||d||m||p||w||f||h||t||n||b||x)return"Error: 'S3' type should only have 's3' property, not other configuration properties.";break;case"ABFSS":if(!c)return"Error: 'ABFSS' type requires 'abfss' property with account, container, and relativePath.";if(e||a||r||v||S||y||g||d||m||p||w||f||h||t||n||b||x)return"Error: 'ABFSS' type should only have 'abfss' property, not other configuration properties.";break;case"WASBS":if(!r)return"Error: 'WASBS' type requires 'wasbs' property with account, container, and relativePath.";if(e||a||c||v||S||y||g||d||m||p||w||f||h||t||n||b||x)return"Error: 'WASBS' type should only have 'wasbs' property, not other configuration properties.";break;case"REDSHIFT":if(!v)return"Error: 'REDSHIFT' type requires 'redshift' property with host, database, bucket, and relativePath.";if(e||a||c||r||S||y||g||d||m||p||w||f||h||t||n||b||x)return"Error: 'REDSHIFT' type should only have 'redshift' property, not other configuration properties.";break;case"ELASTICSEARCH":if(!S)return"Error: 'ELASTICSEARCH' type requires 'elasticsearch' property with nodes.";if(e||a||c||r||v||y||g||d||m||p||w||f||h||t||n||b||x)return"Error: 'ELASTICSEARCH' type should only have 'elasticsearch' property, not other configuration properties.";break;case"OPENSEARCH":if(!y)return"Error: 'OPENSEARCH' type requires 'opensearch' property with nodes.";if(e||a||c||r||v||S||g||d||m||p||w||f||h||t||n||b||x)return"Error: 'OPENSEARCH' type should only have 'opensearch' property, not other configuration properties.";break;case"EVENTHUB":if(!g)return"Error: 'EVENTHUB' type requires 'eventhub' property with endpoint.";if(e||a||c||r||v||S||y||d||m||p||w||f||h||t||n||b||x)return"Error: 'EVENTHUB' type should only have 'eventhub' property, not other configuration properties.";break;case"PULSAR":if(!d)return"Error: 'PULSAR' type requires 'pulsar' property with adminUrl and serviceUrl.";if(e||a||c||r||v||S||y||g||m||p||w||f||h||t||n||b||x)return"Error: 'PULSAR' type should only have 'pulsar' property, not other configuration properties.";break;case"BIGQUERY":if(!m)return"Error: 'BIGQUERY' type requires 'bigquery' property with project.";if(e||a||c||r||v||S||y||g||d||p||w||f||h||t||n||b||x)return"Error: 'BIGQUERY' type should only have 'bigquery' property, not other configuration properties.";break;case"GCS":if(!p)return"Error: 'GCS' type requires 'gcs' property with bucket and relativePath.";if(e||a||c||r||v||S||y||g||d||m||w||f||h||t||n||b||x)return"Error: 'GCS' type should only have 'gcs' property, not other configuration properties.";break;case"KAFKA":if(!w)return"Error: 'KAFKA' type requires 'kafka' property with brokers.";if(e||a||c||r||v||S||y||g||d||m||p||f||h||t||n||b||x)return"Error: 'KAFKA' type should only have 'kafka' property, not other configuration properties.";break;case"MONGODB":if(!f)return"Error: 'MONGODB' type requires 'mongodb' property with subprotocol and nodes.";if(e||a||c||r||v||S||y||g||d||m||p||w||h||t||n||b||x)return"Error: 'MONGODB' type should only have 'mongodb' property, not other configuration properties.";break;case"MYSQL":if(!h)return"Error: 'MYSQL' type requires 'mysql' property with host and port.";if(e||a||c||r||v||S||y||g||d||m||p||w||f||t||n||b||x)return"Error: 'MYSQL' type should only have 'mysql' property, not other configuration properties.";break;case"ORACLE":if(!t)return"Error: 'ORACLE' type requires 'oracle' property with subprotocol, host, port, and service.";if(e||a||c||r||v||S||y||g||d||m||p||w||f||h||n||b||x)return"Error: 'ORACLE' type should only have 'oracle' property, not other configuration properties.";break;case"POSTGRESQL":if(!n)return"Error: 'POSTGRESQL' type requires 'postgresql' property with host, port, and database.";if(e||a||c||r||v||S||y||g||d||m||p||w||f||h||t||b||x)return"Error: 'POSTGRESQL' type should only have 'postgresql' property, not other configuration properties.";break;case"SNOWFLAKE":if(!b)return"Error: 'SNOWFLAKE' type requires 'snowflake' property with warehouse, url, and database.";if(e||a||c||r||v||S||y||g||d||m||p||w||f||h||t||n||x)return"Error: 'SNOWFLAKE' type should only have 'snowflake' property, not other configuration properties.";break;case"MSSQL":if(!x)return"Error: 'MSSQL' type requires 'mssql' property with host, port, and database.";if(e||a||c||r||v||S||y||g||d||m||p||w||f||h||t||n||b)return"Error: 'MSSQL' type should only have 'mssql' property, not other configuration properties.";break;default:return`Error: Invalid depot type '${l}'. Supported types are: JDBC, S3, ABFSS, WASBS, REDSHIFT, ELASTICSEARCH, OPENSEARCH, EVENTHUB, PULSAR, BIGQUERY, GCS, KAFKA, MONGODB, MYSQL, ORACLE, POSTGRESQL, SNOWFLAKE, MSSQL.`}return null},bt={name:_zod.z.string({description:"Depot name - alphanumeric with hyphens, max 48 chars, pattern: [a-z0-9]([-a-z0-9]*[a-z0-9])"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the depot"}),secrets:_zod.z.array(_zod.z.object({name:_zod.z.string({description:"Reference name for the secret"}),keys:_zod.z.array(_zod.z.string(),{description:"Specific keys to extract from the secret"}).optional(),allkeys:_zod.z.boolean({description:"Whether to use all keys from the secret"})})).optional(),owner:_zod.z.string({description:"Owner of the depot resource"}).optional(),compute:_zod.z.string({description:"Compute resource for the depot (default: runnable-default)"}).optional(),depot:_zod.z.object({type:_zod.z.enum(["JDBC","S3","ABFSS","WASBS","REDSHIFT","ELASTICSEARCH","OPENSEARCH","EVENTHUB","PULSAR","BIGQUERY","GCS","KAFKA","MONGODB","MYSQL","ORACLE","POSTGRESQL","SNOWFLAKE","MSSQL"],{description:"Type of depot connection"}),spec:_zod.z.object({subprotocol:_zod.z.string({description:"JDBC subprotocol (e.g., 'postgresql', 'mysql', 'sqlserver', 'oracle:thin')"}),host:_zod.z.string({description:"Database server hostname or IP address"}),port:_zod.z.number({description:"Database server port number"}),database:_zod.z.string({description:"Database name to connect to"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., SSL settings)"}).optional()}).optional(),s3:_zod.z.object({scheme:_zod.z.enum(["s3a","s3"],{description:"S3 protocol scheme"}),bucket:_zod.z.string({description:"S3 bucket name"}),relativePath:_zod.z.string({description:"Path within the bucket"}),format:_zod.z.string({description:"Data format (e.g., 'ICEBERG', 'PARQUET', 'JSON', 'CSV')"}).optional()}).optional(),abfss:_zod.z.object({account:_zod.z.string({description:"Azure storage account name"}),container:_zod.z.string({description:"Container name within the storage account"}),relativePath:_zod.z.string({description:"Path within the container"}),format:_zod.z.string({description:"Data format (e.g., 'PARQUET', 'JSON', 'CSV')"}).optional()}).optional(),wasbs:_zod.z.object({account:_zod.z.string({description:"Azure storage account name"}),container:_zod.z.string({description:"Container name within the storage account"}),relativePath:_zod.z.string({description:"Path within the container"}),format:_zod.z.string({description:"Data format (e.g., 'PARQUET', 'JSON', 'CSV')"}).optional()}).optional(),redshift:_zod.z.object({host:_zod.z.string({description:"Redshift cluster endpoint"}),subprotocol:_zod.z.string({description:"JDBC subprotocol for Redshift"}).optional(),port:_zod.z.number({description:"Redshift port (default: 5439)"}).default(5439),database:_zod.z.string({description:"Redshift database name"}),bucket:_zod.z.string({description:"S3 bucket for data staging"}),relativePath:_zod.z.string({description:"Path within the S3 bucket"})}).optional(),elasticsearch:_zod.z.object({nodes:_zod.z.array(_zod.z.string(),{description:"List of Elasticsearch nodes (format: 'host:port')"})}).optional(),opensearch:_zod.z.object({nodes:_zod.z.array(_zod.z.string(),{description:"List of OpenSearch nodes (format: 'host:port')"})}).optional(),eventhub:_zod.z.object({endpoint:_zod.z.string({description:"Event Hub namespace endpoint (format: 'sb://namespace.servicebus.windows.net/')"})}).optional(),pulsar:_zod.z.object({adminUrl:_zod.z.string({description:"Pulsar admin API URL"}),serviceUrl:_zod.z.string({description:"Pulsar broker service URL"}),tenant:_zod.z.string({description:"Pulsar tenant name"}).optional()}).optional(),bigquery:_zod.z.object({project:_zod.z.string({description:"Google Cloud project ID"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional BigQuery connection parameters"}).optional()}).optional(),gcs:_zod.z.object({bucket:_zod.z.string({description:"GCS bucket name"}),relativePath:_zod.z.string({description:"Path within the bucket"})}).optional(),kafka:_zod.z.object({brokers:_zod.z.array(_zod.z.string(),{description:"List of Kafka broker addresses"}),schemaRegistryUrl:_zod.z.string({description:"Schema Registry URL for Avro/JSON schemas"}).optional()}).optional(),mongodb:_zod.z.object({subprotocol:_zod.z.string({description:"MongoDB connection protocol (e.g., 'mongodb+srv')"}),nodes:_zod.z.array(_zod.z.string(),{description:"MongoDB cluster nodes"})}).optional(),mysql:_zod.z.object({host:_zod.z.string({description:"MySQL server hostname"}),port:_zod.z.number({description:"MySQL server port (default: 3306)"}).default(3306),database:_zod.z.string({description:"MySQL database name"}).optional(),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., SSL settings)"}).optional()}).optional(),oracle:_zod.z.object({subprotocol:_zod.z.string({description:"Oracle JDBC subprotocol (e.g., 'oracle:thin')"}),host:_zod.z.string({description:"Oracle server hostname"}),port:_zod.z.number({description:"Oracle server port (default: 1521)"}).default(1521),service:_zod.z.string({description:"Oracle service name"})}).optional(),postgresql:_zod.z.object({host:_zod.z.string({description:"PostgreSQL server hostname"}),port:_zod.z.number({description:"PostgreSQL server port (default: 5432)"}).default(5432),database:_zod.z.string({description:"PostgreSQL database name"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., sslmode)"}).optional()}).optional(),snowflake:_zod.z.object({warehouse:_zod.z.string({description:"Snowflake warehouse name"}),url:_zod.z.string({description:"Snowflake account URL"}),database:_zod.z.string({description:"Snowflake database name"})}).optional(),mssql:_zod.z.object({host:_zod.z.string({description:"SQL Server hostname"}),port:_zod.z.number({description:"SQL Server port (default: 1433)"}).default(1433),database:_zod.z.string({description:"SQL Server database name"}),params:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional connection parameters (e.g., encrypt)"}).optional()}).optional()}),...ht,source:_zod.z.string({description:"Maps the depot to the metadata source name in Metis. Running a scanner job on this depot will save the metadata in Metis DB under the specified 'source' name. If this key-value property is not mentioned, the metadata will surface under the depot name on Metis UI."}).optional(),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-depot.yaml"}),fileName:_zod.z.string({description:"File name for the depot file. example: my-depot.yaml"})},ve=s=>{s.tool("create-depot-source",ft,bt,async({name:l,tags:e,depot:a,owner:c,compute:r,source:v,description:S,external:y,connectionSecret:g,path:d,fileName:m})=>{let p=yt(a);if(p)return{content:[{type:"text",text:p}]};let w=_yaml.stringify.call(void 0, {name:l,tags:e,type:"depot",version:"v2alpha",layer:"user",...c&&{owner:c},...r&&{compute:r},...v&&{source:v},description:S,external:y,...g&&{connectionSecret:g},depot:a});try{let f=d.includes(".yaml")||d.includes(".yml")?d:d.endsWith("/")?d+m:`${d}/${m}`,h=_path2.default.dirname(f);return _fs.existsSync.call(void 0, h)||_fs.mkdirSync.call(void 0, h,{recursive:!0}),_fs.writeFileSync.call(void 0, f,w),{content:[{type:"text",text:`Depot source created successfully at ${f}`},{type:"text",text:`File content: ${w}`}]}}catch(f){return{content:[{type:"text",text:`Error creating depot source: ${f}`}]}}})};var xt=`Flare Workflow in DataOS is a declarative stack for large-scale data processing using Apache Spark. It provides comprehensive solutions for data ingestion, transformation, enrichment, profiling, quality assessment, and syndication on both batch and incremental data.
|
|
8
8
|
|
|
9
9
|
Flare Workflows support multiple job types:
|
|
10
10
|
- **Batch Jobs**: Process all data in each run, ensuring consistent performance. Ideal for small to medium datasets
|
|
@@ -26,12 +26,12 @@ Examples:
|
|
|
26
26
|
- Data quality validation and monitoring
|
|
27
27
|
- Cross-platform data syndication
|
|
28
28
|
- Advanced analytics and feature engineering
|
|
29
|
-
`,_t=_zod.z.object({coreLimit:_zod.z.string().default("1000m").describe("CPU core limit for the driver (e.g., '1000m', '2000m'). 1000m = 1 CPU core"),cores:_zod.z.number().min(1).default(1).describe("Number of CPU cores allocated to the driver"),memory:_zod.z.string().default("1024m").describe("Memory allocation for the driver (e.g., '1024m', '2Gi')")}).optional().describe("Driver resource configuration for Spark driver"),At=_zod.z.object({coreLimit:_zod.z.string().default("1000m").describe("CPU core limit for each executor (e.g., '1000m', '2000m')"),cores:_zod.z.number().min(1).default(1).describe("Number of CPU cores for each executor"),instances:_zod.z.number().min(1).default(1).describe("Number of executor instances for parallel processing"),memory:_zod.z.string().default("1024m").describe("Memory allocation for each executor (e.g., '1024m', '4Gi')")}).optional().describe("Executor resource configuration for Spark executors"),Lt=_zod.z.object({context:_zod.z.string().describe("Context name for incremental processing (e.g., 'incrinput', 'customer_incremental')"),sql:_zod.z.string().describe("SQL query with placeholders for incremental keys (e.g., 'SELECT * FROM incrinput WHERE date BETWEEN $|start_date| AND $|end_date|')"),keys:_zod.z.array(_zod.z.object({name:_zod.z.string().describe("Name of the incremental key (e.g., 'start_date', 'end_date', 'last_updated_id')"),sql:_zod.z.string().describe("SQL query to obtain the key value (e.g., 'SELECT MAX(date) FROM target_table')")})).describe("Incremental keys for tracking data changes"),state:_zod.z.array(_zod.z.object({key:_zod.z.string().describe("State key name for persistence"),value:_zod.z.string().describe("State value reference for next run")})).describe("State management for incremental processing")}).describe("Incremental load configuration for processing only changed data"),Tt=_zod.z.object({name:_zod.z.string().describe("Reference name for the input dataset (used in SQL queries and transformations)"),dataset:_zod.z.string().describe("Dataset address in UDL format (e.g., 'dataos://lakehouse:retail/customers', 'dataos://s3-depot:bucket/path', 'dataos://bigquery-depot:project.dataset.table')"),query:_zod.z.string().optional().describe(`SQL query for data filtering and selection (e.g., 'SELECT * FROM table WHERE status = "active"')`),format:_zod.z.enum(["iceberg","csv","json","parquet","avro","delta","excel","xml","kafka","pulsar","eventhub","elasticsearch","opensearch","bigquery","snowflake","redshift","mysql","postgresql","oracle","mssql","mongodb"]).optional().describe("Data format of the source dataset"),isStream:_zod.z.boolean().default(!1).describe("Flag indicating if this is a streaming dataset (required for Kafka, Pulsar, EventHub)"),schemaType:_zod.z.enum(["avro","json"]).default("avro").optional().describe("Schema type for the dataset"),schemaPath:_zod.z.string().optional().describe("DataOS address to schema file (e.g., 'dataos://thirdparty:schemas/customer.avsc')"),schemaString:_zod.z.string().optional().describe("Inline schema definition (Avro JSON or Spark struct)"),schemaSubject:_zod.z.string().optional().describe("Subject name for schema registry (for Kafka/Pulsar with schema registry)"),schemaId:_zod.z.number().optional().describe("Schema ID in schema registry (specific version of schema)"),incremental:Lt.optional(),options:_zod.z.record(_zod.z.string(),_zod.z.any()).optional().describe("Additional data load options (branch, connection settings, partitioning, etc.)")}).describe("Input dataset configuration with comprehensive format support"),qt=_zod.z.object({name:_zod.z.string().describe("Reference name for the output dataset (must match transformation step name)"),dataset:_zod.z.string().describe("Dataset address in UDL format with write permissions (e.g., 'dataos://lakehouse:retail/processed_data?acl=rw')"),format:_zod.z.enum(["iceberg","parquet","csv","json","avro","delta","excel","kafka","pulsar","eventhub","elasticsearch","opensearch","bigquery","snowflake","redshift","mysql","postgresql","oracle","mssql"]).optional().describe("Output format for the dataset"),driver:_zod.z.string().optional().describe("JDBC driver class for database outputs (e.g., 'org.postgresql.Driver')"),title:_zod.z.string().optional().describe("Human-readable title for the output dataset"),description:_zod.z.string().optional().describe("Description of the output dataset content and purpose"),tags:_zod.z.array(_zod.z.string()).optional().describe("Tags for categorizing and discovering the output dataset"),options:_zod.z.object({saveMode:_zod.z.enum(["overwrite","append","ignore","error"]).default("overwrite").describe("Save mode: overwrite (replace), append (add), ignore (skip if exists), error (fail if exists)"),extraOptions:_zod.z.record(_zod.z.string(),_zod.z.any()).optional().describe("Format-specific options (warehouse settings, compression, etc.)"),compressionType:_zod.z.enum(["gzip","snappy","lz4","zstd"]).optional().describe("Compression type for the output data"),sort:_zod.z.object({mode:_zod.z.enum(["partition","global"]).describe("Sort mode: partition (within partitions), global (across all data)"),columns:_zod.z.array(_zod.z.object({name:_zod.z.string().describe("Column name to sort by"),order:_zod.z.enum(["asc","desc"]).default("asc").describe("Sort order: ascending or descending")})).describe("Columns to sort by with their order")}).optional().describe("Sorting configuration for optimized data layout"),iceberg:_zod.z.object({merge:_zod.z.object({onClause:_zod.z.string().describe("Join condition for merge operation (e.g., 'old.id = new.id')"),whenClause:_zod.z.string().describe("Merge conditions and actions (e.g., 'matched then update set * when not matched then insert *')")}).optional().describe("Iceberg merge configuration for upsert operations"),properties:_zod.z.record(_zod.z.string(),_zod.z.string()).optional().describe("Iceberg table properties (e.g., 'write.format.default': 'parquet')"),partitionSpec:_zod.z.array(_zod.z.object({type:_zod.z.enum(["identity","bucket","truncate","year","month","day","hour"]).describe("Partition transform type"),column:_zod.z.string().describe("Column to partition on"),asColumn:_zod.z.string().optional().describe("Alias for the partition column"),numBuckets:_zod.z.number().optional().describe("Number of buckets for bucket partitioning"),width:_zod.z.number().optional().describe("Width for truncate partitioning")})).optional().describe("Iceberg partitioning specification for query optimization")}).optional().describe("Iceberg-specific configuration options")}).optional()}).describe("Output dataset configuration with advanced options"),Ct=_zod.z.object({name:_zod.z.enum(["set_type","change_case","any_date","split","concat","replace","trim","substring","regex_extract","hash","encrypt","decrypt","pivot","unpivot","explode","collect","window","rank"]).describe("Flare function name for data transformation"),columns:_zod.z.record(_zod.z.string(),_zod.z.any()).optional().describe("Column-specific function parameters and configurations")}).describe("Flare built-in transformation function"),Dt=_zod.z.object({name:_zod.z.enum(["persist","cache","broadcast","repartition","coalesce"]).describe("Command for data optimization"),sequenceName:_zod.z.string().optional().describe("Sequence name to apply command to"),mode:_zod.z.enum(["MEMORY_ONLY","MEMORY_AND_DISK","DISK_ONLY"]).optional().describe("Persistence mode for caching"),numPartitions:_zod.z.number().optional().describe("Number of partitions for repartitioning")}).describe("Data optimization command"),jt=_zod.z.object({sequence:_zod.z.array(_zod.z.object({name:_zod.z.string().describe("Name of the transformation step (must be unique within job)"),doc:_zod.z.string().optional().describe("Documentation describing the purpose of this transformation step"),sql:_zod.z.string().describe("SQL query for data transformation (can reference input datasets and previous steps)"),classpath:_zod.z.string().optional().describe("Custom Java/Scala class for advanced transformations"),functions:_zod.z.array(Ct).optional().describe("Flare built-in functions to apply after SQL transformation"),commands:_zod.z.array(Dt).optional().describe("Performance optimization commands")})).describe("Sequence of transformation operations")}).describe("Data transformation step with SQL, functions, and commands"),Rt=n=>{let{type:p,column:e,filter:s,validFormat:r,tests:l,sql:v}=n;switch(p){case"column":if(!e)return"Error: 'column' type requires 'column' property with column name to test.";if(!l||l.length===0)return"Error: 'column' type requires 'tests' property with test expressions.";if(v)return"Error: 'column' type should not have 'sql' property.";break;case"sql":if(!v)return"Error: 'sql' type requires 'sql' property with custom SQL query.";if(!l||l.length===0)return"Error: 'sql' type requires 'tests' property with test expressions.";if(e||s||r)return"Error: 'sql' type should not have column-specific properties ('column', 'filter', 'validFormat').";break;default:return`Error: Invalid assertion test type '${p}'. Supported types are: column, sql.`}return null},$t=_zod.z.object({type:_zod.z.enum(["column","sql"],{description:"Type of assertion test: 'column' for column-based tests, 'sql' for custom SQL-based tests"}),column:_zod.z.string({description:"Column name to test"}).optional(),filter:_zod.z.string({description:`SQL filter condition (e.g., 'brand_name == "Nike"')`}).optional(),validFormat:_zod.z.object({regex:_zod.z.string({description:"Regular expression pattern for format validation"})}).optional(),sql:_zod.z.string({description:"Custom SQL query for complex validation"}).optional(),tests:_zod.z.array(_zod.z.string(),{description:"Test expressions (e.g., 'avg > 1000', 'max < 5000', 'missing_count < 10')"})}).describe("Data quality assertion configuration"),Nt={name:_zod.z.string().describe("Flare workflow name - alphanumeric with hyphens, max 48 chars"),tags:_zod.z.array(_zod.z.string()).describe("Tags for categorizing and organizing the workflow"),description:_zod.z.string().describe("Human-readable description of the workflow purpose and data processing logic"),title:_zod.z.string().optional().describe("Display title for the workflow"),owner:_zod.z.string().optional().describe("Owner of the workflow resource"),workspace:_zod.z.string().default("public").describe("Workspace where the workflow will be deployed"),compute:_zod.z.string().default("runnable-default").describe("Compute resource for the workflow (e.g., 'runnable-default', 'runnable-large')")},It={...Nt,jobType:_zod.z.enum(["batch","incremental","data-quality","data-profiling"]).describe("Type of Flare job - batch: process all data, incremental: process only changes, data-quality: validate with assertions, data-profiling: analyze data structure"),stack:_zod.z.enum(["flare:6.0","flare:5.0"]).default("flare:6.0").describe("Flare stack version"),tier:_zod.z.enum(["connect","transform","premium"]).optional().describe("Compute tier for the job"),driver:_t,executor:At,logLevel:_zod.z.enum(["DEBUG","INFO","WARN","ERROR"]).default("INFO").describe("Application log level for debugging and monitoring"),explain:_zod.z.boolean().default(!1).describe("Flag to print Spark logical/physical plans for debugging"),inputs:_zod.z.array(Tt).describe("Input datasets for the workflow to process"),outputs:_zod.z.array(qt).optional().describe("Output datasets where processed data will be written (not required for data-profiling jobs)"),steps:_zod.z.array(jt).optional().describe("Data transformation steps to apply between inputs and outputs"),assertions:_zod.z.array($t).optional().describe("Data quality assertions for validation (required for data-quality jobs)"),scheduling:_zod.z.object({cron:_zod.z.string().describe("Cron expression for scheduled execution (e.g., '0 0 * * *' for daily)"),timezone:_zod.z.string().default("UTC").describe("Timezone for cron schedule"),retries:_zod.z.number().default(3).describe("Number of retry attempts on failure"),concurrency:_zod.z.enum(["forbid","allow","replace"]).default("forbid").describe("Concurrent execution policy")}).optional().describe("Scheduling configuration for automated workflow execution"),dependencies:_zod.z.object({datasets:_zod.z.array(_zod.z.string()).optional().describe("Dataset dependencies that must exist before execution"),workflows:_zod.z.array(_zod.z.string()).optional().describe("Workflow dependencies that must complete before execution"),services:_zod.z.array(_zod.z.string()).optional().describe("Service dependencies that must be available")}).optional().describe("Workflow dependencies and prerequisites"),notifications:_zod.z.object({email:_zod.z.array(_zod.z.string()).optional().describe("Email addresses for notifications"),slack:_zod.z.string().optional().describe("Slack webhook URL for notifications"),teams:_zod.z.string().optional().describe("Microsoft Teams webhook URL"),conditions:_zod.z.array(_zod.z.enum(["success","failure","retry","timeout"])).default(["failure"]).describe("Conditions that trigger notifications")}).optional().describe("Notification configuration for workflow events"),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-flare.yaml"}),fileName:_zod.z.string({description:"File name for the flare workflow file. example: my-flare.yaml"})},we=n=>{n.tool("create-flare-workflow",xt,It,async({name:p,tags:e,description:s,title:r,owner:l,workspace:v,compute:w,jobType:b,stack:g,tier:d,driver:h,executor:c,logLevel:S,explain:m,inputs:f,outputs:t,steps:a,assertions:y,scheduling:x,dependencies:C,notifications:T,path:N,fileName:_})=>{if(y&&y.length>0)for(let Q of y){let V=Rt(Q);if(V)return{content:[{type:"text",text:V}]}}let B={logLevel:S,explain:m,inputs:f};b!=="data-profiling"&&t&&(B.outputs=t),a&&a.length>0&&(B.steps=a),b==="data-quality"&&y&&(B.assertions=y);let z={job:B};h&&(z.driver=h),c&&(z.executor=c);let G={name:p,version:"v1",type:"workflow",tags:e,description:s,...l&&{owner:l},...v&&v!=="public"&&{workspace:v},workflow:{title:r||`${p} - ${b} job`,dag:[{name:`${p}-${b}-job`,title:`${b.charAt(0).toUpperCase()+b.slice(1)} Job`,description:`${b} job for ${s}`,spec:{tags:[...e,b],stack:g,compute:w,...d&&{tier:d},stackSpec:z}}]}};x&&(G.schedule={cron:x.cron,timezone:x.timezone,...x.retries&&{retries:x.retries},concurrency:x.concurrency}),C&&(G.dependencies=C),T&&(G.notifications=T);let re=_yaml.stringify.call(void 0, G);try{let Q=N.includes(".yaml")||N.includes(".yml")?N:N.endsWith("/")?N+_:`${N}/${_}`,V=_path2.default.dirname(Q);return _fs.existsSync.call(void 0, V)||_fs.mkdirSync.call(void 0, V,{recursive:!0}),_fs.writeFileSync.call(void 0, Q,re),{content:[{type:"text",text:`Flare workflow created successfully at ${Q}`},{type:"text",text:`File content: ${re}`}]}}catch(Q){return{content:[{type:"text",text:`Error creating flare workflow: ${Q}`}]}}})};var Bt=`An Instance Secret is a DataOS Resource(https://dataos.info/resources/) designed for securely storing sensitive information at the DataOS Instance-level.
|
|
29
|
+
`,_t=_zod.z.object({coreLimit:_zod.z.string().default("1000m").describe("CPU core limit for the driver (e.g., '1000m', '2000m'). 1000m = 1 CPU core"),cores:_zod.z.number().min(1).default(1).describe("Number of CPU cores allocated to the driver"),memory:_zod.z.string().default("1024m").describe("Memory allocation for the driver (e.g., '1024m', '2Gi')")}).optional().describe("Driver resource configuration for Spark driver"),At=_zod.z.object({coreLimit:_zod.z.string().default("1000m").describe("CPU core limit for each executor (e.g., '1000m', '2000m')"),cores:_zod.z.number().min(1).default(1).describe("Number of CPU cores for each executor"),instances:_zod.z.number().min(1).default(1).describe("Number of executor instances for parallel processing"),memory:_zod.z.string().default("1024m").describe("Memory allocation for each executor (e.g., '1024m', '4Gi')")}).optional().describe("Executor resource configuration for Spark executors"),Tt=_zod.z.object({context:_zod.z.string().describe("Context name for incremental processing (e.g., 'incrinput', 'customer_incremental')"),sql:_zod.z.string().describe("SQL query with placeholders for incremental keys (e.g., 'SELECT * FROM incrinput WHERE date BETWEEN $|start_date| AND $|end_date|')"),keys:_zod.z.array(_zod.z.object({name:_zod.z.string().describe("Name of the incremental key (e.g., 'start_date', 'end_date', 'last_updated_id')"),sql:_zod.z.string().describe("SQL query to obtain the key value (e.g., 'SELECT MAX(date) FROM target_table')")})).describe("Incremental keys for tracking data changes"),state:_zod.z.array(_zod.z.object({key:_zod.z.string().describe("State key name for persistence"),value:_zod.z.string().describe("State value reference for next run")})).describe("State management for incremental processing")}).describe("Incremental load configuration for processing only changed data"),Lt=_zod.z.object({name:_zod.z.string().describe("Reference name for the input dataset (used in SQL queries and transformations)"),dataset:_zod.z.string().describe("Dataset address in UDL format (e.g., 'dataos://lakehouse:retail/customers', 'dataos://s3-depot:bucket/path', 'dataos://bigquery-depot:project.dataset.table')"),query:_zod.z.string().optional().describe(`SQL query for data filtering and selection (e.g., 'SELECT * FROM table WHERE status = "active"')`),format:_zod.z.enum(["iceberg","csv","json","parquet","avro","delta","excel","xml","kafka","pulsar","eventhub","elasticsearch","opensearch","bigquery","snowflake","redshift","mysql","postgresql","oracle","mssql","mongodb"]).optional().describe("Data format of the source dataset"),isStream:_zod.z.boolean().default(!1).describe("Flag indicating if this is a streaming dataset (required for Kafka, Pulsar, EventHub)"),schemaType:_zod.z.enum(["avro","json"]).default("avro").optional().describe("Schema type for the dataset"),schemaPath:_zod.z.string().optional().describe("DataOS address to schema file (e.g., 'dataos://thirdparty:schemas/customer.avsc')"),schemaString:_zod.z.string().optional().describe("Inline schema definition (Avro JSON or Spark struct)"),schemaSubject:_zod.z.string().optional().describe("Subject name for schema registry (for Kafka/Pulsar with schema registry)"),schemaId:_zod.z.number().optional().describe("Schema ID in schema registry (specific version of schema)"),incremental:Tt.optional(),options:_zod.z.record(_zod.z.string(),_zod.z.any()).optional().describe("Additional data load options (branch, connection settings, partitioning, etc.)")}).describe("Input dataset configuration with comprehensive format support"),Ct=_zod.z.object({name:_zod.z.string().describe("Reference name for the output dataset (must match transformation step name)"),dataset:_zod.z.string().describe("Dataset address in UDL format with write permissions (e.g., 'dataos://lakehouse:retail/processed_data?acl=rw')"),format:_zod.z.enum(["iceberg","parquet","csv","json","avro","delta","excel","kafka","pulsar","eventhub","elasticsearch","opensearch","bigquery","snowflake","redshift","mysql","postgresql","oracle","mssql"]).optional().describe("Output format for the dataset"),driver:_zod.z.string().optional().describe("JDBC driver class for database outputs (e.g., 'org.postgresql.Driver')"),title:_zod.z.string().optional().describe("Human-readable title for the output dataset"),description:_zod.z.string().optional().describe("Description of the output dataset content and purpose"),tags:_zod.z.array(_zod.z.string()).optional().describe("Tags for categorizing and discovering the output dataset"),options:_zod.z.object({saveMode:_zod.z.enum(["overwrite","append","ignore","error"]).default("overwrite").describe("Save mode: overwrite (replace), append (add), ignore (skip if exists), error (fail if exists)"),extraOptions:_zod.z.record(_zod.z.string(),_zod.z.any()).optional().describe("Format-specific options (warehouse settings, compression, etc.)"),compressionType:_zod.z.enum(["gzip","snappy","lz4","zstd"]).optional().describe("Compression type for the output data"),sort:_zod.z.object({mode:_zod.z.enum(["partition","global"]).describe("Sort mode: partition (within partitions), global (across all data)"),columns:_zod.z.array(_zod.z.object({name:_zod.z.string().describe("Column name to sort by"),order:_zod.z.enum(["asc","desc"]).default("asc").describe("Sort order: ascending or descending")})).describe("Columns to sort by with their order")}).optional().describe("Sorting configuration for optimized data layout"),iceberg:_zod.z.object({merge:_zod.z.object({onClause:_zod.z.string().describe("Join condition for merge operation (e.g., 'old.id = new.id')"),whenClause:_zod.z.string().describe("Merge conditions and actions (e.g., 'matched then update set * when not matched then insert *')")}).optional().describe("Iceberg merge configuration for upsert operations"),properties:_zod.z.record(_zod.z.string(),_zod.z.string()).optional().describe("Iceberg table properties (e.g., 'write.format.default': 'parquet')"),partitionSpec:_zod.z.array(_zod.z.object({type:_zod.z.enum(["identity","bucket","truncate","year","month","day","hour"]).describe("Partition transform type"),column:_zod.z.string().describe("Column to partition on"),asColumn:_zod.z.string().optional().describe("Alias for the partition column"),numBuckets:_zod.z.number().optional().describe("Number of buckets for bucket partitioning"),width:_zod.z.number().optional().describe("Width for truncate partitioning")})).optional().describe("Iceberg partitioning specification for query optimization")}).optional().describe("Iceberg-specific configuration options")}).optional()}).describe("Output dataset configuration with advanced options"),qt=_zod.z.object({name:_zod.z.enum(["set_type","change_case","any_date","split","concat","replace","trim","substring","regex_extract","hash","encrypt","decrypt","pivot","unpivot","explode","collect","window","rank"]).describe("Flare function name for data transformation"),columns:_zod.z.record(_zod.z.string(),_zod.z.any()).optional().describe("Column-specific function parameters and configurations")}).describe("Flare built-in transformation function"),Dt=_zod.z.object({name:_zod.z.enum(["persist","cache","broadcast","repartition","coalesce"]).describe("Command for data optimization"),sequenceName:_zod.z.string().optional().describe("Sequence name to apply command to"),mode:_zod.z.enum(["MEMORY_ONLY","MEMORY_AND_DISK","DISK_ONLY"]).optional().describe("Persistence mode for caching"),numPartitions:_zod.z.number().optional().describe("Number of partitions for repartitioning")}).describe("Data optimization command"),Rt=_zod.z.object({sequence:_zod.z.array(_zod.z.object({name:_zod.z.string().describe("Name of the transformation step (must be unique within job)"),doc:_zod.z.string().optional().describe("Documentation describing the purpose of this transformation step"),sql:_zod.z.string().describe("SQL query for data transformation (can reference input datasets and previous steps)"),classpath:_zod.z.string().optional().describe("Custom Java/Scala class for advanced transformations"),functions:_zod.z.array(qt).optional().describe("Flare built-in functions to apply after SQL transformation"),commands:_zod.z.array(Dt).optional().describe("Performance optimization commands")})).describe("Sequence of transformation operations")}).describe("Data transformation step with SQL, functions, and commands"),jt=s=>{let{type:l,column:e,filter:a,validFormat:c,tests:r,sql:v}=s;switch(l){case"column":if(!e)return"Error: 'column' type requires 'column' property with column name to test.";if(!r||r.length===0)return"Error: 'column' type requires 'tests' property with test expressions.";if(v)return"Error: 'column' type should not have 'sql' property.";break;case"sql":if(!v)return"Error: 'sql' type requires 'sql' property with custom SQL query.";if(!r||r.length===0)return"Error: 'sql' type requires 'tests' property with test expressions.";if(e||a||c)return"Error: 'sql' type should not have column-specific properties ('column', 'filter', 'validFormat').";break;default:return`Error: Invalid assertion test type '${l}'. Supported types are: column, sql.`}return null},It=_zod.z.object({type:_zod.z.enum(["column","sql"],{description:"Type of assertion test: 'column' for column-based tests, 'sql' for custom SQL-based tests"}),column:_zod.z.string({description:"Column name to test"}).optional(),filter:_zod.z.string({description:`SQL filter condition (e.g., 'brand_name == "Nike"')`}).optional(),validFormat:_zod.z.object({regex:_zod.z.string({description:"Regular expression pattern for format validation"})}).optional(),sql:_zod.z.string({description:"Custom SQL query for complex validation"}).optional(),tests:_zod.z.array(_zod.z.string(),{description:"Test expressions (e.g., 'avg > 1000', 'max < 5000', 'missing_count < 10')"})}).describe("Data quality assertion configuration"),Nt={name:_zod.z.string().describe("Flare workflow name - alphanumeric with hyphens, max 48 chars"),tags:_zod.z.array(_zod.z.string()).describe("Tags for categorizing and organizing the workflow"),description:_zod.z.string().describe("Human-readable description of the workflow purpose and data processing logic"),title:_zod.z.string().optional().describe("Display title for the workflow"),owner:_zod.z.string().optional().describe("Owner of the workflow resource"),workspace:_zod.z.string().default("public").describe("Workspace where the workflow will be deployed"),compute:_zod.z.string().default("runnable-default").describe("Compute resource for the workflow (e.g., 'runnable-default', 'runnable-large')")},$t={...Nt,jobType:_zod.z.enum(["batch","incremental","data-quality","data-profiling"]).describe("Type of Flare job - batch: process all data, incremental: process only changes, data-quality: validate with assertions, data-profiling: analyze data structure"),stack:_zod.z.enum(["flare:6.0","flare:5.0"]).default("flare:6.0").describe("Flare stack version"),tier:_zod.z.enum(["connect","transform","premium"]).optional().describe("Compute tier for the job"),driver:_t,executor:At,logLevel:_zod.z.enum(["DEBUG","INFO","WARN","ERROR"]).default("INFO").describe("Application log level for debugging and monitoring"),explain:_zod.z.boolean().default(!1).describe("Flag to print Spark logical/physical plans for debugging"),inputs:_zod.z.array(Lt).describe("Input datasets for the workflow to process"),outputs:_zod.z.array(Ct).optional().describe("Output datasets where processed data will be written (not required for data-profiling jobs)"),steps:_zod.z.array(Rt).optional().describe("Data transformation steps to apply between inputs and outputs"),assertions:_zod.z.array(It).optional().describe("Data quality assertions for validation (required for data-quality jobs)"),scheduling:_zod.z.object({cron:_zod.z.string().describe("Cron expression for scheduled execution (e.g., '0 0 * * *' for daily)"),timezone:_zod.z.string().default("UTC").describe("Timezone for cron schedule"),retries:_zod.z.number().default(3).describe("Number of retry attempts on failure"),concurrency:_zod.z.enum(["forbid","allow","replace"]).default("forbid").describe("Concurrent execution policy")}).optional().describe("Scheduling configuration for automated workflow execution"),dependencies:_zod.z.object({datasets:_zod.z.array(_zod.z.string()).optional().describe("Dataset dependencies that must exist before execution"),workflows:_zod.z.array(_zod.z.string()).optional().describe("Workflow dependencies that must complete before execution"),services:_zod.z.array(_zod.z.string()).optional().describe("Service dependencies that must be available")}).optional().describe("Workflow dependencies and prerequisites"),notifications:_zod.z.object({email:_zod.z.array(_zod.z.string()).optional().describe("Email addresses for notifications"),slack:_zod.z.string().optional().describe("Slack webhook URL for notifications"),teams:_zod.z.string().optional().describe("Microsoft Teams webhook URL"),conditions:_zod.z.array(_zod.z.enum(["success","failure","retry","timeout"])).default(["failure"]).describe("Conditions that trigger notifications")}).optional().describe("Notification configuration for workflow events"),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-flare.yaml"}),fileName:_zod.z.string({description:"File name for the flare workflow file. example: my-flare.yaml"})},Se=s=>{s.tool("create-flare-workflow",xt,$t,async({name:l,tags:e,description:a,title:c,owner:r,workspace:v,compute:S,jobType:y,stack:g,tier:d,driver:m,executor:p,logLevel:w,explain:f,inputs:h,outputs:t,steps:n,assertions:b,scheduling:x,dependencies:q,notifications:L,path:N,fileName:_})=>{if(b&&b.length>0)for(let Q of b){let V=jt(Q);if(V)return{content:[{type:"text",text:V}]}}let B={logLevel:w,explain:f,inputs:h};y!=="data-profiling"&&t&&(B.outputs=t),n&&n.length>0&&(B.steps=n),y==="data-quality"&&b&&(B.assertions=b);let z={job:B};m&&(z.driver=m),p&&(z.executor=p);let G={name:l,version:"v1",type:"workflow",tags:e,description:a,...r&&{owner:r},...v&&v!=="public"&&{workspace:v},workflow:{title:c||`${l} - ${y} job`,dag:[{name:`${l}-${y}-job`,title:`${y.charAt(0).toUpperCase()+y.slice(1)} Job`,description:`${y} job for ${a}`,spec:{tags:[...e,y],stack:g,compute:S,...d&&{tier:d},stackSpec:z}}]}};x&&(G.schedule={cron:x.cron,timezone:x.timezone,...x.retries&&{retries:x.retries},concurrency:x.concurrency}),q&&(G.dependencies=q),L&&(G.notifications=L);let re=_yaml.stringify.call(void 0, G);try{let Q=N.includes(".yaml")||N.includes(".yml")?N:N.endsWith("/")?N+_:`${N}/${_}`,V=_path2.default.dirname(Q);return _fs.existsSync.call(void 0, V)||_fs.mkdirSync.call(void 0, V,{recursive:!0}),_fs.writeFileSync.call(void 0, Q,re),{content:[{type:"text",text:`Flare workflow created successfully at ${Q}`},{type:"text",text:`File content: ${re}`}]}}catch(Q){return{content:[{type:"text",text:`Error creating flare workflow: ${Q}`}]}}})};var Bt=`An Instance Secret is a DataOS Resource(https://dataos.info/resources/) designed for securely storing sensitive information at the DataOS Instance-level.
|
|
30
30
|
The primary purpose of an Instance Secret is to address the inherent exposure risk associated with directly embedding such confidential data within application code or manifest files (YAML configuration files).
|
|
31
31
|
This includes usernames, passwords, certificates, tokens, and keys. Instance Secrets establish a critical segregation between sensitive data and Resource definitions. This division minimizes inadvertent exposure during various Resource management phases, including creation, viewing, or editing.
|
|
32
32
|
|
|
33
33
|
Complete configuration details: https://dataos.info/resources/instance_secret/configurations/
|
|
34
|
-
`,zt={database:{username:"${DATABASE_USERNAME}",password:"${DATABASE_PASSWORD}"},aws:{accesskeyid:"${AWS_ACCESS_KEY_ID}",secretkey:"${AWS_SECRET_KEY}",awsaccesskeyid:"${AWS_ACCESS_KEY_ID}",awssecretaccesskey:"${AWS_SECRET_ACCESS_KEY}"},azure:{azureendpointsuffix:"${AZURE_ENDPOINT_SUFFIX}",azurestorageaccountkey:"${AZURE_STORAGE_ACCOUNT_KEY}",azurestorageaccountname:"${AZURE_STORAGE_ACCOUNT_NAME}"},gcp:{projectid:"${GCP_PROJECT_ID}",email:"${GCP_SERVICE_ACCOUNT_EMAIL}"},eventhub:{eh_shared_access_key_name:"${EVENTHUB_SHARED_ACCESS_KEY_NAME}",eh_shared_access_key:"${EVENTHUB_SHARED_ACCESS_KEY}"},git:{GITSYNC_USERNAME:"${GIT_USERNAME}",GITSYNC_PASSWORD:"${GIT_PASSWORD_OR_TOKEN}"},redshift:{username:"${REDSHIFT_USERNAME}",password:"${REDSHIFT_PASSWORD}",awsaccesskeyid:"${AWS_ACCESS_KEY_ID}",awssecretaccesskey:"${AWS_SECRET_ACCESS_KEY}"},generic:{custom_key_1:"${CUSTOM_VALUE_1}",custom_key_2:"${CUSTOM_VALUE_2}",api_key:"${API_KEY}",token:"${ACCESS_TOKEN}"}},Wt={description:_zod.z.string({description:"Brief description of the instance-secret's purpose and usage"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the instance secret"}),owner:_zod.z.string({description:"Owner of the instance secret resource"}).optional(),layer:_zod.z.enum(["user","system"],{description:"DataOS layer where the instance secret will be deployed"}).default("user")},Gt=
|
|
34
|
+
`,zt={database:{username:"${DATABASE_USERNAME}",password:"${DATABASE_PASSWORD}"},aws:{accesskeyid:"${AWS_ACCESS_KEY_ID}",secretkey:"${AWS_SECRET_KEY}",awsaccesskeyid:"${AWS_ACCESS_KEY_ID}",awssecretaccesskey:"${AWS_SECRET_ACCESS_KEY}"},azure:{azureendpointsuffix:"${AZURE_ENDPOINT_SUFFIX}",azurestorageaccountkey:"${AZURE_STORAGE_ACCOUNT_KEY}",azurestorageaccountname:"${AZURE_STORAGE_ACCOUNT_NAME}"},gcp:{projectid:"${GCP_PROJECT_ID}",email:"${GCP_SERVICE_ACCOUNT_EMAIL}"},eventhub:{eh_shared_access_key_name:"${EVENTHUB_SHARED_ACCESS_KEY_NAME}",eh_shared_access_key:"${EVENTHUB_SHARED_ACCESS_KEY}"},git:{GITSYNC_USERNAME:"${GIT_USERNAME}",GITSYNC_PASSWORD:"${GIT_PASSWORD_OR_TOKEN}"},redshift:{username:"${REDSHIFT_USERNAME}",password:"${REDSHIFT_PASSWORD}",awsaccesskeyid:"${AWS_ACCESS_KEY_ID}",awssecretaccesskey:"${AWS_SECRET_ACCESS_KEY}"},generic:{custom_key_1:"${CUSTOM_VALUE_1}",custom_key_2:"${CUSTOM_VALUE_2}",api_key:"${API_KEY}",token:"${ACCESS_TOKEN}"}},Wt={description:_zod.z.string({description:"Brief description of the instance-secret's purpose and usage"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the instance secret"}),owner:_zod.z.string({description:"Owner of the instance secret resource"}).optional(),layer:_zod.z.enum(["user","system"],{description:"DataOS layer where the instance secret will be deployed"}).default("user")},Gt=s=>{let{type:l,files:e}=s;switch(l){case"key-value":if(e)return"Error: 'key-value' type should not have 'files' property. Remove the 'files' property for key-value type.";break;case"key-value-properties":if(e&&typeof e=="object"&&!Array.isArray(e)&&(e.hasOwnProperty("truststoreLocation")||e.hasOwnProperty("keystoreLocation")))return"Error: 'key-value-properties' type should have 'files' as a record of file paths (key-value pairs), not certificate-specific properties.";break;case"certificate":if(!e)return"Error: 'certificate' type requires 'files' property with 'truststoreLocation' and 'keystoreLocation'.";if(typeof e!="object"||Array.isArray(e))return"Error: 'certificate' type requires 'files' to be an object with 'truststoreLocation' and 'keystoreLocation'.";if(!e.truststoreLocation||!e.keystoreLocation)return"Error: 'certificate' type requires both 'truststoreLocation' and 'keystoreLocation' in the 'files' object.";break;case"cloud-kernel":if(e)return"Error: 'cloud-kernel' type should not have 'files' property. Remove the 'files' property for cloud-kernel type.";break;default:return`Error: Invalid instance secret type '${l}'. Supported types are: key-value, key-value-properties, certificate, cloud-kernel.`}return null},Qt={name:_zod.z.string({description:"Instance secret name - alphanumeric with hyphens allowed, max 48 chars, pattern: [a-z0-9]([-a-z0-9]*[a-z0-9])"}),version:_zod.z.enum(["v1"],{description:"Manifest version for instance secret"}).default("v1"),type:_zod.z.literal("instance-secret",{description:"Resource type - must be 'instance-secret'"}),system:_zod.z.enum(["aws","azure","gcp","eventhub","git","redshift","database","generic"]),instanceSecret:_zod.z.object({type:_zod.z.enum(["key-value","key-value-properties","certificate","cloud-kernel"],{description:"Type of instance secret: 'key-value' stores arbitrary key-value pairs encoded separately in base64, 'key-value-properties' transforms multiple key-value pairs into a single base64 encoded pair, 'certificate' stores TLS certificates and keys, 'cloud-kernel' stores cloud-specific kernel configurations"}),acl:_zod.z.enum(["r","rw"],{description:"Access control level - 'r' for read-only, 'rw' for read-write"}),files:_zod.z.union([_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"For key-value-properties: File paths for credential files (e.g., JSON key files, certificates)"}),_zod.z.object({truststoreLocation:_zod.z.string({description:"File path to the truststore containing trusted certificates"}),keystoreLocation:_zod.z.string({description:"File path to the keystore containing private keys and certificates"})},{description:"For certificate type: File locations for certificate and key storage"})]).optional().describe("Optional files configuration - used for key-value-properties (arbitrary file paths) and certificate types (truststore and keystore locations)")}),secretMetadata:_zod.z.object(Wt,{description:"Common metadata fields for instance secret configuration"}),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Dcouments/project-name/my-secret.yaml"}),fileName:_zod.z.string({description:"File name for the instance secret file. example: my-secret.module.yaml"})},we=s=>{s.tool("create-instance-secrets",Bt,Qt,async({name:l,version:e,type:a,instanceSecret:c,secretMetadata:r,system:v,path:S,fileName:y})=>{let g=Gt(c);if(g)return{content:[{type:"text",text:g}]};let d=_yaml.stringify.call(void 0, {name:l,version:e||"v1",type:a,...r,"instance-secret":{...c,data:zt[v]}});try{let m=S.includes(".yaml")||S.includes(".yml")?S:S.endsWith("/")?S+y:`${S}/${y}`,p=_path2.default.dirname(m);return _fs.existsSync.call(void 0, p)||_fs.mkdirSync.call(void 0, p,{recursive:!0}),_fs.writeFileSync.call(void 0, m,d),{content:[{type:"text",text:`Instance secret created successfully at ${m}`},{type:"text",text:`File content: ${d}`}]}}catch(m){return{content:[{type:"text",text:`Error creating instance secret: ${m}`}]}}})};var Zt=`Scanner Workflow in DataOS is a Stack that functions as a metadata extractor for scheduled or one-time metadata extraction jobs. It enables users to pull information from various data sources and subsequently store this extracted metadata in the Metis DB. These sources include data systems, BI tools, dashboard and visualization tools, databases, data lake systems, messaging services, pipelines, ML-model stores, and many more.
|
|
35
35
|
|
|
36
36
|
Scanner Workflows support comprehensive metadata extraction:
|
|
37
37
|
- **Database Sources**: PostgreSQL, MySQL, Snowflake, BigQuery, Redshift, Oracle, SQL Server
|
|
@@ -47,7 +47,7 @@ Examples:
|
|
|
47
47
|
- Kafka scanner: Extract topic metadata and schemas
|
|
48
48
|
- Data Quality scanner: Extract quality check results and lineage
|
|
49
49
|
- DataOS Resources scanner: Extract metadata from DataOS resources like workflows, depots
|
|
50
|
-
`,J=_zod.z.object({includes:_zod.z.array(_zod.z.string(),{description:"Array of regex patterns to include entities (e.g., ['^customer.*', 'sales_db$'])"}).optional(),excludes:_zod.z.array(_zod.z.string(),{description:"Array of regex patterns to exclude entities (e.g., ['temp.*', '^test_.*'])"}).optional()}).describe("Filter pattern with regex support for precise entity selection"),Xt=_zod.z.object({config:_zod.z.object({type:_zod.z.enum(["Snowflake","BigQuery","Redshift","PostgreSql","MySql","Oracle","MsSql","MariaDB","AzureSql","Kafka","Pulsar","MongoDB","Elasticsearch","OpenSearch"],{description:"Direct connection type to data source"}),hostPort:_zod.z.string({description:"Host and port for connection (e.g., 'localhost:5432', 'cluster.snowflake.com:443')"}).optional(),host:_zod.z.string({description:"Database host address"}).optional(),port:_zod.z.number({description:"Database port number"}).optional(),database:_zod.z.string({description:"Database name to connect to"}).optional(),username:_zod.z.string({description:"Username for authentication"}).optional(),warehouse:_zod.z.string({description:"Snowflake warehouse name"}).optional(),url:_zod.z.string({description:"Connection URL (for Snowflake, MongoDB Atlas, etc.)"}).optional(),project:_zod.z.string({description:"BigQuery project ID"}).optional(),serviceUrl:_zod.z.string({description:"Service URL for Pulsar/Kafka"}).optional(),adminUrl:_zod.z.string({description:"Admin URL for Pulsar"}).optional(),brokers:_zod.z.array(_zod.z.string(),{description:"List of Kafka broker addresses"}).optional(),nodes:_zod.z.array(_zod.z.string(),{description:"List of Elasticsearch/OpenSearch/MongoDB nodes"}).optional()})}).describe("Direct source connection configuration bypassing depot"),er=_zod.z.object({config:_zod.z.object({type:_zod.z.enum(["DatabaseMetadata","BigqueryMetadata","KafkaMetadata","PulsarMetadata","LakehouseMetadata","DashboardMetadata","PipelineMetadata","DataProductMetadata","UserMetadata","DataQualityIndexer","DataProfilingIndexer","QueryUsageIndexer","SodaIndexer"],{description:"Type of metadata source to scan"}),databaseFilterPattern:J.optional(),schemaFilterPattern:J.optional(),tableFilterPattern:J.optional(),projectFilterPattern:J.optional(),datasetFilterPattern:J.optional(),topicFilterPattern:J.optional(),markDeletedTables:_zod.z.boolean({description:"Flag tables as soft-deleted if not present in source system"}).default(!1),markDeletedTablesfromFilterOnly:_zod.z.boolean({description:"Flag tables as deleted only within filtered scope"}).default(!1),markDeletedTopics:_zod.z.boolean({description:"Flag topics as soft-deleted if not present in source system"}).default(!1),includeViews:_zod.z.boolean({description:"Include database views in metadata extraction"}).default(!0),includeTables:_zod.z.boolean({description:"Include database tables in metadata extraction"}).default(!0),includeTags:_zod.z.boolean({description:"Include tags and labels in metadata"}).default(!0),includeOwners:_zod.z.boolean({description:"Include ownership information in metadata"}).default(!0),ingestSampleData:_zod.z.boolean({description:"Ingest sample data from topics/tables for preview"}).default(!1),enableDataProfiler:_zod.z.boolean({description:"Enable data profiling during scan (resource intensive)"}).default(!1),profileSample:_zod.z.number({description:"Percentage of data to sample for profiling (1-100)"}).min(1).max(100).default(100).optional(),enableDebugLog:_zod.z.boolean({description:"Enable debug logging for troubleshooting"}).default(!1),connectionTimeout:_zod.z.number({description:"Connection timeout in seconds"}).default(300).optional(),processProcedures:_zod.z.boolean({description:"Include stored procedures in metadata extraction"}).default(!1),queryLogDuration:_zod.z.number({description:"Duration in days for query usage analysis"}).default(30).optional(),sessionTags:_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Session tags to set during connection"}).optional(),sslMode:_zod.z.enum(["disable","require","verify-ca","verify-full"],{description:"SSL mode for database connections"}).optional(),batchSize:_zod.z.number({description:"Batch size for metadata extraction"}).default(1e3).optional()})}).describe("Source configuration for metadata scanning with comprehensive filtering and options"),tr=_zod.z.object({cron:_zod.z.string({description:"Cron expression for scheduling (e.g., '0 2 * * *' for daily at 2 AM, '*/15 * * * *' for every 15 minutes)"}),concurrencyPolicy:_zod.z.enum(["Allow","Forbid","Replace"],{description:"Policy for handling concurrent executions - Allow: allow concurrent runs, Forbid: skip if running, Replace: cancel and restart"}).default("Allow"),endOn:_zod.z.string({description:"End date for scheduled executions in ISO 8601 format (e.g., '2024-12-31T23:59:59Z')"}).optional(),timezone:_zod.z.string({description:"Timezone for schedule execution (e.g., 'UTC', 'Asia/Kolkata', 'America/New_York')"}).default("UTC")}).optional().describe("Schedule configuration for recurring scanner workflows"),rr={name:_zod.z.string({description:"Scanner name - alphanumeric with hyphens, max 48 chars, pattern: [a-z0-9]([-a-z0-9]*[a-z0-9])"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the scanner workflow"}),description:_zod.z.string({description:"Human-readable description of the scanner purpose and data sources"}),owner:_zod.z.string({description:"Owner of the scanner resource (DataOS user identifier)"}).optional(),workspace:_zod.z.string({description:"Workspace where the scanner will be deployed"}).default("public")},
|
|
50
|
+
`,J=_zod.z.object({includes:_zod.z.array(_zod.z.string(),{description:"Array of regex patterns to include entities (e.g., ['^customer.*', 'sales_db$'])"}).optional(),excludes:_zod.z.array(_zod.z.string(),{description:"Array of regex patterns to exclude entities (e.g., ['temp.*', '^test_.*'])"}).optional()}).describe("Filter pattern with regex support for precise entity selection"),Xt=_zod.z.object({config:_zod.z.object({type:_zod.z.enum(["Snowflake","BigQuery","Redshift","PostgreSql","MySql","Oracle","MsSql","MariaDB","AzureSql","Kafka","Pulsar","MongoDB","Elasticsearch","OpenSearch"],{description:"Direct connection type to data source"}),hostPort:_zod.z.string({description:"Host and port for connection (e.g., 'localhost:5432', 'cluster.snowflake.com:443')"}).optional(),host:_zod.z.string({description:"Database host address"}).optional(),port:_zod.z.number({description:"Database port number"}).optional(),database:_zod.z.string({description:"Database name to connect to"}).optional(),username:_zod.z.string({description:"Username for authentication"}).optional(),warehouse:_zod.z.string({description:"Snowflake warehouse name"}).optional(),url:_zod.z.string({description:"Connection URL (for Snowflake, MongoDB Atlas, etc.)"}).optional(),project:_zod.z.string({description:"BigQuery project ID"}).optional(),serviceUrl:_zod.z.string({description:"Service URL for Pulsar/Kafka"}).optional(),adminUrl:_zod.z.string({description:"Admin URL for Pulsar"}).optional(),brokers:_zod.z.array(_zod.z.string(),{description:"List of Kafka broker addresses"}).optional(),nodes:_zod.z.array(_zod.z.string(),{description:"List of Elasticsearch/OpenSearch/MongoDB nodes"}).optional()})}).describe("Direct source connection configuration bypassing depot"),er=_zod.z.object({config:_zod.z.object({type:_zod.z.enum(["DatabaseMetadata","BigqueryMetadata","KafkaMetadata","PulsarMetadata","LakehouseMetadata","DashboardMetadata","PipelineMetadata","DataProductMetadata","UserMetadata","DataQualityIndexer","DataProfilingIndexer","QueryUsageIndexer","SodaIndexer"],{description:"Type of metadata source to scan"}),databaseFilterPattern:J.optional(),schemaFilterPattern:J.optional(),tableFilterPattern:J.optional(),projectFilterPattern:J.optional(),datasetFilterPattern:J.optional(),topicFilterPattern:J.optional(),markDeletedTables:_zod.z.boolean({description:"Flag tables as soft-deleted if not present in source system"}).default(!1),markDeletedTablesfromFilterOnly:_zod.z.boolean({description:"Flag tables as deleted only within filtered scope"}).default(!1),markDeletedTopics:_zod.z.boolean({description:"Flag topics as soft-deleted if not present in source system"}).default(!1),includeViews:_zod.z.boolean({description:"Include database views in metadata extraction"}).default(!0),includeTables:_zod.z.boolean({description:"Include database tables in metadata extraction"}).default(!0),includeTags:_zod.z.boolean({description:"Include tags and labels in metadata"}).default(!0),includeOwners:_zod.z.boolean({description:"Include ownership information in metadata"}).default(!0),ingestSampleData:_zod.z.boolean({description:"Ingest sample data from topics/tables for preview"}).default(!1),enableDataProfiler:_zod.z.boolean({description:"Enable data profiling during scan (resource intensive)"}).default(!1),profileSample:_zod.z.number({description:"Percentage of data to sample for profiling (1-100)"}).min(1).max(100).default(100).optional(),enableDebugLog:_zod.z.boolean({description:"Enable debug logging for troubleshooting"}).default(!1),connectionTimeout:_zod.z.number({description:"Connection timeout in seconds"}).default(300).optional(),processProcedures:_zod.z.boolean({description:"Include stored procedures in metadata extraction"}).default(!1),queryLogDuration:_zod.z.number({description:"Duration in days for query usage analysis"}).default(30).optional(),sessionTags:_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Session tags to set during connection"}).optional(),sslMode:_zod.z.enum(["disable","require","verify-ca","verify-full"],{description:"SSL mode for database connections"}).optional(),batchSize:_zod.z.number({description:"Batch size for metadata extraction"}).default(1e3).optional()})}).describe("Source configuration for metadata scanning with comprehensive filtering and options"),tr=_zod.z.object({cron:_zod.z.string({description:"Cron expression for scheduling (e.g., '0 2 * * *' for daily at 2 AM, '*/15 * * * *' for every 15 minutes)"}),concurrencyPolicy:_zod.z.enum(["Allow","Forbid","Replace"],{description:"Policy for handling concurrent executions - Allow: allow concurrent runs, Forbid: skip if running, Replace: cancel and restart"}).default("Allow"),endOn:_zod.z.string({description:"End date for scheduled executions in ISO 8601 format (e.g., '2024-12-31T23:59:59Z')"}).optional(),timezone:_zod.z.string({description:"Timezone for schedule execution (e.g., 'UTC', 'Asia/Kolkata', 'America/New_York')"}).default("UTC")}).optional().describe("Schedule configuration for recurring scanner workflows"),rr={name:_zod.z.string({description:"Scanner name - alphanumeric with hyphens, max 48 chars, pattern: [a-z0-9]([-a-z0-9]*[a-z0-9])"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the scanner workflow"}),description:_zod.z.string({description:"Human-readable description of the scanner purpose and data sources"}),owner:_zod.z.string({description:"Owner of the scanner resource (DataOS user identifier)"}).optional(),workspace:_zod.z.string({description:"Workspace where the scanner will be deployed"}).default("public")},or=s=>{let{method:l,depot:e,source:a,sourceConnection:c}=s;if(l==="depot"){if(!e)throw new Error("Connection method 'depot' requires 'depot' field");if(c)throw new Error("Connection method 'depot' cannot have 'sourceConnection' field");return{method:l,depot:e,source:a}}if(l==="direct"){if(!c)throw new Error("Connection method 'direct' requires 'sourceConnection' field");if(e||a)throw new Error("Connection method 'direct' cannot have 'depot' or 'source' fields");return{method:l,sourceConnection:c}}throw new Error(`Invalid connection method: ${l}. Must be 'depot' or 'direct'`)},ir={...rr,schedule:tr,jobName:_zod.z.string({description:"Name of the scanner job within the workflow DAG"}),jobDescription:_zod.z.string({description:"Description of what this scanner job accomplishes"}).optional(),jobTags:_zod.z.array(_zod.z.string(),{description:"Tags specific to this scanner job"}).optional(),compute:_zod.z.string({description:"Compute resource for the job (default: runnable-default)"}).default("runnable-default"),runAsUser:_zod.z.string({description:"User to run the scanner as - 'metis' for metadata operations, or specific user ID"}).default("metis"),stack:_zod.z.enum(["scanner:2.0"],{description:"Scanner stack version - currently supports 2.0"}).default("scanner:2.0"),connection:_zod.z.object({method:_zod.z.enum(["depot","direct"],{description:"Connection method - 'depot' for depot-based scanning (recommended), 'direct' for direct source connection (advanced)"}),depot:_zod.z.string({description:"Depot name or UDL for the data source (e.g., 'snowflake-depot', 'dataos://icebase')"}).optional(),source:_zod.z.string({description:"Maps depot to metadata source name in Metis - if not specified, uses depot name"}).optional(),sourceConnection:Xt.optional()}),sourceConfig:er,path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-scanner.yaml"}),fileName:_zod.z.string({description:"File name for the scanner workflow file. example: my-scanner.yaml"})},Ee=s=>{s.tool("create-scanner-workflow",Zt,ir,async({name:l,tags:e,description:a,owner:c,workspace:r,schedule:v,jobName:S,jobDescription:y,jobTags:g,compute:d,runAsUser:m,stack:p,connection:w,sourceConfig:f,path:h,fileName:t})=>{try{let n=or(w),b={...f};n.method==="depot"?(b.depot=n.depot,n.source&&(b.source=n.source)):b.sourceConnection=n.sourceConnection;let x=[{name:S,...y&&{description:y},...g&&{tags:g},spec:{stack:p,compute:d,runAsUser:m,stackSpec:b}}],q=_yaml.stringify.call(void 0, {name:l,version:"v1",type:"workflow",tags:e,description:a,...c&&{owner:c},...r&&r!=="public"&&{workspace:r},workflow:{...v&&{schedule:v},dag:x}}),L=h.includes(".yaml")||h.includes(".yml")?h:h.endsWith("/")?h+t:`${h}/${t}`,N=_path2.default.dirname(L);return _fs.existsSync.call(void 0, N)||_fs.mkdirSync.call(void 0, N,{recursive:!0}),_fs.writeFileSync.call(void 0, L,q),{content:[{type:"text",text:`Scanner workflow file created successfully at ${L}`},{type:"text",text:`File content: ${q}`}]}}catch(n){return{content:[{type:"text",text:`Error creating scanner workflow file: ${n}`}]}}})};var pr=`Soda Workflow in DataOS is a declarative tool for scheduled data quality testing within and beyond data pipelines, extending its capabilities, enhancing data observability and reliability across one or more datasets. It enables you to use the Soda Checks Language (SodaCL) to turn user-defined inputs into aggregated SQL queries.
|
|
51
51
|
|
|
52
52
|
Soda supports comprehensive data quality checks across multiple categories:
|
|
53
53
|
- **Accuracy**: Row counts, value ranges, calculations, cross-dataset validation
|
|
@@ -62,10 +62,10 @@ Examples:
|
|
|
62
62
|
- Basic checks: row_count between 100 and 1000, missing_count(email) = 0
|
|
63
63
|
- Advanced checks: values in (city) must exist in cities (name), freshness(updated_at) < 1d
|
|
64
64
|
- Schema validation: required columns, data types, forbidden columns
|
|
65
|
-
`,ue=_zod.z.object({title:_zod.z.string({description:"Human-readable title for the check (e.g., 'Customer ID Completeness Check')"}).optional(),category:_zod.z.enum(["Accuracy","Completeness","Consistency","Freshness","Schema","Uniqueness","Validity"],{description:"Category of the data quality check for organization and reporting"}),description:_zod.z.string({description:"Detailed description of what the check validates"}).optional(),tags:_zod.z.array(_zod.z.string(),{description:"Tags for organizing and filtering checks (e.g., ['critical', 'pii', 'financial'])"}).optional()}).describe("Metadata attributes for organizing and documenting quality checks"),ur=_zod.z.object({limit:_zod.z.number({description:"Maximum number of failed row samples to collect for analysis (default: 100)"}).min(1).max(1e3).default(100)}).optional().describe("Configuration for collecting failed row samples for debugging"),dr=_zod.z.object({"valid format":_zod.z.enum(["email","phone number","credit card","uuid","date","url"],{description:"Built-in format validation (e.g., 'email', 'phone number')"}).optional(),"valid regex":_zod.z.string({description:"Custom regex pattern for validation (e.g., '^[A-Z]{2}[0-9]{4}$' for postal codes)"}).optional(),"valid min":_zod.z.number({description:"Minimum valid value for numeric columns"}).optional(),"valid max":_zod.z.number({description:"Maximum valid value for numeric columns"}).optional(),"valid min length":_zod.z.number({description:"Minimum valid length for string columns"}).optional(),"valid max length":_zod.z.number({description:"Maximum valid length for string columns"}).optional(),"valid values":_zod.z.array(_zod.z.union([_zod.z.string(),_zod.z.number()]),{description:"List of valid values (e.g., ['active', 'inactive', 'pending'])"}).optional()}).describe("Validation rules for validity checks"),xn=_zod.z.object({name:_zod.z.string({description:"Name identifier for the schema check"}).optional(),warn:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if present (e.g., ['temp_*', 'old_*'])"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger warnings if wrong (e.g., {'age': 'integer'})"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger warnings if wrong"}).optional()}).optional(),fail:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if present (sensitive data)"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger failures if wrong"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger failures if wrong"}).optional()}).optional(),attributes:ue}).describe("Schema validation check configuration with warn/fail conditions"),_n=_zod.z.object({name:_zod.z.string({description:"Custom name for the check (overrides default naming)"}).optional(),filter:_zod.z.string({description:`SQL WHERE clause to filter data for this specific check (e.g., 'status = "active"')`}).optional(),attributes:ue}),mr=n=>{let{checkType:p,column:e,operator:s,value:r,compareDataset:l,samples:v,name:w,filter:b,attributes:g,missingValues:d,missingRegex:h,validationRules:c,function:S,percentile:m,lengthType:f,sourceColumn:t,referenceDataset:a,referenceColumn:y,checkPattern:x,warn:C,fail:T,failCondition:N,failQuery:_}=n;if(!p)return"Error: 'checkType' is required for all quality checks.";switch(p){case"row_count":if(!s)return"Error: 'row_count' check requires 'operator' property.";if(r===void 0)return"Error: 'row_count' check requires 'value' property.";if(e||d||h||c||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'row_count' check should only have 'operator', 'value', 'compareDataset', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"missing_count":if(!e)return"Error: 'missing_count' check requires 'column' property.";if(!s)return"Error: 'missing_count' check requires 'operator' property.";if(r===void 0)return"Error: 'missing_count' check requires 'value' property.";if(l||c||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'missing_count' check should only have 'column', 'operator', 'value', 'missingValues', 'missingRegex', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"missing_percent":if(!e)return"Error: 'missing_percent' check requires 'column' property.";if(!s)return"Error: 'missing_percent' check requires 'operator' property.";if(r===void 0)return"Error: 'missing_percent' check requires 'value' property.";if(l||d||h||c||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'missing_percent' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"duplicate_count":if(!e)return"Error: 'duplicate_count' check requires 'column' property.";if(!s)return"Error: 'duplicate_count' check requires 'operator' property.";if(r===void 0)return"Error: 'duplicate_count' check requires 'value' property.";if(l||d||h||c||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'duplicate_count' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"duplicate_percent":if(!e)return"Error: 'duplicate_percent' check requires 'column' property.";if(!s)return"Error: 'duplicate_percent' check requires 'operator' property.";if(r===void 0)return"Error: 'duplicate_percent' check requires 'value' property.";if(l||d||h||c||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'duplicate_percent' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"invalid_count":if(!e)return"Error: 'invalid_count' check requires 'column' property.";if(!s)return"Error: 'invalid_count' check requires 'operator' property.";if(r===void 0)return"Error: 'invalid_count' check requires 'value' property.";if(!c)return"Error: 'invalid_count' check requires 'validationRules' property.";if(l||d||h||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'invalid_count' check should only have 'column', 'operator', 'value', 'validationRules', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"invalid_percent":if(!e)return"Error: 'invalid_percent' check requires 'column' property.";if(!s)return"Error: 'invalid_percent' check requires 'operator' property.";if(r===void 0)return"Error: 'invalid_percent' check requires 'value' property.";if(!c)return"Error: 'invalid_percent' check requires 'validationRules' property.";if(l||d||h||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'invalid_percent' check should only have 'column', 'operator', 'value', 'validationRules', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"freshness":if(!e)return"Error: 'freshness' check requires 'column' property.";if(!s)return"Error: 'freshness' check requires 'operator' property.";if(!r)return"Error: 'freshness' check requires 'value' property.";if(l||d||h||c||S||m||f||t||a||y||x||C||T||N||_)return"Error: 'freshness' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"aggregation":if(!S)return"Error: 'aggregation' check requires 'function' property.";if(!e)return"Error: 'aggregation' check requires 'column' property.";if(!s)return"Error: 'aggregation' check requires 'operator' property.";if(r===void 0)return"Error: 'aggregation' check requires 'value' property.";if(l||d||h||c||f||t||a||y||x||C||T||N||_)return"Error: 'aggregation' check should only have 'function', 'column', 'operator', 'value', 'percentile', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"length":if(!f)return"Error: 'length' check requires 'lengthType' property.";if(!e)return"Error: 'length' check requires 'column' property.";if(!s)return"Error: 'length' check requires 'operator' property.";if(r===void 0)return"Error: 'length' check requires 'value' property.";if(l||d||h||c||S||m||t||a||y||x||C||T||N||_)return"Error: 'length' check should only have 'lengthType', 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"reference":if(!t)return"Error: 'reference' check requires 'sourceColumn' property.";if(!a)return"Error: 'reference' check requires 'referenceDataset' property.";if(!y)return"Error: 'reference' check requires 'referenceColumn' property.";if(!x)return"Error: 'reference' check requires 'checkPattern' property.";if(e||s||r||l||d||h||c||S||m||f||C||T||N||_)return"Error: 'reference' check should only have 'sourceColumn', 'referenceDataset', 'referenceColumn', 'checkPattern', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"schema":if(e||s||r||l||d||h||c||S||m||f||t||a||y||x||N||_)return"Error: 'schema' check should only have 'warn', 'fail', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"failed_rows":if(!N&&!_)return"Error: 'failed_rows' check requires either 'failCondition' or 'failQuery' property.";if(e||s||r||l||d||h||c||S||m||f||t||a||y||x||C||T)return"Error: 'failed_rows' check should only have 'failCondition', 'failQuery', 'samples', 'name', 'filter', and 'attributes' properties.";break;default:return`Error: Invalid soda check type '${p}'. Supported types are: row_count, missing_count, missing_percent, duplicate_count, duplicate_percent, invalid_count, invalid_percent, freshness, aggregation, length, reference, schema, failed_rows.`}return null},fr=_zod.z.object({checkType:_zod.z.enum(["row_count","missing_count","missing_percent","duplicate_count","duplicate_percent","invalid_count","invalid_percent","freshness","aggregation","length","reference","schema","failed_rows"],{description:"Type of data quality check to perform"}),name:_zod.z.string({description:"Custom name for the check (overrides default naming)"}).optional(),filter:_zod.z.string({description:`SQL WHERE clause to filter data for this specific check (e.g., 'status = "active"')`}).optional(),attributes:ue,samples:ur,compareDataset:_zod.z.string({description:"Reference dataset for comparison (e.g., 'same as reference_table')"}).optional(),column:_zod.z.string({description:"Column name for checks that operate on specific columns"}).optional(),operator:_zod.z.enum(["=","!=",">",">=","<","<=","between"],{description:"Comparison operator for the check"}).optional(),value:_zod.z.union([_zod.z.number(),_zod.z.string()],{description:"Expected value or range for the check"}).optional(),missingValues:_zod.z.array(_zod.z.union([_zod.z.string(),_zod.z.null()]),{description:"Custom missing value indicators (e.g., ['NA', 'n/a', '', null])"}).optional(),missingRegex:_zod.z.string({description:"Regex pattern to identify missing values"}).optional(),validationRules:dr.optional(),function:_zod.z.enum(["min","max","avg","sum","stddev","variance","percentile"],{description:"Aggregation function to apply"}).optional(),percentile:_zod.z.number({description:"Percentile value (0-1) for percentile function"}).min(0).max(1).optional(),lengthType:_zod.z.enum(["min_length","max_length","avg_length"],{description:"Type of length check to perform"}).optional(),sourceColumn:_zod.z.string({description:"Column in current dataset"}).optional(),referenceDataset:_zod.z.string({description:"Reference dataset name (e.g., 'reference_table')"}).optional(),referenceColumn:_zod.z.string({description:"Column in reference dataset"}).optional(),checkPattern:_zod.z.enum(["must exist in","must not exist in"],{description:"Type of reference validation"}).optional(),warn:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if present (e.g., ['temp_*', 'old_*'])"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger warnings if wrong (e.g., {'age': 'integer'})"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger warnings if wrong"}).optional()}).optional(),fail:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if present (sensitive data)"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger failures if wrong"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger failures if wrong"}).optional()}).optional(),failCondition:_zod.z.string({description:"SQL condition for identifying failed rows (e.g., 'age < 0 OR age > 150')"}).optional(),failQuery:_zod.z.string({description:"Custom SQL query for complex failed row detection"}).optional()}).describe("Comprehensive Soda quality check with typed validation"),gr=_zod.z.object({columns:_zod.z.array(_zod.z.string(),{description:"Column specifications for profiling: exact names, wildcards (*), includes/excludes (e.g., ['customer_id', 'include address_*', 'exclude temp_*', '*'])"})}).optional().describe("Data profiling configuration for statistical analysis"),hr=_zod.z.object({name:_zod.z.string({description:"Identifier name for the filter"}),where:_zod.z.string({description:`SQL WHERE clause for global filtering across all checks (e.g., 'status = "active" AND created_date > "2023-01-01"')`})}).optional().describe("Global filter applied to all checks on this dataset"),yr=_zod.z.object({engine:_zod.z.enum(["minerva","default"],{description:"Query engine: 'minerva' for Trino-based queries, 'default' for native engine"}).default("default"),clusterName:_zod.z.string({description:"Cluster name for Minerva engine (required when engine is 'minerva')"}).optional(),branchName:_zod.z.string({description:"Branch name for Iceberg datasets (defaults to 'main')"}).default("main").optional()}).optional().describe("Engine and execution options for quality checks"),br=_zod.z.object({dataset:_zod.z.string({description:"Dataset specification using DataOS UDL format: dataos://[depot]:[collection]/[dataset] (e.g., 'dataos://icebase:retail/customer')"}),options:yr,filter:hr,profile:gr,checks:_zod.z.array(fr,{description:"List of data quality checks to execute on this dataset"})}).describe("Complete dataset configuration with quality checks and profiling"),vr=_zod.z.object({requests:_zod.z.object({cpu:_zod.z.string({description:"CPU resource request (e.g., '500m', '1', '2000m')"}).default("1000m"),memory:_zod.z.string({description:"Memory resource request (e.g., '512Mi', '1Gi', '250Mi')"}).default("250Mi")}),limits:_zod.z.object({cpu:_zod.z.string({description:"CPU resource limit (e.g., '1', '2', '4000m')"}).optional(),memory:_zod.z.string({description:"Memory resource limit (e.g., '1Gi', '2Gi', '500Mi')"}).optional()}).optional()}).optional().describe("Resource allocation for quality check execution"),wr=_zod.z.object({cron:_zod.z.string({description:"Cron expression for scheduling (e.g., '0 2 * * *' for daily at 2 AM, '0 */6 * * *' for every 6 hours)"}),concurrencyPolicy:_zod.z.enum(["Allow","Forbid","Replace"],{description:"Policy for handling concurrent executions"}).default("Allow"),endOn:_zod.z.string({description:"End date for scheduled executions in ISO 8601 format"}).optional(),timezone:_zod.z.string({description:"Timezone for schedule execution (e.g., 'UTC', 'Asia/Kolkata')"}).default("UTC")}).optional().describe("Schedule configuration for recurring quality checks"),Sr={name:_zod.z.string({description:"Quality workflow name - alphanumeric with hyphens, max 48 chars"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the quality workflow"}),description:_zod.z.string({description:"Human-readable description of the quality checks and purpose"}),owner:_zod.z.string({description:"Owner of the quality workflow resource"}).optional(),workspace:_zod.z.string({description:"Workspace where the quality workflow will be deployed"}).default("public")},Er={...Sr,schedule:wr,jobName:_zod.z.string({description:"Name of the quality job within the workflow"}),jobTitle:_zod.z.string({description:"Human-readable title for the job"}).optional(),jobDescription:_zod.z.string({description:"Description of the quality job"}).optional(),jobTags:_zod.z.array(_zod.z.string(),{description:"Tags specific to this quality job"}).optional(),compute:_zod.z.string({description:"Compute resource for the job"}).default("runnable-default"),runAsUser:_zod.z.string({description:"User to run the quality job as"}).optional(),stack:_zod.z.enum(["soda+python:1.0"],{description:"Soda stack version with Python flavor"}).default("soda+python:1.0"),logLevel:_zod.z.enum(["DEBUG","INFO","WARNING","ERROR"],{description:"Logging level for the quality job"}).default("INFO"),resources:vr,inputs:_zod.z.array(br,{description:"List of datasets and their associated quality checks"}),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-quality.yaml"}),fileName:_zod.z.string({description:"File name for the quality workflow file. example: my-quality.yaml"})},ke=n=>{n.tool("create-quality-workflow",pr,Er,async({name:p,tags:e,description:s,owner:r,workspace:l,schedule:v,jobName:w,jobTitle:b,jobDescription:g,jobTags:d,compute:h,runAsUser:c,stack:S,logLevel:m,resources:f,inputs:t,path:a,fileName:y})=>{for(let T of t)for(let N of T.checks){let _=mr(N);if(_)return{content:[{type:"text",text:_}]}}let x=[{name:w,...b&&{title:b},...g&&{description:g},...d&&{tags:d},spec:{stack:S,compute:h,...c&&{runAsUser:c},...f&&{resources:f},logLevel:m,stackSpec:{inputs:t}}}],C=_yaml.stringify.call(void 0, {name:p,tags:e,type:"workflow",version:"v1",...r&&{owner:r},workspace:l,description:s,...v&&{schedule:v},dag:x});try{let T=a.includes(".yaml")||a.includes(".yml")?a:a.endsWith("/")?a+y:`${a}/${y}`,N=_path2.default.dirname(T);return _fs.existsSync.call(void 0, N)||_fs.mkdirSync.call(void 0, N,{recursive:!0}),_fs.writeFileSync.call(void 0, T,C),{content:[{type:"text",text:`Quality workflow created successfully at ${T}`},{type:"text",text:`File content: ${C}`}]}}catch(T){return{content:[{type:"text",text:`Error creating quality workflow: ${T}`}]}}})};var _https = require('https'); var _https2 = _interopRequireDefault(_https);var xr=4;function K(n,p,e,s=!1,r=0){return new Promise((l,v)=>{let w=_https2.default.request(n,p,b=>{let g="";b.on("data",d=>g+=d),b.on("end",()=>{s&&r<xr&&g.includes("Continue wait")?setTimeout(()=>{K(n,p,e,s,r+1).then(l).catch(v)},1e3):l({status:b.statusCode||500,data:g})})});w.on("error",v),e&&w.write(e),w.end()})}var _dotenv = require('dotenv');_dotenv.config.call(void 0, );var Z="",ae=process.env.SLUG||"bmx";function xe(n,p=","){return Array.isArray(n)?n.filter(e=>e!=null).map(e=>typeof e=="string"?e:JSON.stringify(e)).join(p):""}function W(n){return n===!0||n==="true"||n===1||n==="1"}function ce(n){return n==null?"":typeof n=="string"?n:typeof n=="number"||typeof n=="boolean"?String(n):JSON.stringify(n)}function O(n){let p=ce(n);return p===""?p:p.includes("|")||p.includes(",")||p.includes('"')?`"${p.replace(/"/g,'"')}"`:p}function _e(n){return Array.isArray(n)?n.map(p=>`"${ce(p)}"`).join(","):""}function ie(...n){for(let p of n)if(p!==void 0)return p}function Ae(n,p){let e=n.name||n.slug||p||"",s=n.fullyQualifiedName||e||p||"",r=n.description||"",l=_optionalChain([n, 'access', _2 => _2.owner, 'optionalAccess', _3 => _3.name])||_optionalChain([n, 'access', _4 => _4.owner, 'optionalAccess', _5 => _5.displayName])||xe(n.authors||[],","),v=n.timeZones||n.timezones||[],w=Array.isArray(v)?v:typeof v=="string"?[v]:[],b=W(ie(_optionalChain([n, 'access', _6 => _6.cache, 'optionalAccess', _7 => _7.enabled]),n.cache,_optionalChain([n, 'access', _8 => _8.runtime, 'optionalAccess', _9 => _9.cache, 'optionalAccess', _10 => _10.enabled]),_optionalChain([n, 'access', _11 => _11.runtime, 'optionalAccess', _12 => _12.cache]),n.isCacheEnabled)),g=[];g.push(`${s}(${s})`),r&&g.push(r),l&&g.push(l),w.length&&g.push(w.join(",")),g.push(`Cache: ${b}`),g.push("");let d=Array.isArray(n.views)?n.views:[],h=Array.isArray(n.tables)?n.tables:[];d.length===0&&h.length>0&&(d.push(...h.filter(m=>!m.sql)),h=h.filter(m=>m.sql));let c=(m,f,t)=>{let a=[],y=m.title||m.displayName||m.name||`item_${f}`,x=m.name||m.table||m.view||y,C=m.description||_optionalChain([m, 'access', _13 => _13.meta, 'optionalAccess', _14 => _14.description])||"",T=W(ie(m.isVisible,m.visible)),N=W(m.public),_=m.connectedComponent,B=m.refs||m.references,z=_optionalChain([m, 'access', _15 => _15.meta, 'optionalAccess', _16 => _16.title])||_optionalChain([m, 'access', _17 => _17.metric, 'optionalAccess', _18 => _18.title])||"",G=_optionalChain([m, 'access', _19 => _19.meta, 'optionalAccess', _20 => _20.tags])||m.tags,re=!!(_optionalChain([m, 'access', _21 => _21.meta, 'optionalAccess', _22 => _22.metric])||m.metric);if(a.push(`${f}. ${y}(${x})`),C&&a.push(C),t==="view"){let q=["view"];re&&q.push("metric"),q.push(`visible: ${T}`),q.push(`public: ${N}`),a.push(q.join(", "))}else{let q=["table"];_!==void 0&&q.push(`connectedComponent: ${_}`),q.push(`visible: ${T}`),q.push(`public: ${N}`),a.push(q.join(", "))}if(z&&a.push(z),Array.isArray(G)&&G.length&&a.push(_e(G)),B&&(Array.isArray(B)&&B.length||typeof B=="string")&&a.push(`refs: ${Array.isArray(B)?B.join(","):B}`),t==="table"&&m.sql&&(a.push("SQL:"),a.push("`"+String(m.sql).trim()+"`")),m.joins&&Array.isArray(m.joins)&&m.joins.length){a.push(""),a.push("Joins:");for(let q of m.joins)a.push(`${q.relationship} - ${q.name}`),a.push(" SQL: `"+ce(q.sql).trim()+"`")}a.push(""),a.push("Dimentions"),t==="table"?a.push("name|title|description|type|sql|suggestFilterValues|isVisible|public|primaryKey|aliasMember|isGoverned|refs"):a.push("name|title|description|type|suggestFilterValues|isVisible|public|primaryKey|aliasMember|isGoverned|refs");let Q=Array.isArray(m.dimensions)?m.dimensions:[];for(let q of Q){let M=[O(q.name),O(q.title||q.displayName||""),O(q.description||""),O(q.type||q.dataType||"")];t==="table"&&M.push(O(q.sql||q.expression||"")),M.push(O(W(q.suggestFilterValues)),O(W(ie(q.isVisible,q.visible))),O(W(q.public)),O(W(q.primaryKey)),O(q.aliasMember||(Array.isArray(q.aliasMembers)?q.aliasMembers.join(","):"")),O(W(q.isGoverned)),O(Array.isArray(q.refs)?q.refs.join(","):q.refs||"")),a.push(M.join("|"))}a.push(""),a.push("Measures"),a.push("name|title|description|cumulativeTotal|cumulative|type|aggType|isVisible|public|aliasMember|isGoverned|refs");let V=Array.isArray(m.measures)?m.measures:[];for(let q of V){let M=[O(q.name),O(q.title||q.displayName||""),O(q.description||""),O(W(q.cumulativeTotal)),O(W(q.cumulative)),O(q.type||q.dataType||""),O(q.aggType||q.aggregation||""),O(W(ie(q.isVisible,q.visible))),O(W(q.public)),O(q.aliasMember||(Array.isArray(q.aliasMembers)?q.aliasMembers.join(","):"")),O(W(q.isGoverned)),O(Array.isArray(q.refs)?q.refs.join(","):q.refs||"")];a.push(M.join("|"))}return a},S=1;for(let m of d)g.push(...c(m,S++,"view")),g.push("");for(let m of h)g.push(...c(m,S++,"table")),g.push("");return g.join(`
|
|
66
|
-
`)}async function X({slug:
|
|
67
|
-
`+
|
|
68
|
-
`)}]}}catch (e3){return{content:[{type:"text",text:"Error: API Response is not a JSON"}],isError:!0}}}var
|
|
65
|
+
`,ue=_zod.z.object({title:_zod.z.string({description:"Human-readable title for the check (e.g., 'Customer ID Completeness Check')"}).optional(),category:_zod.z.enum(["Accuracy","Completeness","Consistency","Freshness","Schema","Uniqueness","Validity"],{description:"Category of the data quality check for organization and reporting"}),description:_zod.z.string({description:"Detailed description of what the check validates"}).optional(),tags:_zod.z.array(_zod.z.string(),{description:"Tags for organizing and filtering checks (e.g., ['critical', 'pii', 'financial'])"}).optional()}).describe("Metadata attributes for organizing and documenting quality checks"),ur=_zod.z.object({limit:_zod.z.number({description:"Maximum number of failed row samples to collect for analysis (default: 100)"}).min(1).max(1e3).default(100)}).optional().describe("Configuration for collecting failed row samples for debugging"),dr=_zod.z.object({"valid format":_zod.z.enum(["email","phone number","credit card","uuid","date","url"],{description:"Built-in format validation (e.g., 'email', 'phone number')"}).optional(),"valid regex":_zod.z.string({description:"Custom regex pattern for validation (e.g., '^[A-Z]{2}[0-9]{4}$' for postal codes)"}).optional(),"valid min":_zod.z.number({description:"Minimum valid value for numeric columns"}).optional(),"valid max":_zod.z.number({description:"Maximum valid value for numeric columns"}).optional(),"valid min length":_zod.z.number({description:"Minimum valid length for string columns"}).optional(),"valid max length":_zod.z.number({description:"Maximum valid length for string columns"}).optional(),"valid values":_zod.z.array(_zod.z.union([_zod.z.string(),_zod.z.number()]),{description:"List of valid values (e.g., ['active', 'inactive', 'pending'])"}).optional()}).describe("Validation rules for validity checks"),ks=_zod.z.object({name:_zod.z.string({description:"Name identifier for the schema check"}).optional(),warn:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if present (e.g., ['temp_*', 'old_*'])"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger warnings if wrong (e.g., {'age': 'integer'})"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger warnings if wrong"}).optional()}).optional(),fail:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if present (sensitive data)"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger failures if wrong"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger failures if wrong"}).optional()}).optional(),attributes:ue}).describe("Schema validation check configuration with warn/fail conditions"),xs=_zod.z.object({name:_zod.z.string({description:"Custom name for the check (overrides default naming)"}).optional(),filter:_zod.z.string({description:`SQL WHERE clause to filter data for this specific check (e.g., 'status = "active"')`}).optional(),attributes:ue}),mr=s=>{let{checkType:l,column:e,operator:a,value:c,compareDataset:r,samples:v,name:S,filter:y,attributes:g,missingValues:d,missingRegex:m,validationRules:p,function:w,percentile:f,lengthType:h,sourceColumn:t,referenceDataset:n,referenceColumn:b,checkPattern:x,warn:q,fail:L,failCondition:N,failQuery:_}=s;if(!l)return"Error: 'checkType' is required for all quality checks.";switch(l){case"row_count":if(!a)return"Error: 'row_count' check requires 'operator' property.";if(c===void 0)return"Error: 'row_count' check requires 'value' property.";if(e||d||m||p||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'row_count' check should only have 'operator', 'value', 'compareDataset', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"missing_count":if(!e)return"Error: 'missing_count' check requires 'column' property.";if(!a)return"Error: 'missing_count' check requires 'operator' property.";if(c===void 0)return"Error: 'missing_count' check requires 'value' property.";if(r||p||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'missing_count' check should only have 'column', 'operator', 'value', 'missingValues', 'missingRegex', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"missing_percent":if(!e)return"Error: 'missing_percent' check requires 'column' property.";if(!a)return"Error: 'missing_percent' check requires 'operator' property.";if(c===void 0)return"Error: 'missing_percent' check requires 'value' property.";if(r||d||m||p||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'missing_percent' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"duplicate_count":if(!e)return"Error: 'duplicate_count' check requires 'column' property.";if(!a)return"Error: 'duplicate_count' check requires 'operator' property.";if(c===void 0)return"Error: 'duplicate_count' check requires 'value' property.";if(r||d||m||p||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'duplicate_count' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"duplicate_percent":if(!e)return"Error: 'duplicate_percent' check requires 'column' property.";if(!a)return"Error: 'duplicate_percent' check requires 'operator' property.";if(c===void 0)return"Error: 'duplicate_percent' check requires 'value' property.";if(r||d||m||p||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'duplicate_percent' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"invalid_count":if(!e)return"Error: 'invalid_count' check requires 'column' property.";if(!a)return"Error: 'invalid_count' check requires 'operator' property.";if(c===void 0)return"Error: 'invalid_count' check requires 'value' property.";if(!p)return"Error: 'invalid_count' check requires 'validationRules' property.";if(r||d||m||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'invalid_count' check should only have 'column', 'operator', 'value', 'validationRules', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"invalid_percent":if(!e)return"Error: 'invalid_percent' check requires 'column' property.";if(!a)return"Error: 'invalid_percent' check requires 'operator' property.";if(c===void 0)return"Error: 'invalid_percent' check requires 'value' property.";if(!p)return"Error: 'invalid_percent' check requires 'validationRules' property.";if(r||d||m||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'invalid_percent' check should only have 'column', 'operator', 'value', 'validationRules', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"freshness":if(!e)return"Error: 'freshness' check requires 'column' property.";if(!a)return"Error: 'freshness' check requires 'operator' property.";if(!c)return"Error: 'freshness' check requires 'value' property.";if(r||d||m||p||w||f||h||t||n||b||x||q||L||N||_)return"Error: 'freshness' check should only have 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"aggregation":if(!w)return"Error: 'aggregation' check requires 'function' property.";if(!e)return"Error: 'aggregation' check requires 'column' property.";if(!a)return"Error: 'aggregation' check requires 'operator' property.";if(c===void 0)return"Error: 'aggregation' check requires 'value' property.";if(r||d||m||p||h||t||n||b||x||q||L||N||_)return"Error: 'aggregation' check should only have 'function', 'column', 'operator', 'value', 'percentile', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"length":if(!h)return"Error: 'length' check requires 'lengthType' property.";if(!e)return"Error: 'length' check requires 'column' property.";if(!a)return"Error: 'length' check requires 'operator' property.";if(c===void 0)return"Error: 'length' check requires 'value' property.";if(r||d||m||p||w||f||t||n||b||x||q||L||N||_)return"Error: 'length' check should only have 'lengthType', 'column', 'operator', 'value', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"reference":if(!t)return"Error: 'reference' check requires 'sourceColumn' property.";if(!n)return"Error: 'reference' check requires 'referenceDataset' property.";if(!b)return"Error: 'reference' check requires 'referenceColumn' property.";if(!x)return"Error: 'reference' check requires 'checkPattern' property.";if(e||a||c||r||d||m||p||w||f||h||q||L||N||_)return"Error: 'reference' check should only have 'sourceColumn', 'referenceDataset', 'referenceColumn', 'checkPattern', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"schema":if(e||a||c||r||d||m||p||w||f||h||t||n||b||x||N||_)return"Error: 'schema' check should only have 'warn', 'fail', 'samples', 'name', 'filter', and 'attributes' properties.";break;case"failed_rows":if(!N&&!_)return"Error: 'failed_rows' check requires either 'failCondition' or 'failQuery' property.";if(e||a||c||r||d||m||p||w||f||h||t||n||b||x||q||L)return"Error: 'failed_rows' check should only have 'failCondition', 'failQuery', 'samples', 'name', 'filter', and 'attributes' properties.";break;default:return`Error: Invalid soda check type '${l}'. Supported types are: row_count, missing_count, missing_percent, duplicate_count, duplicate_percent, invalid_count, invalid_percent, freshness, aggregation, length, reference, schema, failed_rows.`}return null},fr=_zod.z.object({checkType:_zod.z.enum(["row_count","missing_count","missing_percent","duplicate_count","duplicate_percent","invalid_count","invalid_percent","freshness","aggregation","length","reference","schema","failed_rows"],{description:"Type of data quality check to perform"}),name:_zod.z.string({description:"Custom name for the check (overrides default naming)"}).optional(),filter:_zod.z.string({description:`SQL WHERE clause to filter data for this specific check (e.g., 'status = "active"')`}).optional(),attributes:ue,samples:ur,compareDataset:_zod.z.string({description:"Reference dataset for comparison (e.g., 'same as reference_table')"}).optional(),column:_zod.z.string({description:"Column name for checks that operate on specific columns"}).optional(),operator:_zod.z.enum(["=","!=",">",">=","<","<=","between"],{description:"Comparison operator for the check"}).optional(),value:_zod.z.union([_zod.z.number(),_zod.z.string()],{description:"Expected value or range for the check"}).optional(),missingValues:_zod.z.array(_zod.z.union([_zod.z.string(),_zod.z.null()]),{description:"Custom missing value indicators (e.g., ['NA', 'n/a', '', null])"}).optional(),missingRegex:_zod.z.string({description:"Regex pattern to identify missing values"}).optional(),validationRules:dr.optional(),function:_zod.z.enum(["min","max","avg","sum","stddev","variance","percentile"],{description:"Aggregation function to apply"}).optional(),percentile:_zod.z.number({description:"Percentile value (0-1) for percentile function"}).min(0).max(1).optional(),lengthType:_zod.z.enum(["min_length","max_length","avg_length"],{description:"Type of length check to perform"}).optional(),sourceColumn:_zod.z.string({description:"Column in current dataset"}).optional(),referenceDataset:_zod.z.string({description:"Reference dataset name (e.g., 'reference_table')"}).optional(),referenceColumn:_zod.z.string({description:"Column in reference dataset"}).optional(),checkPattern:_zod.z.enum(["must exist in","must not exist in"],{description:"Type of reference validation"}).optional(),warn:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger warnings if present (e.g., ['temp_*', 'old_*'])"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger warnings if wrong (e.g., {'age': 'integer'})"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger warnings if wrong"}).optional()}).optional(),fail:_zod.z.object({"when required column missing":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if missing"}).optional(),"when forbidden column present":_zod.z.array(_zod.z.string(),{description:"Columns that should trigger failures if present (sensitive data)"}).optional(),"when wrong column type":_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Expected column types that should trigger failures if wrong"}).optional(),"when wrong column index":_zod.z.record(_zod.z.string(),_zod.z.number(),{description:"Expected column positions that should trigger failures if wrong"}).optional()}).optional(),failCondition:_zod.z.string({description:"SQL condition for identifying failed rows (e.g., 'age < 0 OR age > 150')"}).optional(),failQuery:_zod.z.string({description:"Custom SQL query for complex failed row detection"}).optional()}).describe("Comprehensive Soda quality check with typed validation"),gr=_zod.z.object({columns:_zod.z.array(_zod.z.string(),{description:"Column specifications for profiling: exact names, wildcards (*), includes/excludes (e.g., ['customer_id', 'include address_*', 'exclude temp_*', '*'])"})}).optional().describe("Data profiling configuration for statistical analysis"),hr=_zod.z.object({name:_zod.z.string({description:"Identifier name for the filter"}),where:_zod.z.string({description:`SQL WHERE clause for global filtering across all checks (e.g., 'status = "active" AND created_date > "2023-01-01"')`})}).optional().describe("Global filter applied to all checks on this dataset"),yr=_zod.z.object({engine:_zod.z.enum(["minerva","default"],{description:"Query engine: 'minerva' for Trino-based queries, 'default' for native engine"}).default("default"),clusterName:_zod.z.string({description:"Cluster name for Minerva engine (required when engine is 'minerva')"}).optional(),branchName:_zod.z.string({description:"Branch name for Iceberg datasets (defaults to 'main')"}).default("main").optional()}).optional().describe("Engine and execution options for quality checks"),br=_zod.z.object({dataset:_zod.z.string({description:"Dataset specification using DataOS UDL format: dataos://[depot]:[collection]/[dataset] (e.g., 'dataos://icebase:retail/customer')"}),options:yr,filter:hr,profile:gr,checks:_zod.z.array(fr,{description:"List of data quality checks to execute on this dataset"})}).describe("Complete dataset configuration with quality checks and profiling"),vr=_zod.z.object({requests:_zod.z.object({cpu:_zod.z.string({description:"CPU resource request (e.g., '500m', '1', '2000m')"}).default("1000m"),memory:_zod.z.string({description:"Memory resource request (e.g., '512Mi', '1Gi', '250Mi')"}).default("250Mi")}),limits:_zod.z.object({cpu:_zod.z.string({description:"CPU resource limit (e.g., '1', '2', '4000m')"}).optional(),memory:_zod.z.string({description:"Memory resource limit (e.g., '1Gi', '2Gi', '500Mi')"}).optional()}).optional()}).optional().describe("Resource allocation for quality check execution"),Sr=_zod.z.object({cron:_zod.z.string({description:"Cron expression for scheduling (e.g., '0 2 * * *' for daily at 2 AM, '0 */6 * * *' for every 6 hours)"}),concurrencyPolicy:_zod.z.enum(["Allow","Forbid","Replace"],{description:"Policy for handling concurrent executions"}).default("Allow"),endOn:_zod.z.string({description:"End date for scheduled executions in ISO 8601 format"}).optional(),timezone:_zod.z.string({description:"Timezone for schedule execution (e.g., 'UTC', 'Asia/Kolkata')"}).default("UTC")}).optional().describe("Schedule configuration for recurring quality checks"),wr={name:_zod.z.string({description:"Quality workflow name - alphanumeric with hyphens, max 48 chars"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing the quality workflow"}),description:_zod.z.string({description:"Human-readable description of the quality checks and purpose"}),owner:_zod.z.string({description:"Owner of the quality workflow resource"}).optional(),workspace:_zod.z.string({description:"Workspace where the quality workflow will be deployed"}).default("public")},Er={...wr,schedule:Sr,jobName:_zod.z.string({description:"Name of the quality job within the workflow"}),jobTitle:_zod.z.string({description:"Human-readable title for the job"}).optional(),jobDescription:_zod.z.string({description:"Description of the quality job"}).optional(),jobTags:_zod.z.array(_zod.z.string(),{description:"Tags specific to this quality job"}).optional(),compute:_zod.z.string({description:"Compute resource for the job"}).default("runnable-default"),runAsUser:_zod.z.string({description:"User to run the quality job as"}).optional(),stack:_zod.z.enum(["soda+python:1.0"],{description:"Soda stack version with Python flavor"}).default("soda+python:1.0"),logLevel:_zod.z.enum(["DEBUG","INFO","WARNING","ERROR"],{description:"Logging level for the quality job"}).default("INFO"),resources:vr,inputs:_zod.z.array(br,{description:"List of datasets and their associated quality checks"}),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-quality.yaml"}),fileName:_zod.z.string({description:"File name for the quality workflow file. example: my-quality.yaml"})},ke=s=>{s.tool("create-quality-workflow",pr,Er,async({name:l,tags:e,description:a,owner:c,workspace:r,schedule:v,jobName:S,jobTitle:y,jobDescription:g,jobTags:d,compute:m,runAsUser:p,stack:w,logLevel:f,resources:h,inputs:t,path:n,fileName:b})=>{for(let L of t)for(let N of L.checks){let _=mr(N);if(_)return{content:[{type:"text",text:_}]}}let x=[{name:S,...y&&{title:y},...g&&{description:g},...d&&{tags:d},spec:{stack:w,compute:m,...p&&{runAsUser:p},...h&&{resources:h},logLevel:f,stackSpec:{inputs:t}}}],q=_yaml.stringify.call(void 0, {name:l,tags:e,type:"workflow",version:"v1",...c&&{owner:c},workspace:r,description:a,...v&&{schedule:v},dag:x});try{let L=n.includes(".yaml")||n.includes(".yml")?n:n.endsWith("/")?n+b:`${n}/${b}`,N=_path2.default.dirname(L);return _fs.existsSync.call(void 0, N)||_fs.mkdirSync.call(void 0, N,{recursive:!0}),_fs.writeFileSync.call(void 0, L,q),{content:[{type:"text",text:`Quality workflow created successfully at ${L}`},{type:"text",text:`File content: ${q}`}]}}catch(L){return{content:[{type:"text",text:`Error creating quality workflow: ${L}`}]}}})};var _https = require('https'); var _https2 = _interopRequireDefault(_https);var xr=4;function H(s,l,e,a=!1,c=0){return new Promise((r,v)=>{let S=_https2.default.request(s,l,y=>{let g="";y.on("data",d=>g+=d),y.on("end",()=>{a&&c<xr&&g.includes("Continue wait")?setTimeout(()=>{H(s,l,e,a,c+1).then(r).catch(v)},1e3):r({status:y.statusCode||500,data:g})})});S.on("error",v),e&&S.write(e),S.end()})}var _dotenv = require('dotenv');_dotenv.config.call(void 0, );var Z="",ae=process.env.SLUG||"bmx";function xe(s,l=","){return Array.isArray(s)?s.filter(e=>e!=null).map(e=>typeof e=="string"?e:JSON.stringify(e)).join(l):""}function W(s){return s===!0||s==="true"||s===1||s==="1"}function ce(s){return s==null?"":typeof s=="string"?s:typeof s=="number"||typeof s=="boolean"?String(s):JSON.stringify(s)}function M(s){let l=ce(s);return l===""?l:l.includes("|")||l.includes(",")||l.includes('"')?`"${l.replace(/"/g,'"')}"`:l}function _e(s){return Array.isArray(s)?s.map(l=>`"${ce(l)}"`).join(","):""}function oe(...s){for(let l of s)if(l!==void 0)return l}function Ae(s,l){let e=s.name||s.slug||l||"",a=s.fullyQualifiedName||e||l||"",c=s.description||"",r=_optionalChain([s, 'access', _2 => _2.owner, 'optionalAccess', _3 => _3.name])||_optionalChain([s, 'access', _4 => _4.owner, 'optionalAccess', _5 => _5.displayName])||xe(s.authors||[],","),v=s.timeZones||s.timezones||[],S=Array.isArray(v)?v:typeof v=="string"?[v]:[],y=W(oe(_optionalChain([s, 'access', _6 => _6.cache, 'optionalAccess', _7 => _7.enabled]),s.cache,_optionalChain([s, 'access', _8 => _8.runtime, 'optionalAccess', _9 => _9.cache, 'optionalAccess', _10 => _10.enabled]),_optionalChain([s, 'access', _11 => _11.runtime, 'optionalAccess', _12 => _12.cache]),s.isCacheEnabled)),g=[];g.push(`${a}(${a})`),c&&g.push(c),r&&g.push(r),S.length&&g.push(S.join(",")),g.push(`Cache: ${y}`),g.push("");let d=Array.isArray(s.views)?s.views:[],m=Array.isArray(s.tables)?s.tables:[];d.length===0&&m.length>0&&(d.push(...m.filter(f=>!f.sql)),m=m.filter(f=>f.sql));let p=(f,h,t)=>{let n=[],b=f.title||f.displayName||f.name||`item_${h}`,x=f.name||f.table||f.view||b,q=f.description||_optionalChain([f, 'access', _13 => _13.meta, 'optionalAccess', _14 => _14.description])||"",L=W(oe(f.isVisible,f.visible)),N=W(f.public),_=f.connectedComponent,B=f.refs||f.references,z=_optionalChain([f, 'access', _15 => _15.meta, 'optionalAccess', _16 => _16.title])||_optionalChain([f, 'access', _17 => _17.metric, 'optionalAccess', _18 => _18.title])||"",G=_optionalChain([f, 'access', _19 => _19.meta, 'optionalAccess', _20 => _20.tags])||f.tags,re=!!(_optionalChain([f, 'access', _21 => _21.meta, 'optionalAccess', _22 => _22.metric])||f.metric);if(n.push(`${h}. ${b}(${x})`),q&&n.push(q),t==="view"){let C=["view"];re&&C.push("metric"),C.push(`visible: ${L}`),C.push(`public: ${N}`),n.push(C.join(", "))}else{let C=["table"];_!==void 0&&C.push(`connectedComponent: ${_}`),C.push(`visible: ${L}`),C.push(`public: ${N}`),n.push(C.join(", "))}if(z&&n.push(z),Array.isArray(G)&&G.length&&n.push(_e(G)),B&&(Array.isArray(B)&&B.length||typeof B=="string")&&n.push(`refs: ${Array.isArray(B)?B.join(","):B}`),t==="table"&&f.sql&&(n.push("SQL:"),n.push("`"+String(f.sql).trim()+"`")),f.joins&&Array.isArray(f.joins)&&f.joins.length){n.push(""),n.push("Joins:");for(let C of f.joins)n.push(`${C.relationship} - ${C.name}`),n.push(" SQL: `"+ce(C.sql).trim()+"`")}n.push(""),n.push("Dimentions"),t==="table"?n.push("name|title|description|type|sql|suggestFilterValues|isVisible|public|primaryKey|aliasMember|isGoverned|refs"):n.push("name|title|description|type|suggestFilterValues|isVisible|public|primaryKey|aliasMember|isGoverned|refs");let Q=Array.isArray(f.dimensions)?f.dimensions:[];for(let C of Q){let O=[M(C.name),M(C.title||C.displayName||""),M(C.description||""),M(C.type||C.dataType||"")];t==="table"&&O.push(M(C.sql||C.expression||"")),O.push(M(W(C.suggestFilterValues)),M(W(oe(C.isVisible,C.visible))),M(W(C.public)),M(W(C.primaryKey)),M(C.aliasMember||(Array.isArray(C.aliasMembers)?C.aliasMembers.join(","):"")),M(W(C.isGoverned)),M(Array.isArray(C.refs)?C.refs.join(","):C.refs||"")),n.push(O.join("|"))}n.push(""),n.push("Measures"),n.push("name|title|description|cumulativeTotal|cumulative|type|aggType|isVisible|public|aliasMember|isGoverned|refs");let V=Array.isArray(f.measures)?f.measures:[];for(let C of V){let O=[M(C.name),M(C.title||C.displayName||""),M(C.description||""),M(W(C.cumulativeTotal)),M(W(C.cumulative)),M(C.type||C.dataType||""),M(C.aggType||C.aggregation||""),M(W(oe(C.isVisible,C.visible))),M(W(C.public)),M(C.aliasMember||(Array.isArray(C.aliasMembers)?C.aliasMembers.join(","):"")),M(W(C.isGoverned)),M(Array.isArray(C.refs)?C.refs.join(","):C.refs||"")];n.push(O.join("|"))}return n},w=1;for(let f of d)g.push(...p(f,w++,"view")),g.push("");for(let f of m)g.push(...p(f,w++,"table")),g.push("");return g.join(`
|
|
66
|
+
`)}async function X({slug:s=ae,apiKey:l,fqdn:e=Z}){let a=`https://${e}/lens2/api/${s.replace("public.","public:")}/v2/meta`,c={method:"GET",headers:{Authorization:`Bearer ${l}`}},{status:r,data:v}=await H(a,c);if(r<200||r>=300)return{content:[{type:"text",text:`Error: API responded with ${r}`}],isError:!0};try{let S=JSON.parse(v);return{content:[{type:"text",text:Ae(S,s)}]}}catch (e2){return{content:[{type:"text",text:"Error: API Response is not a JSON"}],isError:!0}}}async function le({slug:s=ae,payload:l,apiKey:e,fqdn:a=Z}){let c=`https://${a}/lens2/api/${s.replace("public.","public:")}/v2/load`,r=JSON.stringify({query:l}),v={method:"POST",headers:{Authorization:`Bearer ${e}`,"Content-Type":"application/json","Content-Length":Buffer.byteLength(r)}},{status:S,data:y}=await H(c,v,r,!0);if(S<200||S>=300)return{content:[{type:"text",text:JSON.stringify({status:S,data:y})}],isError:!0};try{let g=JSON.parse(y);return{content:[{type:"text",text:JSON.stringify(g.data)}]}}catch(g){return console.log("Error parsing lens data response:",g,y),{content:[{type:"text",text:`Query may be still running. Try again one more time. ${g.message}`}],isError:!0}}}async function ee({apiKey:s,fqdn:l=Z}){let e=`https://${l}/dph/discover/api/msearch/query/v2`,a=JSON.stringify({queries:[{indexUid:"data_product_v2_search_index",q:"*",filter:["deleted='false'","tagList NOT IN ['DPTier.Source Aligned']"],offset:0,limit:1e3,attributesToRetrieve:["description","name","fullyQualifiedName","id","tagList","displayName","ports.lens.entityInfo.fullyQualifiedName","owner","purpose","updatedAt"]}]}),c={method:"POST",headers:{Authorization:`Bearer ${s}`,"Content-Type":"application/json","Content-Length":Buffer.byteLength(a)}},{status:r,data:v}=await H(e,c,a);if(r<200||r>=300)return{content:[{type:"text",text:`Error: API responded with ${r}`}],isError:!0};try{let y=JSON.parse(v).hits.filter(m=>_optionalChain([m, 'access', _23 => _23.ports, 'optionalAccess', _24 => _24.lens])),g=[],d=["Semantic model (lens) - fully qualified name","Data product name","Data product description","Purpose","DPDomain","DPUsecase","Owner","Updated At"];return y.forEach(m=>{let p=_optionalChain([m, 'access', _25 => _25.tagList, 'access', _26 => _26.find, 'call', _27 => _27(n=>n.startsWith("DPDomain.")), 'optionalAccess', _28 => _28.replace, 'call', _29 => _29("DPDomain.","")])||"N/A",w=_optionalChain([m, 'access', _30 => _30.tagList, 'access', _31 => _31.find, 'call', _32 => _32(n=>n.startsWith("DPUsecase.")), 'optionalAccess', _33 => _33.replace, 'call', _34 => _34("DPUsecase.","")])||"N/A",f=_optionalChain([m, 'access', _35 => _35.owner, 'optionalAccess', _36 => _36.displayName])||"N/A",h=m.updatedAt?new Date(m.updatedAt).toDateString():"N/A",t=[m.ports.lens.entityInfo.fullyQualifiedName.replace("dataos.",""),m.displayName,m.description,m.purpose||"N/A",p,w,f,h];g.push(t.join("|"))}),{content:[{type:"text",text:`${d.join("|")}
|
|
67
|
+
`+g.join(`
|
|
68
|
+
`)}]}}catch (e3){return{content:[{type:"text",text:"Error: API Response is not a JSON"}],isError:!0}}}var Ar=`
|
|
69
69
|
Query data from a Lens by sending a "query" object.
|
|
70
70
|
|
|
71
71
|
Build the parameters as follows:
|
|
@@ -117,7 +117,7 @@ Strict rules
|
|
|
117
117
|
- Explore the schema before selecting fields (use the schema tool to copy exact field names)
|
|
118
118
|
|
|
119
119
|
Keywords: query, analytics, business intelligence, metrics, KPIs, lens, data, insights
|
|
120
|
-
`,
|
|
120
|
+
`,Tr=["equals","notEquals","contains","notContains","startsWith","notStartsWith","endsWith","notEndsWith","gt","gte","lt","lte","inDateRange","notInDateRange","beforeDate","afterDate"],Te=_zod.z.object({measures:_zod.z.array(_zod.z.string({description:"Fully-qualified measure name from the schema (e.g., sales.totalRevenue). Provide numbers-based metrics only here."})).optional(),dimensions:_zod.z.array(_zod.z.string({description:"Fully-qualified dimension name from the schema (e.g., date.month, customer.region). Use for grouping or slicing results."})).optional(),filters:_zod.z.array(_zod.z.record(_zod.z.enum(["and","or"]),_zod.z.array(_zod.z.object({member:_zod.z.string(),operator:_zod.z.enum(Tr),values:_zod.z.array(_zod.z.string().date().or(_zod.z.number()).or(_zod.z.string()).or(_zod.z.boolean()).or(_zod.z.null()))}))),{description:"Optional but strongly recommended. Provide an array of groups. Each group is an object with a single key: 'and' or 'or', mapping to an array of filter clauses. Clause shape: { member, operator, values }. Use fully-qualified member names from the schema. For numeric measures, values must be numbers. For date ranges, pass [start, end] as 'YYYY-MM-DD'. Prefer including at least a time window or key categorical filter. Omit this key entirely if no filters are needed. Do not send an empty array."}).nonempty().optional(),limit:_zod.z.number({description:"Max rows to return. Integer. Default 100. Use small limits (5/10/20) for top/bottom questions; larger (500/1000) for comprehensive lists."}).int().default(100),offset:_zod.z.number({description:"Rows to skip for pagination. Integer. Default 0. Increase to fetch subsequent pages."}).int().default(0)}),Le=({server:s,apiKey:l,fqdn:e,slug:a})=>{s.tool("query-lens-data",Ar,a?{query:Te}:{query:Te,slug:_zod.z.string({description:"Name of the lens (semantic model slug). Alphanumeric with hyphen(-)s allowed. Make sure slug string contains workspace notation. example: public.customer-360"})},async({slug:c,query:r})=>le({payload:r,slug:a||c,apiKey:l,fqdn:e}))};var Ce=({server:s,apiKey:l,fqdn:e})=>{s.resource("sematic-models","sematic-models://lenses",async a=>{let c=await ee({apiKey:l,fqdn:e});return{isError:c.isError,contents:c.content.map(({text:r})=>({uri:a.href,text:r}))}})};var Lr=`Discover available semantic models within Data Products. This is always the starting point for data exploration.
|
|
121
121
|
|
|
122
122
|
WHEN TO USE:
|
|
123
123
|
- Begin every new data exploration journey by listing available semantic models
|
|
@@ -132,7 +132,7 @@ WHAT IT RETURNS:
|
|
|
132
132
|
SEMANTIC MODEL CONTEXT:
|
|
133
133
|
Semantic models are business-friendly modeling layers that sit on top of data warehouses/lakehouses. They transform raw data into logical business entities (customers, products, sales, orders) with dimensions, measures, and pre-built business logic.
|
|
134
134
|
|
|
135
|
-
Keywords: list lens, discover data, available models, semantic models, data products, exploration`,
|
|
135
|
+
Keywords: list lens, discover data, available models, semantic models, data products, exploration`,qe=({server:s,apiKey:l,fqdn:e})=>{s.tool("list-lens",Lr,{},async()=>ee({apiKey:l,fqdn:e}))};var qr=`Retrieve the complete metadata and structure of a semantic model. This is essential before querying data.
|
|
136
136
|
|
|
137
137
|
WHEN TO USE:
|
|
138
138
|
- After discovering available semantic models (lenses)
|
|
@@ -168,16 +168,16 @@ The schema includes:
|
|
|
168
168
|
- Provide curated data experiences for specific use cases
|
|
169
169
|
- Can be entity-first (customer_360) or metrics-first (monthly_revenue)
|
|
170
170
|
|
|
171
|
-
Keywords: schema, metadata, data structure, dimensions, measures, segments, views, model structure`,
|
|
171
|
+
Keywords: schema, metadata, data structure, dimensions, measures, segments, views, model structure`,Dr=_zod.z.string({description:"Name of the lens (semantic model slug). Alphanumeric with hyphen(-)s allowed. Make sure slug string contains workspace notation. example: public.customer-360"}),De=({server:s,apiKey:l,fqdn:e,slug:a})=>{s.tool("query-lens-schema",qr,a?{}:{slug:Dr},async({slug:c})=>X({slug:a||c,payload:{},apiKey:l,fqdn:e}))};var je=({server:s,apiKey:l,fqdn:e,slug:a})=>{a?s.resource("sematic-models",`sematic-models://lenses/${a}`,async c=>Re({slug:a.toString(),href:c.href,apiKey:l,fqdn:e,payload:{}})):s.resource("sematic-models",new (0, _mcpjs.ResourceTemplate)("sematic-models://lenses/{slug}",{list:void 0}),async(c,{slug:r})=>Re({slug:r.toString(),href:c.href,apiKey:l,fqdn:e,payload:{}}))};async function Re({slug:s,apiKey:l,fqdn:e,href:a}){let c=await X({slug:s.toString(),payload:{},apiKey:l,fqdn:e});return{isError:c.isError,contents:c.content.map(({text:r})=>({uri:a,text:r}))}}var Ie={name:"@tmdc-solutions/mcp-beta",version:"0.1.6",description:"for dataOS related tasks in local",type:"module",sideEffects:!1,main:"dist/stdio.cjs",types:"dist/stdio.d.ts",scripts:{build:"tsc","build:stdio":"tsup --clean","build:modules":"tsup --clean",http:"node dist/src/streamable-http.js -L",compile:"npm run build && npm run build:css && node dist/src/streamable-http.js -L",watch:'nodemon --watch src,views -e ts,pug,json,css --exec "npm run compile"',"build:css":"npx @tailwindcss/cli -i app.css -o public/output.css",test:'echo "Error: no test specified" && exit 1'},files:["dist/studio.cjs","dist/studio.cjs.map"],bin:{mcp:"./dist/stdio.js"},keywords:[],author:"",license:"ISC",packageManager:"pnpm@10.12.4",dependencies:{"@faker-js/faker":"^9.8.0","@modelcontextprotocol/sdk":"^1.12.0",dotenv:"^16.5.0",express:"^5.1.0",keyv:"^5.3.4",pug:"^3.0.3","trino-client":"^0.2.9",yaml:"^2.8.0",zod:"^3.25.67","zod-to-json-schema":"^3.24.5"},devDependencies:{"@tailwindcss/cli":"^4.1.8","@total-typescript/tsconfig":"^1.0.4","@types/express":"^5.0.2","@types/node":"^22.15.21",daisyui:"^5.0.40",nodemon:"^3.1.10",tailwindcss:"^4.1.8",tsup:"^8.5.0",typescript:"^5.8.3"}};var Mr=`Bundle in DataOS is a Resource that serves as a declarative and standardized mechanism for deploying a collection of Resources, Data Products, or applications in a single operation.
|
|
172
172
|
It empowers data developers with the capability to programmatically orchestrate the deployment, scheduling, creation, and dismantling of code and infrastructure resources linked to these Data Products and applications in a unified manner.
|
|
173
173
|
|
|
174
174
|
A Bundle aggregates various DataOS Resources into a flattened directed acyclic graph (DAG), where each node represents a distinct DataOS Resource, interconnected through dependency relationships.
|
|
175
175
|
Examples: Deploy a complete data pipeline with depot connections, data ingestion workflows, quality checks, and scanners all orchestrated together.
|
|
176
|
-
`,Ne=_zod.z.object({cron:_zod.z.string({description:"Cron expression for scheduling (5 fields: minute hour day-of-month month day-of-week). Examples: '0 9 * * 1' (every Monday at 9 AM), '*/15 * * * *' (every 15 minutes), '0 0 1 * *' (first day of every month)"})}),
|
|
176
|
+
`,Ne=_zod.z.object({cron:_zod.z.string({description:"Cron expression for scheduling (5 fields: minute hour day-of-month month day-of-week). Examples: '0 9 * * 1' (every Monday at 9 AM), '*/15 * * * *' (every 15 minutes), '0 0 1 * *' (first day of every month)"})}),Ur=_zod.z.object({initialState:_zod.z.enum(["create","delete"],{description:"Starting condition of the Bundle Resource. 'create' initiates Bundle creation according to the create schedule, 'delete' triggers Bundle deletion according to the delete schedule"}),timezone:_zod.z.string({description:"Timezone for interpreting the schedule in 'Area/Location' format (e.g., 'Asia/Kolkata', 'America/New_York', 'Europe/Berlin'). Supports daylight savings time rules"}),create:_zod.z.array(Ne,{description:"List of cron schedules for creating the Bundle Resource. Multiple schedules can be specified for different creation times"}).optional(),delete:_zod.z.array(Ne,{description:"List of cron schedules for deleting the Bundle Resource. Used when initialState is 'delete' or for cleanup schedules"}).optional()}),Fr=_zod.z.object({name:_zod.z.string({description:"Workspace name - max 32 chars, pattern: [a-z]([-a-z0-9]*[a-z0-9])? Examples: 'data-platform', 'ml-workspace', 'analytics-dev'"}),description:_zod.z.string({description:"Human-readable description of the workspace purpose and functionality"}).optional(),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing the workspace. Examples: ['dataproduct', 'production', 'analytics']. Note: DataOS adds system tags like 'dataos:type:workspace'"}).optional(),labels:_zod.z.record(_zod.z.string(),_zod.z.string(),{description:"Key-value labels for metadata and organization. Examples: {'environment': 'production', 'team': 'data-engineering'}"}).optional(),layer:_zod.z.enum(["user","system"],{description:"Layer within the User Space - 'user' for user-created workspaces, 'system' for system-managed"})}),Br=_zod.z.object({is:_zod.z.array(_zod.z.string(),{description:"Exact status matches required. Common values: ['active', 'inactive', 'succeeded', 'failed', 'running']"}).optional(),contains:_zod.z.array(_zod.z.string(),{description:"Status contains any of these values (flexible matching). Examples: ['error', 'warning', 'activ'] for partial matches"}).optional()}),zr=_zod.z.object({is:_zod.z.array(_zod.z.string(),{description:"Exact runtime state matches. Common values: ['running', 'succeeded', 'failed', 'pending', 'completed']"}).optional(),contains:_zod.z.array(_zod.z.string(),{description:"Runtime state contains any of these values. Examples: ['run', 'complete'] for partial matches"}).optional()}),Wr=_zod.z.object({resourceId:_zod.z.string({description:"Unique identifier of the resource this resource depends on (must match an 'id' within the Bundle)"}),status:Br.optional(),runtime:zr.optional()}),Gr=_zod.z.object({id:_zod.z.string({description:"Unique identifier for this resource within the Bundle. Used for creating dependencies between resources in the DAG"}),workspace:_zod.z.string({description:"Target workspace for deploying Workspace-level resources. Must be a valid workspace name (max 32 chars, pattern: [a-z]([-a-z0-9]*[a-z0-9])?)"}).optional(),spec:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Inline resource specification - complete DataOS resource manifest (name, version, type, etc.). Alternative to 'file' attribute"}).optional(),file:_zod.z.string({description:"Path to external resource specification file (YAML). Alternative to 'spec' attribute. Examples: './depot.yaml', '/path/to/workflow.yml'"}).optional(),dependencies:_zod.z.array(_zod.z.string(),{description:"List of resource IDs this resource depends on. Creates edges in the DAG. Resources will wait for dependencies to be ready"}).optional(),dependencyConditions:_zod.z.array(Wr,{description:"Advanced dependency conditions with status and runtime checks. If not specified but dependencies exist, defaults to status 'active'"}).optional()}).refine(s=>s.spec||s.file,{message:"Either 'spec' (inline specification) or 'file' (external file path) must be provided",path:["spec"]}),Qr=_zod.z.object({schedule:Ur.optional(),workspaces:_zod.z.array(Fr,{description:"List of new workspaces to create for this Bundle. These workspaces can then be referenced by resources within the Bundle"}).optional(),resources:_zod.z.array(Gr,{description:"List of DataOS resources that make up this Bundle, organized as a directed acyclic graph (DAG) with dependencies"}),properties:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional custom properties for the Bundle in key-value format. Examples: {'team': 'data-platform', 'cost-center': 'engineering'}"}).optional(),manageAsUser:_zod.z.string({description:"UserID to manage this Bundle on behalf of. Grants authority to perform operations as that user"}).optional()}),Vr={name:_zod.z.string({description:"Bundle name - alphanumeric with hyphens, max 48 chars, pattern: [a-z0-9]([-a-z0-9]*[a-z0-9])? Examples: 'data-pipeline-bundle', 'ml-training-stack'"}),version:_zod.z.enum(["v1alpha","v1beta"],{description:"Bundle manifest version - determines available features and compatibility"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing the Bundle. Examples: ['dataproduct', 'production', 'analytics', 'ml-pipeline']"}).optional(),description:_zod.z.string({description:"Human-readable description of the Bundle's purpose and contents"}).optional(),owner:_zod.z.string({description:"Owner of the Bundle resource - typically a UserID or team identifier"}).optional(),layer:_zod.z.enum(["user","system"],{description:"Resource layer - 'user' for user-created bundles, 'system' for system-managed"}).default("user"),workspace:_zod.z.string({description:"Default workspace for the Bundle (Instance-level). Note: Individual resources can override this with their own workspace settings"}).optional(),bundle:Qr,path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-bundle.yaml"}),fileName:_zod.z.string({description:"File name for the bundle file. example: my-bundle.yaml"})},$e=s=>{s.tool("create-bundle",Mr,Vr,async({name:l,version:e,tags:a,description:c,owner:r,layer:v,workspace:S,bundle:y,path:g,fileName:d})=>{let m=_yaml.stringify.call(void 0, {name:l,version:e,type:"bundle",...a&&{tags:a},...c&&{description:c},...r&&{owner:r},layer:v,...S&&{workspace:S},bundle:y});try{let p=g.includes(".yaml")||g.includes(".yml")?g:g.endsWith("/")?g+d:`${g}/${d}`,w=_path2.default.dirname(p);return _fs.existsSync.call(void 0, w)||_fs.mkdirSync.call(void 0, w,{recursive:!0}),_fs.writeFileSync.call(void 0, p,m),{content:[{type:"text",text:`Bundle file created successfully at ${p}`},{type:"text",text:`File content: ${m}`}]}}catch(p){return{content:[{type:"text",text:`Error creating bundle file: ${p}`}]}}})};var Xr=`Data Product in DataOS is a curated, reusable, and governed dataset that provides business value and is designed to be consumed by various applications, analytics, and decision-making processes.
|
|
177
177
|
Data Products encapsulate data, metadata, transformation logic, quality checks, and consumption interfaces, making data assets more discoverable, accessible, and trustworthy.
|
|
178
178
|
|
|
179
179
|
Data Products can be versioned (v1alpha, v1beta) and support various inputs, outputs, resources, and consumption ports like Lens (semantic layer) and Talos (APIs).
|
|
180
|
-
`,
|
|
180
|
+
`,ie=_zod.z.object({description:_zod.z.string({description:"Description of the referenced resource or dataset"}).optional(),purpose:_zod.z.string({description:"Purpose of this reference (e.g., 'source', 'ingestion', 'transformation', 'consumption')"}).optional(),refType:_zod.z.enum(["dataos","external"],{description:"Type of reference - 'dataos' for internal DataOS resources, 'external' for external systems"}),ref:_zod.z.string({description:"Reference path (e.g., 'dataset:icebase:sales:customers', 'bundle:v1beta:sales-pipeline', 'dataos://s3:bucket/path')"}),name:_zod.z.string({description:"Name of the resource (for workflow/service references)"}).optional(),version:_zod.z.string({description:"Version of the referenced resource"}).optional(),type:_zod.z.string({description:"Type of resource (e.g., 'workflow', 'service', 'dataset')"}).optional(),workspace:_zod.z.string({description:"Workspace where the resource exists (default: 'public')"}).optional()}),eo=_zod.z.object({ref:_zod.z.string({description:"Lens reference (e.g., 'lens:v1alpha:sales-semantic-model:public')"}),refType:_zod.z.literal("dataos")}),Pe=_zod.z.object({ref:_zod.z.string({description:"Talos service reference (e.g., 'service:v1:sales-api:public')"}),refType:_zod.z.literal("dataos")}),to=_zod.z.object({lens:eo.optional(),talos:_zod.z.union([Pe,_zod.z.array(Pe)]).optional()}).optional(),ro=_zod.z.object({name:_zod.z.string({description:"DataOS user ID of the collaborator"}),description:_zod.z.string({description:"Role of the collaborator (e.g., 'developer', 'consumer', 'reviewer', 'domain expert')"})}),oo=_zod.z.object({title:_zod.z.string({description:"Human-readable title of the data product (e.g., 'Customer 360 Analytics', 'Sales Intelligence Dashboard')"}),sourceCodeUrl:_zod.z.string({description:"URL to source code repository (e.g., GitHub, Bitbucket, GitLab repository URL)"}).optional(),trackerUrl:_zod.z.string({description:"URL to issue tracker or project management tool (e.g., Jira, GitHub Issues)"}).optional()}),io=_zod.z.array(_zod.z.string({description:"Business use cases this data product serves (e.g., 'Customer Segmentation', 'Demand Forecasting', 'Fraud Detection')"})),so={name:_zod.z.string({description:"Data Product name - alphanumeric with hyphens, max 48 chars, pattern: [a-z0-9]([-a-z0-9]*[a-z0-9]) (e.g., 'customer-360-dp', 'sales-analytics')"}),version:_zod.z.enum(["v1alpha","v1beta"],{description:"Data Product version - v1alpha for basic structure, v1beta for enhanced structure with ports"}),entity:_zod.z.literal("product",{description:"DataOS entity type - always 'product' for data products"}),type:_zod.z.literal("data",{description:"Product type - always 'data' for data products"}),tags:_zod.z.array(_zod.z.string(),{description:"Categorization tags (e.g., 'DPDomain.Sales', 'DPTier.Gold', 'DPUsecase.Analytics', 'Readiness.Ready to use')"}),description:_zod.z.string({description:"Comprehensive description of the data product, its purpose, business value, and use cases"}),purpose:_zod.z.string({description:"Specific purpose and business objective this data product serves"}).optional(),owner:_zod.z.string({description:"DataOS user ID of the data product owner (person responsible for the data product)"}),collaborators:_zod.z.array(_zod.z.string(),{description:"List of DataOS user IDs who collaborate on this data product (for v1alpha format)"}).optional(),refs:_zod.z.array(_zod.z.object({title:_zod.z.string({description:"Reference title (e.g., 'Dashboard', 'API Documentation', 'Data Dictionary')"}),href:_zod.z.string({description:"URL to the reference resource"})})).optional()},no=_zod.z.object({data:_zod.z.object({useCases:io.optional(),resources:_zod.z.array(ie).optional(),inputs:_zod.z.array(ie).optional(),outputs:_zod.z.array(ie).optional()})}),ao=_zod.z.object({data:_zod.z.object({meta:oo,collaborators:_zod.z.array(ro).optional(),relatedDataProducts:_zod.z.array(_zod.z.string({description:"References to related data products (format: 'data:version:name', e.g., 'data:v1beta:customer-360')"})).optional(),resource:_zod.z.object({description:_zod.z.string({description:"Description of the main resource (usually a bundle) that implements this data product"}).optional(),purpose:_zod.z.string({description:"Purpose of the resource (e.g., 'ingestion', 'transformation', 'data pipeline')"}).optional(),refType:_zod.z.literal("dataos"),ref:_zod.z.string({description:"Reference to the main bundle or resource (e.g., 'bundle:v1beta:sales-pipeline')"})}),inputs:_zod.z.array(ie).optional(),outputs:_zod.z.array(ie).optional(),ports:to})}),co={...so,v1alpha:no.optional(),v1beta:ao.optional(),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-data-product.yaml"}),fileName:_zod.z.string({description:"File name for the data product file. example: my-data-product.yaml"})},Oe=s=>{s.tool("create-data-product",Xr,co,async l=>{let{version:e,v1alpha:a,v1beta:c,path:r,fileName:v,...S}=l,y={};e==="v1alpha"&&a?y={v1alpha:a}:e==="v1beta"&&c&&(y={v1beta:c});let g={...S,...y},d=_yaml.stringify.call(void 0, g);try{let m=r.includes(".yaml")||r.includes(".yml")?r:r.endsWith("/")?r+v:`${r}/${v}`,p=_path2.default.dirname(m);return _fs.existsSync.call(void 0, p)||_fs.mkdirSync.call(void 0, p,{recursive:!0}),_fs.writeFileSync.call(void 0, m,d),{content:[{type:"text",text:`Data product file created successfully at ${m}`},{type:"text",text:`File content: ${d}`}]}}catch(m){return{content:[{type:"text",text:`Error creating data product file: ${m}`}]}}})};var go=`Lens Manifest is used to deploy Lens resources on DataOS, providing a semantic modeling layer over your data sources. Lens enables business users to access and analyze data through a unified interface while maintaining governance and performance.
|
|
181
181
|
|
|
182
182
|
Lens provides:
|
|
183
183
|
- **Semantic Modeling**: Create logical views of your data with dimensions, measures, and metrics
|
|
@@ -197,20 +197,20 @@ Deployment Features:
|
|
|
197
197
|
- Flexible resource allocation (CPU/Memory)
|
|
198
198
|
- Environment variable configuration
|
|
199
199
|
- Auto-scaling based on CPU/Memory utilization
|
|
200
|
-
- High availability across multiple zones/regions`,yi=_zod.z.object({cpu:_zod.z.string({description:"CPU request in milliCPU (m) or cores (e.g., '100m', '1', '2000m')"}),memory:_zod.z.string({description:"Memory request in Mi/Gi (e.g., '256Mi', '1Gi', '2048Mi')"})}),bi=_zod.z.object({cpu:_zod.z.string({description:"CPU limit in milliCPU (m) or cores (e.g., '2000m', '4', '6000m')"}),memory:_zod.z.string({description:"Memory limit in Mi/Gi (e.g., '2048Mi', '4Gi', '6048Mi')"})}),te=_zod.z.object({requests:yi.optional(),limits:bi.optional()}),pe=_zod.z.enum(["INFO","WARN","DEBUG","ERROR"],{description:"Logging level: INFO (general info), WARN (warnings), DEBUG (detailed debug), ERROR (errors only)"}),de=_zod.z.record(_zod.z.string(),_zod.z.union([_zod.z.string(),_zod.z.number()]),{description:"Environment variables as key-value pairs. Common variables: LENS2_SCHEDULED_REFRESH_TIMEZONES, LENS2_SOURCE_WORKSPACE_NAME, LENS2_DB_TIMEOUT"}),vi=_zod.z.object({name:_zod.z.string({description:"Name of the Instance Secret resource (e.g., 'bitbucket-r', 'github-cred')"}),key:_zod.z.string({description:"Specific key to reference from the secret"}).optional(),keys:_zod.z.array(_zod.z.string(),{description:"List of specific keys to reference from the secret"}).optional(),allKeys:_zod.z.boolean({description:"Whether to use all keys from the secret (default: true for most cases)"}).optional(),consumptionType:_zod.z.enum(["envVars","propFile"],{description:"How the secret should be consumed: 'envVars' (environment variables) or 'propFile' (property file)"}).optional()}),wi=n=>{let{type:p,name:e,catalog:s}=n;switch(p){case"minerva":if(!e)return"Error: 'minerva' type requires 'name' property with Minerva cluster name.";break;case"themis":if(!e)return"Error: 'themis' type requires 'name' property with Themis cluster name.";break;case"depot":if(!e)return"Error: 'depot' type requires 'name' property with Depot resource name.";if(s)return"Error: 'depot' type should not have 'catalog' property.";break;case"flash":if(!e)return"Error: 'flash' type requires 'name' property with Flash service name.";if(s)return"Error: 'flash' type should not have 'catalog' property.";break;default:return`Error: Invalid source configuration type '${p}'. Supported types are: minerva, themis, depot, flash.`}return null},Si=_zod.z.object({type:_zod.z.enum(["minerva","themis","depot","flash"],{description:"Type of data source: 'minerva' for Minerva clusters, 'themis' for Themis clusters, 'depot' for Depot resources, 'flash' for Flash services"}),name:_zod.z.string({description:"Name of the source (cluster name for minerva/themis, resource name for depot, service name for flash)"}),catalog:_zod.z.string({description:"Catalog name for Minerva/Themis (e.g., 'icebase', 'lakehouse')"}).optional()}),Ei=_zod.z.object({url:_zod.z.string({description:"Git repository URL containing the Lens model (e.g., 'https://github.com/org/lens-models', 'https://bitbucket.org/team/project')"}).url(),lensBaseDir:_zod.z.string({description:"Relative path to the Lens model directory in the repository (e.g., 'lens/sales360/model', 'models/customer-analytics')"}),secretId:_zod.z.string({description:"Secret ID for private repository access (alternative to secrets array)"}).optional(),syncFlags:_zod.z.array(_zod.z.string(),{description:"Additional synchronization flags, typically for branch specification (e.g., ['--ref=main', '--ref=develop'])"}).optional()}),ki=_zod.z.object({enabled:_zod.z.boolean({description:"Whether auto-scaling is enabled"}).default(!0),minReplicas:_zod.z.number({description:"Minimum number of replicas for auto-scaling"}).int().positive().default(1),maxReplicas:_zod.z.number({description:"Maximum number of replicas for auto-scaling"}).int().positive(),targetMemoryUtilizationPercentage:_zod.z.number({description:"Target memory utilization percentage (0-100) that triggers scaling"}).int().min(0).max(100).optional(),targetCPUUtilizationPercentage:_zod.z.number({description:"Target CPU utilization percentage (0-100) that triggers scaling"}).int().min(0).max(100).default(80)}),xi=_zod.z.object({level:_zod.z.enum(["hostname","zone","region"],{description:"Distribution level: 'hostname' (separate nodes), 'zone' (multiple zones), 'region' (multiple regions)"}),mode:_zod.z.enum(["preferred","required"],{description:"Distribution mode: 'preferred' (flexible, starts quickly), 'required' (strict, waits for proper distribution)"})}),_i=_zod.z.object({replicas:_zod.z.number({description:"Number of API instance replicas (recommended: 1 instance per 5-10 requests/second)"}).int().positive().optional(),logLevel:pe.optional(),autoScaling:ki.optional(),resources:te.optional(),envs:de.optional()}),Ai=_zod.z.object({replicas:_zod.z.number({description:"Number of Worker replicas for background processing"}).int().positive().optional(),logLevel:pe.optional(),highAvailabilityConfig:xi.optional(),resources:te.optional(),envs:de.optional()}),Li=_zod.z.object({replicas:_zod.z.number({description:"Number of Router instances"}).int().positive().optional(),logLevel:pe.optional(),resources:te.optional(),envs:de.optional()}),Ti=_zod.z.object({compute:_zod.z.string({description:"Compute Resource name to be used by Lens (e.g., 'runnable-default', 'runnable-large')"}),runAsApiKey:_zod.z.string({description:"DataOS API key to run as another user (optional, use dataos-ctl user apikey get to obtain)"}).optional(),runAsUser:_zod.z.string({description:"DataOS User ID to run as (requires proper use-case assignment)"}).optional(),secrets:_zod.z.array(vi,{description:"List of Instance Secret configurations for private repositories and authentication"}).optional(),source:Si,repo:Ei,api:_i.optional(),worker:Ai.optional(),router:Li.optional()}),qi=_zod.z.object({name:_zod.z.string({description:"Lens Resource name - alphanumeric with hyphens, max 48 chars (e.g., 'sales-analytics', 'customer-insights')"}),version:_zod.z.literal("v1alpha",{description:"Lens manifest version (always v1alpha)"}),type:_zod.z.literal("lens",{description:"Resource type (always lens)"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing the Lens resource (e.g., ['analytics', 'sales', 'customer'])"}).optional(),description:_zod.z.string({description:"Human-readable description of the Lens resource"}).optional(),owner:_zod.z.string({description:"Owner of the Lens resource (defaults to user deploying the resource)"}).optional(),layer:_zod.z.enum(["user","system"],{description:"DataOS layer where the resource will be deployed"}).default("user")}),Ci=_zod.z.enum(["basic","production","high_availability","auto_scaling","development","custom"],{description:"Deployment template: 'basic' (minimal setup), 'production' (optimized for prod), 'high_availability' (HA setup), 'auto_scaling' (with auto-scaling), 'development' (dev environment), 'custom' (fully custom)"}),Di={...qi.shape,deploymentTemplate:Ci.default("production"),lens:Ti,templateOverrides:_zod.z.object({apiReplicas:_zod.z.number().min(1).max(20).optional(),workerReplicas:_zod.z.number().min(1).max(10).optional(),enableAutoScaling:_zod.z.boolean().optional(),enableHighAvailability:_zod.z.boolean().optional(),logLevel:pe.optional(),customResources:_zod.z.object({api:te.optional(),worker:te.optional(),router:te.optional()}).optional()}).optional(),includeComments:_zod.z.boolean().default(!0).describe("Whether to include helpful comments in the generated YAML"),workspace:_zod.z.string().optional(),additionalComments:_zod.z.string().optional().describe("Additional custom comments to include in the output"),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-lens.yaml"}),fileName:_zod.z.string({description:"File name for the lens manifest file. example: my-lens.yaml"})},Oe=n=>{n.tool("create-lens-manifest",hi,Di,async({name:p,version:e,type:s,tags:r,description:l,owner:v,layer:w,deploymentTemplate:b,lens:g,templateOverrides:d,workspace:h,path:c,fileName:S})=>{let m=wi(g.source);if(m)return{content:[{type:"text",text:m}]};let t=((T,N)=>{let _={...N};switch(T){case"basic":_.api={replicas:1,logLevel:"INFO",resources:{requests:{cpu:"100m",memory:"256Mi"},limits:{cpu:"1000m",memory:"512Mi"}}};break;case"production":_.api={replicas:3,logLevel:"WARN",autoScaling:{enabled:!0,minReplicas:2,maxReplicas:10,targetCPUUtilizationPercentage:70},resources:{requests:{cpu:"500m",memory:"1Gi"},limits:{cpu:"2000m",memory:"2Gi"}}},_.worker={replicas:2,logLevel:"WARN",resources:{requests:{cpu:"200m",memory:"512Mi"},limits:{cpu:"1000m",memory:"1Gi"}}};break;case"high_availability":_.api={replicas:5,logLevel:"INFO",autoScaling:{enabled:!0,minReplicas:3,maxReplicas:15,targetCPUUtilizationPercentage:60},resources:{requests:{cpu:"500m",memory:"1Gi"},limits:{cpu:"2000m",memory:"2Gi"}}},_.worker={replicas:3,logLevel:"INFO",highAvailabilityConfig:{level:"zone",mode:"preferred"},resources:{requests:{cpu:"300m",memory:"512Mi"},limits:{cpu:"1000m",memory:"1Gi"}}},_.router={replicas:2,logLevel:"INFO",resources:{requests:{cpu:"100m",memory:"256Mi"},limits:{cpu:"500m",memory:"512Mi"}}};break;case"auto_scaling":_.api={replicas:2,logLevel:"INFO",autoScaling:{enabled:!0,minReplicas:1,maxReplicas:20,targetCPUUtilizationPercentage:80,targetMemoryUtilizationPercentage:75},resources:{requests:{cpu:"250m",memory:"512Mi"},limits:{cpu:"1500m",memory:"1Gi"}}},_.worker={replicas:1,logLevel:"INFO",resources:{requests:{cpu:"200m",memory:"256Mi"},limits:{cpu:"800m",memory:"512Mi"}}};break;case"development":_.api={replicas:1,logLevel:"DEBUG",resources:{requests:{cpu:"100m",memory:"256Mi"},limits:{cpu:"500m",memory:"512Mi"}}};break;case"custom":break}return d&&(d.apiReplicas&&(_.api=_.api||{},_.api.replicas=d.apiReplicas),d.workerReplicas&&(_.worker=_.worker||{},_.worker.replicas=d.workerReplicas),d.enableAutoScaling&&(_.api=_.api||{},_.api.autoScaling=_.api.autoScaling||{},_.api.autoScaling.enabled=d.enableAutoScaling),d.enableHighAvailability&&(_.worker=_.worker||{},_.worker.highAvailabilityConfig=_.worker.highAvailabilityConfig||{level:"zone",mode:"preferred"}),d.logLevel&&(_.api&&(_.api.logLevel=d.logLevel),_.worker&&(_.worker.logLevel=d.logLevel),_.router&&(_.router.logLevel=d.logLevel)),d.customResources&&(d.customResources.api&&(_.api=_.api||{},_.api.resources=d.customResources.api),d.customResources.worker&&(_.worker=_.worker||{},_.worker.resources=d.customResources.worker),d.customResources.router&&(_.router=_.router||{},_.router.resources=d.customResources.router))),_})(b,g),a={name:p,version:e,type:s,...r&&{tags:r},...l&&{description:l},...v&&{owner:v},layer:w,lens:t,...h&&{workspace:h}},y=_yaml.stringify.call(void 0, a),C=["# Lens Manifest for DataOS",`# This manifest deploys a Lens semantic layer using the '${b}' template`,"# ","# Lens provides a unified semantic modeling layer over your data sources","# Key components:","# - API: Processes queries and connects to data sources","# - Worker: Manages background tasks and cache invalidation","# - Router: Handles query planning and distribution","# ",`# Source Configuration: ${t.source.type} (${t.source.name})`,`# Repository: ${t.repo.url}`,`# Model Path: ${t.repo.lensBaseDir}`,"# ","# Deployment Details:",`# - Template: ${b}`,...t.api?[`# - API Replicas: ${t.api.replicas||"default"}`]:[],...t.worker?[`# - Worker Replicas: ${t.worker.replicas||"default"}`]:[],...t.router?[`# - Router Replicas: ${t.router.replicas||"default"}`]:[],..._optionalChain([t, 'access', _37 => _37.api, 'optionalAccess', _38 => _38.autoScaling, 'optionalAccess', _39 => _39.enabled])?["# - Auto-scaling: Enabled"]:[],..._optionalChain([t, 'access', _40 => _40.worker, 'optionalAccess', _41 => _41.highAvailabilityConfig])?["# - High Availability: Enabled"]:[],"# ","# For more information: https://dataos.info/resources/lens/",""].join(`
|
|
201
|
-
`)+
|
|
202
|
-
${
|
|
200
|
+
- High availability across multiple zones/regions`,ho=_zod.z.object({cpu:_zod.z.string({description:"CPU request in milliCPU (m) or cores (e.g., '100m', '1', '2000m')"}),memory:_zod.z.string({description:"Memory request in Mi/Gi (e.g., '256Mi', '1Gi', '2048Mi')"})}),yo=_zod.z.object({cpu:_zod.z.string({description:"CPU limit in milliCPU (m) or cores (e.g., '2000m', '4', '6000m')"}),memory:_zod.z.string({description:"Memory limit in Mi/Gi (e.g., '2048Mi', '4Gi', '6048Mi')"})}),te=_zod.z.object({requests:ho.optional(),limits:yo.optional()}),pe=_zod.z.enum(["INFO","WARN","DEBUG","ERROR"],{description:"Logging level: INFO (general info), WARN (warnings), DEBUG (detailed debug), ERROR (errors only)"}),de=_zod.z.record(_zod.z.string(),_zod.z.union([_zod.z.string(),_zod.z.number()]),{description:"Environment variables as key-value pairs. Common variables: LENS2_SCHEDULED_REFRESH_TIMEZONES, LENS2_SOURCE_WORKSPACE_NAME, LENS2_DB_TIMEOUT"}),bo=_zod.z.object({name:_zod.z.string({description:"Name of the Instance Secret resource (e.g., 'bitbucket-r', 'github-cred')"}),key:_zod.z.string({description:"Specific key to reference from the secret"}).optional(),keys:_zod.z.array(_zod.z.string(),{description:"List of specific keys to reference from the secret"}).optional(),allKeys:_zod.z.boolean({description:"Whether to use all keys from the secret (default: true for most cases)"}).optional(),consumptionType:_zod.z.enum(["envVars","propFile"],{description:"How the secret should be consumed: 'envVars' (environment variables) or 'propFile' (property file)"}).optional()}),vo=s=>{let{type:l,name:e,catalog:a}=s;switch(l){case"minerva":if(!e)return"Error: 'minerva' type requires 'name' property with Minerva cluster name.";break;case"themis":if(!e)return"Error: 'themis' type requires 'name' property with Themis cluster name.";break;case"depot":if(!e)return"Error: 'depot' type requires 'name' property with Depot resource name.";if(a)return"Error: 'depot' type should not have 'catalog' property.";break;case"flash":if(!e)return"Error: 'flash' type requires 'name' property with Flash service name.";if(a)return"Error: 'flash' type should not have 'catalog' property.";break;default:return`Error: Invalid source configuration type '${l}'. Supported types are: minerva, themis, depot, flash.`}return null},So=_zod.z.object({type:_zod.z.enum(["minerva","themis","depot","flash"],{description:"Type of data source: 'minerva' for Minerva clusters, 'themis' for Themis clusters, 'depot' for Depot resources, 'flash' for Flash services"}),name:_zod.z.string({description:"Name of the source (cluster name for minerva/themis, resource name for depot, service name for flash)"}),catalog:_zod.z.string({description:"Catalog name for Minerva/Themis (e.g., 'icebase', 'lakehouse')"}).optional()}),wo=_zod.z.object({url:_zod.z.string({description:"Git repository URL containing the Lens model (e.g., 'https://github.com/org/lens-models', 'https://bitbucket.org/team/project')"}).url(),lensBaseDir:_zod.z.string({description:"Relative path to the Lens model directory in the repository (e.g., 'lens/sales360/model', 'models/customer-analytics')"}),secretId:_zod.z.string({description:"Secret ID for private repository access (alternative to secrets array)"}).optional(),syncFlags:_zod.z.array(_zod.z.string(),{description:"Additional synchronization flags, typically for branch specification (e.g., ['--ref=main', '--ref=develop'])"}).optional()}),Eo=_zod.z.object({enabled:_zod.z.boolean({description:"Whether auto-scaling is enabled"}).default(!0),minReplicas:_zod.z.number({description:"Minimum number of replicas for auto-scaling"}).int().positive().default(1),maxReplicas:_zod.z.number({description:"Maximum number of replicas for auto-scaling"}).int().positive(),targetMemoryUtilizationPercentage:_zod.z.number({description:"Target memory utilization percentage (0-100) that triggers scaling"}).int().min(0).max(100).optional(),targetCPUUtilizationPercentage:_zod.z.number({description:"Target CPU utilization percentage (0-100) that triggers scaling"}).int().min(0).max(100).default(80)}),ko=_zod.z.object({level:_zod.z.enum(["hostname","zone","region"],{description:"Distribution level: 'hostname' (separate nodes), 'zone' (multiple zones), 'region' (multiple regions)"}),mode:_zod.z.enum(["preferred","required"],{description:"Distribution mode: 'preferred' (flexible, starts quickly), 'required' (strict, waits for proper distribution)"})}),xo=_zod.z.object({replicas:_zod.z.number({description:"Number of API instance replicas (recommended: 1 instance per 5-10 requests/second)"}).int().positive().optional(),logLevel:pe.optional(),autoScaling:Eo.optional(),resources:te.optional(),envs:de.optional()}),_o=_zod.z.object({replicas:_zod.z.number({description:"Number of Worker replicas for background processing"}).int().positive().optional(),logLevel:pe.optional(),highAvailabilityConfig:ko.optional(),resources:te.optional(),envs:de.optional()}),Ao=_zod.z.object({replicas:_zod.z.number({description:"Number of Router instances"}).int().positive().optional(),logLevel:pe.optional(),resources:te.optional(),envs:de.optional()}),To=_zod.z.object({compute:_zod.z.string({description:"Compute Resource name to be used by Lens (e.g., 'runnable-default', 'runnable-large')"}),runAsApiKey:_zod.z.string({description:"DataOS API key to run as another user (optional, use dataos-ctl user apikey get to obtain)"}).optional(),runAsUser:_zod.z.string({description:"DataOS User ID to run as (requires proper use-case assignment)"}).optional(),secrets:_zod.z.array(bo,{description:"List of Instance Secret configurations for private repositories and authentication"}).optional(),source:So,repo:wo,api:xo.optional(),worker:_o.optional(),router:Ao.optional()}),Lo=_zod.z.object({name:_zod.z.string({description:"Lens Resource name - alphanumeric with hyphens, max 48 chars (e.g., 'sales-analytics', 'customer-insights')"}),version:_zod.z.literal("v1alpha",{description:"Lens manifest version (always v1alpha)"}),type:_zod.z.literal("lens",{description:"Resource type (always lens)"}),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing the Lens resource (e.g., ['analytics', 'sales', 'customer'])"}).optional(),description:_zod.z.string({description:"Human-readable description of the Lens resource"}).optional(),owner:_zod.z.string({description:"Owner of the Lens resource (defaults to user deploying the resource)"}).optional(),layer:_zod.z.enum(["user","system"],{description:"DataOS layer where the resource will be deployed"}).default("user")}),Co=_zod.z.enum(["basic","production","high_availability","auto_scaling","development","custom"],{description:"Deployment template: 'basic' (minimal setup), 'production' (optimized for prod), 'high_availability' (HA setup), 'auto_scaling' (with auto-scaling), 'development' (dev environment), 'custom' (fully custom)"}),qo={...Lo.shape,deploymentTemplate:Co.default("production"),lens:To,templateOverrides:_zod.z.object({apiReplicas:_zod.z.number().min(1).max(20).optional(),workerReplicas:_zod.z.number().min(1).max(10).optional(),enableAutoScaling:_zod.z.boolean().optional(),enableHighAvailability:_zod.z.boolean().optional(),logLevel:pe.optional(),customResources:_zod.z.object({api:te.optional(),worker:te.optional(),router:te.optional()}).optional()}).optional(),includeComments:_zod.z.boolean().default(!0).describe("Whether to include helpful comments in the generated YAML"),workspace:_zod.z.string().optional(),additionalComments:_zod.z.string().optional().describe("Additional custom comments to include in the output"),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-lens.yaml"}),fileName:_zod.z.string({description:"File name for the lens manifest file. example: my-lens.yaml"})},Me=s=>{s.tool("create-lens-manifest",go,qo,async({name:l,version:e,type:a,tags:c,description:r,owner:v,layer:S,deploymentTemplate:y,lens:g,templateOverrides:d,workspace:m,path:p,fileName:w})=>{let f=vo(g.source);if(f)return{content:[{type:"text",text:f}]};let t=((L,N)=>{let _={...N};switch(L){case"basic":_.api={replicas:1,logLevel:"INFO",resources:{requests:{cpu:"100m",memory:"256Mi"},limits:{cpu:"1000m",memory:"512Mi"}}};break;case"production":_.api={replicas:3,logLevel:"WARN",autoScaling:{enabled:!0,minReplicas:2,maxReplicas:10,targetCPUUtilizationPercentage:70},resources:{requests:{cpu:"500m",memory:"1Gi"},limits:{cpu:"2000m",memory:"2Gi"}}},_.worker={replicas:2,logLevel:"WARN",resources:{requests:{cpu:"200m",memory:"512Mi"},limits:{cpu:"1000m",memory:"1Gi"}}};break;case"high_availability":_.api={replicas:5,logLevel:"INFO",autoScaling:{enabled:!0,minReplicas:3,maxReplicas:15,targetCPUUtilizationPercentage:60},resources:{requests:{cpu:"500m",memory:"1Gi"},limits:{cpu:"2000m",memory:"2Gi"}}},_.worker={replicas:3,logLevel:"INFO",highAvailabilityConfig:{level:"zone",mode:"preferred"},resources:{requests:{cpu:"300m",memory:"512Mi"},limits:{cpu:"1000m",memory:"1Gi"}}},_.router={replicas:2,logLevel:"INFO",resources:{requests:{cpu:"100m",memory:"256Mi"},limits:{cpu:"500m",memory:"512Mi"}}};break;case"auto_scaling":_.api={replicas:2,logLevel:"INFO",autoScaling:{enabled:!0,minReplicas:1,maxReplicas:20,targetCPUUtilizationPercentage:80,targetMemoryUtilizationPercentage:75},resources:{requests:{cpu:"250m",memory:"512Mi"},limits:{cpu:"1500m",memory:"1Gi"}}},_.worker={replicas:1,logLevel:"INFO",resources:{requests:{cpu:"200m",memory:"256Mi"},limits:{cpu:"800m",memory:"512Mi"}}};break;case"development":_.api={replicas:1,logLevel:"DEBUG",resources:{requests:{cpu:"100m",memory:"256Mi"},limits:{cpu:"500m",memory:"512Mi"}}};break;case"custom":break}return d&&(d.apiReplicas&&(_.api=_.api||{},_.api.replicas=d.apiReplicas),d.workerReplicas&&(_.worker=_.worker||{},_.worker.replicas=d.workerReplicas),d.enableAutoScaling&&(_.api=_.api||{},_.api.autoScaling=_.api.autoScaling||{},_.api.autoScaling.enabled=d.enableAutoScaling),d.enableHighAvailability&&(_.worker=_.worker||{},_.worker.highAvailabilityConfig=_.worker.highAvailabilityConfig||{level:"zone",mode:"preferred"}),d.logLevel&&(_.api&&(_.api.logLevel=d.logLevel),_.worker&&(_.worker.logLevel=d.logLevel),_.router&&(_.router.logLevel=d.logLevel)),d.customResources&&(d.customResources.api&&(_.api=_.api||{},_.api.resources=d.customResources.api),d.customResources.worker&&(_.worker=_.worker||{},_.worker.resources=d.customResources.worker),d.customResources.router&&(_.router=_.router||{},_.router.resources=d.customResources.router))),_})(y,g),n={name:l,version:e,type:a,...c&&{tags:c},...r&&{description:r},...v&&{owner:v},layer:S,lens:t,...m&&{workspace:m}},b=_yaml.stringify.call(void 0, n),q=["# Lens Manifest for DataOS",`# This manifest deploys a Lens semantic layer using the '${y}' template`,"# ","# Lens provides a unified semantic modeling layer over your data sources","# Key components:","# - API: Processes queries and connects to data sources","# - Worker: Manages background tasks and cache invalidation","# - Router: Handles query planning and distribution","# ",`# Source Configuration: ${t.source.type} (${t.source.name})`,`# Repository: ${t.repo.url}`,`# Model Path: ${t.repo.lensBaseDir}`,"# ","# Deployment Details:",`# - Template: ${y}`,...t.api?[`# - API Replicas: ${t.api.replicas||"default"}`]:[],...t.worker?[`# - Worker Replicas: ${t.worker.replicas||"default"}`]:[],...t.router?[`# - Router Replicas: ${t.router.replicas||"default"}`]:[],..._optionalChain([t, 'access', _37 => _37.api, 'optionalAccess', _38 => _38.autoScaling, 'optionalAccess', _39 => _39.enabled])?["# - Auto-scaling: Enabled"]:[],..._optionalChain([t, 'access', _40 => _40.worker, 'optionalAccess', _41 => _41.highAvailabilityConfig])?["# - High Availability: Enabled"]:[],"# ","# For more information: https://dataos.info/resources/lens/",""].join(`
|
|
201
|
+
`)+b;try{let L=p.includes(".yaml")||p.includes(".yml")?p:p.endsWith("/")?p+w:`${p}/${w}`,N=_path2.default.dirname(L);return _fs.existsSync.call(void 0, N)||_fs.mkdirSync.call(void 0, N,{recursive:!0}),_fs.writeFileSync.call(void 0, L,q),{content:[{type:"text",text:`Lens manifest created successfully at ${L}`},{type:"text",text:`Template: ${y}`},{type:"text",text:`Source: ${t.source.type} (${t.source.name})`},{type:"text",text:`File content:
|
|
202
|
+
${q}`}]}}catch(L){return{content:[{type:"text",text:`Error creating lens manifest: ${L}`}]}}})};var No=`Lens SQL Tool for preparing view models in DataOS Lens semantic layer using SQL selections and filters.
|
|
203
203
|
This tool helps create SQL queries for Lens model views that reference dimensions, measures, and segments from multiple logical tables.
|
|
204
204
|
|
|
205
205
|
Lens operates as a logical modeling layer for accessing tabular data in data warehouses/lakehouses, extending physical tables into logical tables with measures and relationships.
|
|
206
206
|
Examples: Create entity-first views (customer profiles, product analytics) or metrics-first views (conversion rates, revenue tracking) with proper SQL selections and joins.
|
|
207
|
-
`,Fe=_zod.z.enum(["string","number","time","boolean","date","timestamp"],{description:"SQL data type for dimensions and measures in Lens models"})
|
|
208
|
-
`)}function Ue(
|
|
209
|
-
`)}var ze=
|
|
210
|
-
${
|
|
211
|
-
`),V=["","-- Lens Model Integration Notes:",`-- 1. Save this SQL as: sqls/${
|
|
207
|
+
`,Fe=_zod.z.enum(["string","number","time","boolean","date","timestamp"],{description:"SQL data type for dimensions and measures in Lens models"}),$o=_zod.z.enum(["count","count_distinct","count_distinct_approx","sum","avg","min","max","string","number","boolean"],{description:"Measure aggregation type - count/sum/avg for numerical aggregations, string/number/boolean for calculated measures"}),Dn=_zod.z.enum(["one_to_one","one_to_many","many_to_one"],{description:"Join relationship type between tables - one_to_one (1:1), one_to_many (1:N), many_to_one (N:1)"}),Po=_zod.z.enum(["entity_first","metrics_first"],{description:"View modeling approach - entity_first (focus on entity attributes), metrics_first (focus on specific metrics with time dimensions)"}),Oo=_zod.z.object({baseTable:_zod.z.string({description:"Base table name for the join (left side of join). Examples: 'customer', 'sales', 'product'"}),baseColumn:_zod.z.string({description:"Column name in base table for join condition. Examples: 'customer_id', 'product_id', 'order_id'"}),targetTable:_zod.z.string({description:"Target table name for the join (right side of join). Examples: 'orders', 'transactions', 'inventory'"}),targetColumn:_zod.z.string({description:"Column name in target table for join condition. Examples: 'customer_id', 'product_id', 'order_id'"})}),Mo=_zod.z.object({tableName:_zod.z.string({description:"Source table name containing the column. Examples: 'customer', 'sales', 'product'"}),columnName:_zod.z.string({description:"Column name to select from the table. Examples: 'customer_id', 'product_name', 'order_date'"}),alias:_zod.z.string({description:"Optional alias for the column in the view. Examples: 'customer_identifier', 'product_title', 'purchase_date'"}).optional(),dataType:Fe,isPrimaryKey:_zod.z.boolean({description:"Whether this column serves as a primary key for the entity"}).default(!1),isRequired:_zod.z.boolean({description:"Whether this column is required (NOT NULL) in the view"}).default(!0)}),Uo=_zod.z.object({name:_zod.z.string({description:"Name of the calculated field. Examples: 'total_revenue', 'customer_lifetime_value', 'avg_order_value'"}),expression:_zod.z.string({description:"SQL expression for the calculation. Examples: 'SUM(order_amount)', 'COUNT(DISTINCT customer_id)', 'AVG(product_price * quantity)'"}),dataType:Fe,measureType:$o.optional(),description:_zod.z.string({description:"Business description of what this calculated field represents"}).optional()}),Be=_zod.z.object({column:_zod.z.string({description:"Column name for the condition. Examples: 'status', 'created_date', 'region'"}),operator:_zod.z.enum(["=","!=","<>","<","<=",">",">=","IN","NOT IN","LIKE","NOT LIKE","IS NULL","IS NOT NULL","BETWEEN","EXISTS","NOT EXISTS"],{description:"SQL comparison operator for the condition"}),value:_zod.z.union([_zod.z.string(),_zod.z.number(),_zod.z.array(_zod.z.union([_zod.z.string(),_zod.z.number()]))],{description:"Value(s) for the condition. Can be single value, array for IN/NOT IN, or range for BETWEEN"}).optional(),logicalOperator:_zod.z.enum(["AND","OR"],{description:"Logical operator to combine with next condition"}).optional()}),Fo=_zod.z.object({columns:_zod.z.array(_zod.z.string(),{description:"Column names to group by. Examples: ['region', 'product_category'], ['customer_segment'], ['date_trunc(\\'month\\', order_date)']"}),having:_zod.z.array(Be,{description:"HAVING conditions for filtering grouped results (applied after GROUP BY)"}).optional()}),Bo=_zod.z.object({column:_zod.z.string({description:"Column name to order by. Examples: 'total_revenue', 'customer_name', 'order_date'"}),direction:_zod.z.enum(["ASC","DESC"],{description:"Sort direction - ASC (ascending) or DESC (descending)"}).default("ASC")}),zo=_zod.z.object({viewName:_zod.z.string({description:"Name of the Lens view to create. Examples: 'customer_360', 'sales_metrics', 'product_performance'"}),viewType:Po,description:_zod.z.string({description:"Business description of the view's purpose and use case"}),baseTables:_zod.z.array(_zod.z.string(),{description:"Primary tables for the view. Examples: ['customer', 'orders'], ['sales', 'product'], ['transactions']"}),joins:_zod.z.array(Oo,{description:"Join conditions between tables. All joins in Lens are LEFT JOINs by default"}).optional(),dimensions:_zod.z.array(Mo,{description:"Dimensional columns (descriptive attributes) to include in the view. Examples: customer_name, product_category, region"}),measures:_zod.z.array(Uo,{description:"Calculated measures (aggregated values) for the view. Examples: total_sales, customer_count, avg_order_value"}).optional(),whereConditions:_zod.z.array(Be,{description:"WHERE conditions to filter the data. Examples: status = 'active', created_date > '2023-01-01'"}).optional(),groupBy:Fo.optional(),orderBy:_zod.z.array(Bo,{description:"ORDER BY specifications for result sorting"}).optional(),limit:_zod.z.number({description:"Maximum number of rows to return. Examples: 1000, 10000"}).optional(),timeFilter:_zod.z.object({timeColumn:_zod.z.string({description:"Time/date column for filtering. Examples: 'created_date', 'order_timestamp', 'updated_at'"}),granularity:_zod.z.enum(["second","minute","hour","day","week","month","quarter","year"],{description:"Time granularity for grouping time-based data"}).optional(),range:_zod.z.string({description:"Time range filter. Examples: 'last 30 days', 'this month', 'last quarter', '2023-01-01 to 2023-12-31'"}).optional()}).optional(),includeNulls:_zod.z.boolean({description:"Whether to include NULL values in results"}).default(!0),distinct:_zod.z.boolean({description:"Whether to apply DISTINCT to the entire result set"}).default(!1)}),Wo={viewConfig:zo,path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-lens-sql.sql"}),fileName:_zod.z.string({description:"File name for the lens SQL file. example: my-lens-sql.sql"})};function Go(s){return s.map(e=>`LEFT JOIN ${e.targetTable} ON ${e.baseTable}.${e.baseColumn} = ${e.targetTable}.${e.targetColumn}`).join(`
|
|
208
|
+
`)}function Ue(s){return s.length?`WHERE ${s.map((e,a)=>{let c="";switch(a>0&&e.logicalOperator&&(c+=`${e.logicalOperator} `),e.operator){case"IS NULL":case"IS NOT NULL":c+=`${e.column} ${e.operator}`;break;case"IN":case"NOT IN":let v=(Array.isArray(e.value)?e.value:[e.value]).map(y=>typeof y=="string"?`'${y}'`:y).join(", ");c+=`${e.column} ${e.operator} (${v})`;break;case"BETWEEN":Array.isArray(e.value)&&e.value.length===2&&(c+=`${e.column} BETWEEN ${e.value[0]} AND ${e.value[1]}`);break;case"LIKE":case"NOT LIKE":c+=`${e.column} ${e.operator} '${e.value}'`;break;default:let S=typeof e.value=="string"?`'${e.value}'`:e.value;c+=`${e.column} ${e.operator} ${S}`}return c}).join(" ")}`:""}function Qo(s,l){let e=s.map(r=>{let v=`${r.tableName}.${r.columnName}`;return r.alias?`${v} AS ${r.alias}`:v}),a=_optionalChain([l, 'optionalAccess', _42 => _42.map, 'call', _43 => _43(r=>`${r.expression} AS ${r.name}`)])||[];return[...e,...a].join(`,
|
|
209
|
+
`)}var ze=s=>{s.tool("create-lens-sql",No,Wo,async({viewConfig:l,path:e,fileName:a})=>{let{viewName:c,viewType:r,description:v,baseTables:S,joins:y=[],dimensions:g,measures:d=[],whereConditions:m=[],groupBy:p,orderBy:w=[],limit:f,timeFilter:h,includeNulls:t,distinct:n}=l,b=Qo(g,d),x=S[0],q=y.length>0?Go(y):"",L=m.length>0?Ue(m):"",N="";if(p&&(N=`GROUP BY ${p.columns.join(", ")}`,p.having&&p.having.length>0)){let O=Ue(p.having).replace("WHERE","HAVING");N+=`
|
|
210
|
+
${O}`}let _=w.length>0?`ORDER BY ${w.map(O=>`${O.column} ${O.direction}`).join(", ")}`:"",B=f?`LIMIT ${f}`:"",z="";if(h){let O=[];if(h.range){if(h.range.includes("last")&&h.range.includes("days")){let Y=_optionalChain([h, 'access', _44 => _44.range, 'access', _45 => _45.match, 'call', _46 => _46(/\d+/), 'optionalAccess', _47 => _47[0]])||"30";O.push(`${h.timeColumn} >= CURRENT_DATE - INTERVAL '${Y}' DAY`)}else if(h.range.includes("this month"))O.push(`${h.timeColumn} >= DATE_TRUNC('month', CURRENT_DATE)`);else if(h.range.includes("last quarter"))O.push(`${h.timeColumn} >= DATE_TRUNC('quarter', CURRENT_DATE) - INTERVAL '3' MONTH`);else if(h.range.includes(" to ")){let[Y,it]=h.range.split(" to ");O.push(`${h.timeColumn} BETWEEN '${Y.trim()}' AND '${it.trim()}'`)}}O.length>0&&(z=O.join(" AND "))}let G="";L&&z?G=`${L} AND ${z}`:L?G=L:z&&(G=`WHERE ${z}`);let Q=[`-- Lens View: ${c}`,`-- Type: ${r}`,`-- Description: ${v}`,"",`SELECT${n?" DISTINCT":""}`,` ${b}`,`FROM ${x}`,q,G,N,_,B].filter(O=>O.trim()!=="").join(`
|
|
211
|
+
`),V=["","-- Lens Model Integration Notes:",`-- 1. Save this SQL as: sqls/${c}.sql`,`-- 2. Reference in table YAML: sql: {{ load_sql('${c}') }}`,`-- 3. View type: ${r}`,r==="metrics_first"?"-- 4. For metrics-first views, focus on single measure with time dimension":"-- 4. For entity-first views, include comprehensive entity attributes","-- 5. Define dimensions and measures in the table YAML manifest",_optionalChain([h, 'optionalAccess', _48 => _48.granularity])?`-- 6. Time granularity: ${h.granularity}`:"","-- 7. All joins are LEFT JOINs in Lens by default"].filter(O=>O!==""),C=Q+`
|
|
212
212
|
`+V.join(`
|
|
213
|
-
`);try{let
|
|
213
|
+
`);try{let O=e.includes(".sql")?e:e.endsWith("/")?e+a:`${e}/${a}`,Y=_path2.default.dirname(O);return _fs.existsSync.call(void 0, Y)||_fs.mkdirSync.call(void 0, Y,{recursive:!0}),_fs.writeFileSync.call(void 0, O,C),{content:[{type:"text",text:`Lens SQL file created successfully at ${O}`},{type:"text",text:`File content: ${C}`}]}}catch(O){return{content:[{type:"text",text:`Error creating lens SQL file: ${O}`}]}}})};var Jo=`Lens Segments are reusable filters defined in the segment section within table manifests of the semantic model. They allow stakeholders to apply common filtering logic consistently across multiple queries, similar to how a WHERE clause works in SQL.
|
|
214
214
|
|
|
215
215
|
Segments are essential for:
|
|
216
216
|
- **Reusable Filtering**: Apply the same filter conditions across multiple queries and dashboards
|
|
@@ -225,28 +225,28 @@ Key Features:
|
|
|
225
225
|
- Row-level data access control
|
|
226
226
|
- Integration with Lens views and queries
|
|
227
227
|
|
|
228
|
-
Use segments when you notice the same filter conditions being applied repeatedly across multiple queries or dashboards.`,
|
|
228
|
+
Use segments when you notice the same filter conditions being applied repeatedly across multiple queries or dashboards.`,Zo=_zod.z.object({includes:_zod.z.union([_zod.z.literal("*"),_zod.z.array(_zod.z.string(),{description:"List of user groups to include. Use specific group names (e.g., ['data_analyst', 'data_scientist']) or '*' for all users"})],{description:"User groups that should have access to this segment. Use '*' for all users or specify group names"}),excludes:_zod.z.array(_zod.z.string(),{description:"List of user groups to exclude from access (e.g., ['reader', 'guest'])"}).optional()}),Xo=_zod.z.object({user_groups:Zo.describe("User group access control configuration for row-level security")}),ei=_zod.z.object({secure:Xo.optional(),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorizing and organizing segments (e.g., ['geography', 'sales', 'security'])"}).optional(),description:_zod.z.string({description:"Additional metadata description for the segment"}).optional()}),me=_zod.z.string({description:`SQL filter expression using {TABLE} placeholder. Examples:
|
|
229
229
|
- Simple equality: "{TABLE}.state = 'California'"
|
|
230
230
|
- Multiple values with OR: "{TABLE}.state = 'Illinois' OR {TABLE}.state = 'Ohio'"
|
|
231
231
|
- IN operator: "{TABLE}.state IN ('Illinois', 'Ohio', 'California')"
|
|
232
232
|
- LIKE pattern: "{TABLE}.state LIKE '%York%'"
|
|
233
233
|
- Complex AND/OR: "{TABLE}.state = 'Illinois' AND {TABLE}.sales > 1000"
|
|
234
234
|
- Date ranges: "{TABLE}.order_date >= '2023-01-01'"
|
|
235
|
-
- Null checks: "{TABLE}.email IS NOT NULL"`}),Os=_zod.z.object({name:_zod.z.string({description:"Segment name - should be descriptive and follow naming conventions (e.g., 'active_customers', 'high_value_orders', 'california_sales')"}),sql:me,public:_zod.z.boolean({description:"Whether the segment is publicly visible to all users or restricted"}).default(!0),description:_zod.z.string({description:"Human-readable description of what this segment filters (e.g., 'Filters for customers with active status and recent purchases')"}).optional(),meta:to.optional()}),Qe=_zod.z.enum(["geographic_filter","date_range_filter","status_filter","category_filter","value_range_filter","null_check_filter","pattern_match_filter","custom"],{description:"Predefined templates for common segment patterns. Choose 'custom' for fully custom segments"}),ro=n=>{let{template:p,column:e,values:s,operator:r,startDate:l,endDate:v,minValue:w,maxValue:b,checkType:g,pattern:d,sql:h}=n;switch(p){case"geographic_filter":if(!e)return"Error: 'geographic_filter' template requires 'column' property.";if(!s||s.length===0)return"Error: 'geographic_filter' template requires 'values' property with geographic values.";if(l||v||w||b||g||d||h)return"Error: 'geographic_filter' template should only have 'column', 'values', and 'operator' properties.";break;case"date_range_filter":if(!e)return"Error: 'date_range_filter' template requires 'column' property.";if(!l&&!v)return"Error: 'date_range_filter' template requires either 'startDate' or 'endDate' property.";if(s||w||b||g||d||h)return"Error: 'date_range_filter' template should only have 'column', 'startDate', 'endDate', and 'operator' properties.";break;case"status_filter":if(!e)return"Error: 'status_filter' template requires 'column' property.";if(!s||s.length===0)return"Error: 'status_filter' template requires 'values' property with status values.";if(l||v||w||b||g||d||h)return"Error: 'status_filter' template should only have 'column', 'values', and 'operator' properties.";break;case"category_filter":if(!e)return"Error: 'category_filter' template requires 'column' property.";if(!s||s.length===0)return"Error: 'category_filter' template requires 'values' property with category values.";if(l||v||w||b||g||d||h)return"Error: 'category_filter' template should only have 'column', 'values', and 'operator' properties.";break;case"value_range_filter":if(!e)return"Error: 'value_range_filter' template requires 'column' property.";if(!w&&!b)return"Error: 'value_range_filter' template requires either 'minValue' or 'maxValue' property.";if(s||l||v||g||d||h)return"Error: 'value_range_filter' template should only have 'column', 'minValue', 'maxValue', and 'operator' properties.";break;case"null_check_filter":if(!e)return"Error: 'null_check_filter' template requires 'column' property.";if(!g)return"Error: 'null_check_filter' template requires 'checkType' property.";if(s||r||l||v||w||b||d||h)return"Error: 'null_check_filter' template should only have 'column' and 'checkType' properties.";break;case"pattern_match_filter":if(!e)return"Error: 'pattern_match_filter' template requires 'column' property.";if(!d)return"Error: 'pattern_match_filter' template requires 'pattern' property.";if(s||l||v||w||b||g||h)return"Error: 'pattern_match_filter' template should only have 'column', 'pattern', and 'operator' properties.";break;case"custom":if(!h)return"Error: 'custom' template requires 'sql' property with SQL expression.";if(e||s||r||l||v||w||b||g||d)return"Error: 'custom' template should only have 'sql' property.";break;default:return`Error: Invalid template type '${p}'. Supported types are: geographic_filter, date_range_filter, status_filter, category_filter, value_range_filter, null_check_filter, pattern_match_filter, custom.`}return null},io=_zod.z.object({template:Qe,column:_zod.z.string({description:"Column name for the filter"}).optional(),operator:_zod.z.enum(["=","!=",">",">=","<","<=","BETWEEN","IN","NOT IN","LIKE","NOT LIKE","ILIKE"],{description:"SQL operator to use for filtering"}).optional(),values:_zod.z.array(_zod.z.string(),{description:"Values to filter for (geographic, status, or category values)"}).optional(),startDate:_zod.z.string({description:"Start date in YYYY-MM-DD format (e.g., '2023-01-01')"}).optional(),endDate:_zod.z.string({description:"End date in YYYY-MM-DD format (e.g., '2023-12-31')"}).optional(),minValue:_zod.z.number({description:"Minimum value for the range"}).optional(),maxValue:_zod.z.number({description:"Maximum value for the range"}).optional(),checkType:_zod.z.enum(["IS NULL","IS NOT NULL"],{description:"Type of null check to perform"}).optional(),pattern:_zod.z.string({description:"Pattern to match (e.g., '%@gmail.com', 'Mr.%', '%premium%')"}).optional(),sql:me.optional()}),Ge=_zod.z.object({segmentType:_zod.z.enum(["template","custom"],{description:"Whether to use a predefined template or create a custom segment"}).default("custom"),name:_zod.z.string({description:"Segment name - descriptive identifier for the segment"}),description:_zod.z.string({description:"Human-readable description of the segment's purpose"}).optional(),public:_zod.z.boolean({description:"Whether the segment is publicly accessible"}).default(!0),templateConfig:io.optional(),sql:me.optional(),security:_zod.z.object({enabled:_zod.z.boolean({description:"Whether to enable user group-based security for this segment"}).default(!1),includeGroups:_zod.z.union([_zod.z.literal("*"),_zod.z.array(_zod.z.string())],{description:"User groups to include. Use '*' for all users or specify group names"}).optional(),excludeGroups:_zod.z.array(_zod.z.string(),{description:"User groups to exclude from access"}).optional()}).optional(),meta:_zod.z.object({tags:_zod.z.array(_zod.z.string()).optional(),customProperties:_zod.z.record(_zod.z.string(),_zod.z.any()).optional()}).optional()}),oo={configurationType:_zod.z.enum(["single","multiple","bulk_template"],{description:"Configuration type: 'single' for one segment, 'multiple' for several segments, 'bulk_template' for template-based generation"}),segment:Ge.optional(),segments:_zod.z.array(Ge,{description:"Array of segment configurations"}).optional(),bulkTemplate:_zod.z.object({baseTemplate:Qe,variations:_zod.z.array(_zod.z.object({name:_zod.z.string({description:"Name of the segment variation"}),values:_zod.z.array(_zod.z.string(),{description:"Values for this variation"}).optional(),customSql:_zod.z.string({description:"Custom SQL to override template"}).optional()}))}).optional(),outputFormat:_zod.z.enum(["segments_only","full_table_structure"],{description:"Whether to output only segments or include them in a full table structure"}).default("segments_only"),tableName:_zod.z.string({description:"Table name for full table structure output"}).optional(),comments:_zod.z.string({description:"Additional comments or notes about the segment configuration"}).optional(),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-segments.yaml"}),fileName:_zod.z.string({description:"File name for the lens segments file. example: my-segments.yaml"})},Ve=n=>{n.tool("create-lens-segments",Zi,oo,async({configurationType:p,segment:e,segments:s,bulkTemplate:r,outputFormat:l,tableName:v,path:w,fileName:b})=>{let g=[],d=c=>{let S=ro(c);if(S)throw new Error(S);let{template:m,column:f,values:t,operator:a,startDate:y,endDate:x,minValue:C,maxValue:T,checkType:N,pattern:_,sql:B}=c;switch(m){case"geographic_filter":return a==="IN"&&t.length>1?`{TABLE}.${f} IN ('${t.join("', '")}')`:a==="LIKE"?t.map(z=>`{TABLE}.${f} LIKE '%${z}%'`).join(" OR "):`{TABLE}.${f} ${a||"="} '${t[0]}'`;case"date_range_filter":if(y&&x)return a==="BETWEEN"?`{TABLE}.${f} BETWEEN '${y}' AND '${x}'`:`{TABLE}.${f} >= '${y}' AND {TABLE}.${f} <= '${x}'`;if(y)return`{TABLE}.${f} ${a||">="} '${y}'`;if(x)return`{TABLE}.${f} ${a||"<="} '${x}'`;break;case"status_filter":return a==="IN"&&t.length>1?`{TABLE}.${f} IN ('${t.join("', '")}')`:a==="NOT IN"?`{TABLE}.${f} NOT IN ('${t.join("', '")}')`:`{TABLE}.${f} ${a||"="} '${t[0]}'`;case"category_filter":return a==="IN"&&t.length>1?`{TABLE}.${f} IN ('${t.join("', '")}')`:a==="LIKE"?t.map(z=>`{TABLE}.${f} LIKE '%${z}%'`).join(" OR "):`{TABLE}.${f} ${a||"="} '${t[0]}'`;case"value_range_filter":if(C!==void 0&&T!==void 0)return a==="BETWEEN"?`{TABLE}.${f} BETWEEN ${C} AND ${T}`:`{TABLE}.${f} >= ${C} AND {TABLE}.${f} <= ${T}`;if(C!==void 0)return`{TABLE}.${f} ${a||">="} ${C}`;if(T!==void 0)return`{TABLE}.${f} ${a||"<="} ${T}`;break;case"null_check_filter":return`{TABLE}.${f} ${N}`;case"pattern_match_filter":return`{TABLE}.${f} ${a||"LIKE"} '${_}'`;case"custom":return B;default:throw new Error(`Unsupported template: ${m}`)}throw new Error(`Unable to generate SQL for template: ${m}`)},h=c=>{let S={name:c.name,public:c.public!==void 0?c.public:!0};if(c.description&&(S.description=c.description),c.segmentType==="template"&&c.templateConfig)S.sql=d(c.templateConfig);else if(c.sql)S.sql=c.sql;else throw new Error(`Segment '${c.name}' must have either templateConfig or custom sql`);if(_optionalChain([c, 'access', _49 => _49.security, 'optionalAccess', _50 => _50.enabled])){let m={};(c.security.includeGroups||c.security.excludeGroups)&&(m.secure={user_groups:{}},c.security.includeGroups&&(m.secure.user_groups.includes=c.security.includeGroups),c.security.excludeGroups&&(m.secure.user_groups.excludes=c.security.excludeGroups)),_optionalChain([c, 'access', _51 => _51.meta, 'optionalAccess', _52 => _52.tags])&&(m.tags=c.meta.tags),_optionalChain([c, 'access', _53 => _53.meta, 'optionalAccess', _54 => _54.customProperties])&&Object.assign(m,c.meta.customProperties),Object.keys(m).length>0&&(S.meta=m)}return S};try{if(p==="single"){if(!e)throw new Error("Segment configuration is required for single type");g.push(h(e))}else if(p==="multiple"){if(!s||s.length===0)throw new Error("Segments array is required for multiple type");g=s.map(h)}else if(p==="bulk_template"){if(!r)throw new Error("Bulk template configuration is required for bulk_template type");let{baseTemplate:t,variations:a}=r;g=a.map(y=>{let x={template:t};return y.values&&(x.values=y.values),y.customSql&&(x.sql=y.customSql,x.template="custom"),h({name:y.name,segmentType:"template",templateConfig:x,public:!0})})}let c=()=>["# Lens Segments Configuration","# Reusable filters for consistent data access patterns","# ","# Segments provide:","# - Reusable filter logic across multiple queries","# - Row-level security and data governance","# - Consistent business rule application","# - Performance optimization through pre-defined filters","# ",`# Configuration Type: ${p}`,`# Number of Segments: ${g.length}`,"# ","# Usage in Lens models:","# - Reference segments in measures and dimensions","# - Apply as default filters in cubes","# - Use for user group-based data access control","# ","# Segment SQL uses {TABLE} placeholder for table reference","# Example: {TABLE}.status = 'active' AND {TABLE}.created_date >= '2023-01-01'",""].join(`
|
|
236
|
-
`),
|
|
235
|
+
- Null checks: "{TABLE}.email IS NOT NULL"`}),Pn=_zod.z.object({name:_zod.z.string({description:"Segment name - should be descriptive and follow naming conventions (e.g., 'active_customers', 'high_value_orders', 'california_sales')"}),sql:me,public:_zod.z.boolean({description:"Whether the segment is publicly visible to all users or restricted"}).default(!0),description:_zod.z.string({description:"Human-readable description of what this segment filters (e.g., 'Filters for customers with active status and recent purchases')"}).optional(),meta:ei.optional()}),Qe=_zod.z.enum(["geographic_filter","date_range_filter","status_filter","category_filter","value_range_filter","null_check_filter","pattern_match_filter","custom"],{description:"Predefined templates for common segment patterns. Choose 'custom' for fully custom segments"}),ti=s=>{let{template:l,column:e,values:a,operator:c,startDate:r,endDate:v,minValue:S,maxValue:y,checkType:g,pattern:d,sql:m}=s;switch(l){case"geographic_filter":if(!e)return"Error: 'geographic_filter' template requires 'column' property.";if(!a||a.length===0)return"Error: 'geographic_filter' template requires 'values' property with geographic values.";if(r||v||S||y||g||d||m)return"Error: 'geographic_filter' template should only have 'column', 'values', and 'operator' properties.";break;case"date_range_filter":if(!e)return"Error: 'date_range_filter' template requires 'column' property.";if(!r&&!v)return"Error: 'date_range_filter' template requires either 'startDate' or 'endDate' property.";if(a||S||y||g||d||m)return"Error: 'date_range_filter' template should only have 'column', 'startDate', 'endDate', and 'operator' properties.";break;case"status_filter":if(!e)return"Error: 'status_filter' template requires 'column' property.";if(!a||a.length===0)return"Error: 'status_filter' template requires 'values' property with status values.";if(r||v||S||y||g||d||m)return"Error: 'status_filter' template should only have 'column', 'values', and 'operator' properties.";break;case"category_filter":if(!e)return"Error: 'category_filter' template requires 'column' property.";if(!a||a.length===0)return"Error: 'category_filter' template requires 'values' property with category values.";if(r||v||S||y||g||d||m)return"Error: 'category_filter' template should only have 'column', 'values', and 'operator' properties.";break;case"value_range_filter":if(!e)return"Error: 'value_range_filter' template requires 'column' property.";if(!S&&!y)return"Error: 'value_range_filter' template requires either 'minValue' or 'maxValue' property.";if(a||r||v||g||d||m)return"Error: 'value_range_filter' template should only have 'column', 'minValue', 'maxValue', and 'operator' properties.";break;case"null_check_filter":if(!e)return"Error: 'null_check_filter' template requires 'column' property.";if(!g)return"Error: 'null_check_filter' template requires 'checkType' property.";if(a||c||r||v||S||y||d||m)return"Error: 'null_check_filter' template should only have 'column' and 'checkType' properties.";break;case"pattern_match_filter":if(!e)return"Error: 'pattern_match_filter' template requires 'column' property.";if(!d)return"Error: 'pattern_match_filter' template requires 'pattern' property.";if(a||r||v||S||y||g||m)return"Error: 'pattern_match_filter' template should only have 'column', 'pattern', and 'operator' properties.";break;case"custom":if(!m)return"Error: 'custom' template requires 'sql' property with SQL expression.";if(e||a||c||r||v||S||y||g||d)return"Error: 'custom' template should only have 'sql' property.";break;default:return`Error: Invalid template type '${l}'. Supported types are: geographic_filter, date_range_filter, status_filter, category_filter, value_range_filter, null_check_filter, pattern_match_filter, custom.`}return null},ri=_zod.z.object({template:Qe,column:_zod.z.string({description:"Column name for the filter"}).optional(),operator:_zod.z.enum(["=","!=",">",">=","<","<=","BETWEEN","IN","NOT IN","LIKE","NOT LIKE","ILIKE"],{description:"SQL operator to use for filtering"}).optional(),values:_zod.z.array(_zod.z.string(),{description:"Values to filter for (geographic, status, or category values)"}).optional(),startDate:_zod.z.string({description:"Start date in YYYY-MM-DD format (e.g., '2023-01-01')"}).optional(),endDate:_zod.z.string({description:"End date in YYYY-MM-DD format (e.g., '2023-12-31')"}).optional(),minValue:_zod.z.number({description:"Minimum value for the range"}).optional(),maxValue:_zod.z.number({description:"Maximum value for the range"}).optional(),checkType:_zod.z.enum(["IS NULL","IS NOT NULL"],{description:"Type of null check to perform"}).optional(),pattern:_zod.z.string({description:"Pattern to match (e.g., '%@gmail.com', 'Mr.%', '%premium%')"}).optional(),sql:me.optional()}),Ge=_zod.z.object({segmentType:_zod.z.enum(["template","custom"],{description:"Whether to use a predefined template or create a custom segment"}).default("custom"),name:_zod.z.string({description:"Segment name - descriptive identifier for the segment"}),description:_zod.z.string({description:"Human-readable description of the segment's purpose"}).optional(),public:_zod.z.boolean({description:"Whether the segment is publicly accessible"}).default(!0),templateConfig:ri.optional(),sql:me.optional(),security:_zod.z.object({enabled:_zod.z.boolean({description:"Whether to enable user group-based security for this segment"}).default(!1),includeGroups:_zod.z.union([_zod.z.literal("*"),_zod.z.array(_zod.z.string())],{description:"User groups to include. Use '*' for all users or specify group names"}).optional(),excludeGroups:_zod.z.array(_zod.z.string(),{description:"User groups to exclude from access"}).optional()}).optional(),meta:_zod.z.object({tags:_zod.z.array(_zod.z.string()).optional(),customProperties:_zod.z.record(_zod.z.string(),_zod.z.any()).optional()}).optional()}),oi={configurationType:_zod.z.enum(["single","multiple","bulk_template"],{description:"Configuration type: 'single' for one segment, 'multiple' for several segments, 'bulk_template' for template-based generation"}),segment:Ge.optional(),segments:_zod.z.array(Ge,{description:"Array of segment configurations"}).optional(),bulkTemplate:_zod.z.object({baseTemplate:Qe,variations:_zod.z.array(_zod.z.object({name:_zod.z.string({description:"Name of the segment variation"}),values:_zod.z.array(_zod.z.string(),{description:"Values for this variation"}).optional(),customSql:_zod.z.string({description:"Custom SQL to override template"}).optional()}))}).optional(),outputFormat:_zod.z.enum(["segments_only","full_table_structure"],{description:"Whether to output only segments or include them in a full table structure"}).default("segments_only"),tableName:_zod.z.string({description:"Table name for full table structure output"}).optional(),comments:_zod.z.string({description:"Additional comments or notes about the segment configuration"}).optional(),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-segments.yaml"}),fileName:_zod.z.string({description:"File name for the lens segments file. example: my-segments.yaml"})},Ve=s=>{s.tool("create-lens-segments",Jo,oi,async({configurationType:l,segment:e,segments:a,bulkTemplate:c,outputFormat:r,tableName:v,path:S,fileName:y})=>{let g=[],d=p=>{let w=ti(p);if(w)throw new Error(w);let{template:f,column:h,values:t,operator:n,startDate:b,endDate:x,minValue:q,maxValue:L,checkType:N,pattern:_,sql:B}=p;switch(f){case"geographic_filter":return n==="IN"&&t.length>1?`{TABLE}.${h} IN ('${t.join("', '")}')`:n==="LIKE"?t.map(z=>`{TABLE}.${h} LIKE '%${z}%'`).join(" OR "):`{TABLE}.${h} ${n||"="} '${t[0]}'`;case"date_range_filter":if(b&&x)return n==="BETWEEN"?`{TABLE}.${h} BETWEEN '${b}' AND '${x}'`:`{TABLE}.${h} >= '${b}' AND {TABLE}.${h} <= '${x}'`;if(b)return`{TABLE}.${h} ${n||">="} '${b}'`;if(x)return`{TABLE}.${h} ${n||"<="} '${x}'`;break;case"status_filter":return n==="IN"&&t.length>1?`{TABLE}.${h} IN ('${t.join("', '")}')`:n==="NOT IN"?`{TABLE}.${h} NOT IN ('${t.join("', '")}')`:`{TABLE}.${h} ${n||"="} '${t[0]}'`;case"category_filter":return n==="IN"&&t.length>1?`{TABLE}.${h} IN ('${t.join("', '")}')`:n==="LIKE"?t.map(z=>`{TABLE}.${h} LIKE '%${z}%'`).join(" OR "):`{TABLE}.${h} ${n||"="} '${t[0]}'`;case"value_range_filter":if(q!==void 0&&L!==void 0)return n==="BETWEEN"?`{TABLE}.${h} BETWEEN ${q} AND ${L}`:`{TABLE}.${h} >= ${q} AND {TABLE}.${h} <= ${L}`;if(q!==void 0)return`{TABLE}.${h} ${n||">="} ${q}`;if(L!==void 0)return`{TABLE}.${h} ${n||"<="} ${L}`;break;case"null_check_filter":return`{TABLE}.${h} ${N}`;case"pattern_match_filter":return`{TABLE}.${h} ${n||"LIKE"} '${_}'`;case"custom":return B;default:throw new Error(`Unsupported template: ${f}`)}throw new Error(`Unable to generate SQL for template: ${f}`)},m=p=>{let w={name:p.name,public:p.public!==void 0?p.public:!0};if(p.description&&(w.description=p.description),p.segmentType==="template"&&p.templateConfig)w.sql=d(p.templateConfig);else if(p.sql)w.sql=p.sql;else throw new Error(`Segment '${p.name}' must have either templateConfig or custom sql`);if(_optionalChain([p, 'access', _49 => _49.security, 'optionalAccess', _50 => _50.enabled])){let f={};(p.security.includeGroups||p.security.excludeGroups)&&(f.secure={user_groups:{}},p.security.includeGroups&&(f.secure.user_groups.includes=p.security.includeGroups),p.security.excludeGroups&&(f.secure.user_groups.excludes=p.security.excludeGroups)),_optionalChain([p, 'access', _51 => _51.meta, 'optionalAccess', _52 => _52.tags])&&(f.tags=p.meta.tags),_optionalChain([p, 'access', _53 => _53.meta, 'optionalAccess', _54 => _54.customProperties])&&Object.assign(f,p.meta.customProperties),Object.keys(f).length>0&&(w.meta=f)}return w};try{if(l==="single"){if(!e)throw new Error("Segment configuration is required for single type");g.push(m(e))}else if(l==="multiple"){if(!a||a.length===0)throw new Error("Segments array is required for multiple type");g=a.map(m)}else if(l==="bulk_template"){if(!c)throw new Error("Bulk template configuration is required for bulk_template type");let{baseTemplate:t,variations:n}=c;g=n.map(b=>{let x={template:t};return b.values&&(x.values=b.values),b.customSql&&(x.sql=b.customSql,x.template="custom"),m({name:b.name,segmentType:"template",templateConfig:x,public:!0})})}let p=()=>["# Lens Segments Configuration","# Reusable filters for consistent data access patterns","# ","# Segments provide:","# - Reusable filter logic across multiple queries","# - Row-level security and data governance","# - Consistent business rule application","# - Performance optimization through pre-defined filters","# ",`# Configuration Type: ${l}`,`# Number of Segments: ${g.length}`,"# ","# Usage in Lens models:","# - Reference segments in measures and dimensions","# - Apply as default filters in cubes","# - Use for user group-based data access control","# ","# Segment SQL uses {TABLE} placeholder for table reference","# Example: {TABLE}.status = 'active' AND {TABLE}.created_date >= '2023-01-01'",""].join(`
|
|
236
|
+
`),w;if(r==="full_table_structure"){let t={name:v||"example_table",segments:g};w=p()+`table:
|
|
237
237
|
`+_yaml.stringify.call(void 0, t).split(`
|
|
238
|
-
`).map(
|
|
239
|
-
`)}else
|
|
238
|
+
`).map(n=>` ${n}`).join(`
|
|
239
|
+
`)}else w=p()+`segments:
|
|
240
240
|
`+_yaml.stringify.call(void 0, g).split(`
|
|
241
241
|
`).map(t=>` ${t}`).join(`
|
|
242
|
-
`);let
|
|
243
|
-
${
|
|
242
|
+
`);let f=S.includes(".yaml")||S.includes(".yml")?S:S.endsWith("/")?S+y:`${S}/${y}`,h=_path2.default.dirname(f);return _fs.existsSync.call(void 0, h)||_fs.mkdirSync.call(void 0, h,{recursive:!0}),_fs.writeFileSync.call(void 0, f,w),{content:[{type:"text",text:`Lens segments configuration created successfully at ${f}`},{type:"text",text:`Generated ${g.length} segments using ${l} configuration`},{type:"text",text:`File content:
|
|
243
|
+
${w}`}]}}catch(p){return{content:[{type:"text",text:`Error creating lens segments: ${p}`}]}}})};var li=`Lens Table Tool for creating comprehensive table definitions in DataOS Lens semantic layer.
|
|
244
244
|
This tool helps create logical table definitions with dimensions, measures, segments, and joins that form the foundation of Lens data models.
|
|
245
245
|
|
|
246
246
|
Lens tables are logical constructs that define core business entities (Customer, Product, Sales) by extending physical tables with dimensions (descriptive attributes), measures (aggregated values), segments (filters), and relationships (joins).
|
|
247
247
|
Examples: Create customer tables with demographics and lifetime value, product tables with categories and inventory metrics, sales tables with revenue calculations and time-based segments.
|
|
248
|
-
`,
|
|
249
|
-
`)+_yaml.stringify.call(void 0,
|
|
248
|
+
`,pi=_zod.z.enum(["string","number","time","boolean","date","timestamp"],{description:"Data type for dimensions - string (text), number (numeric), time (datetime), boolean (true/false), date (date only), timestamp (date with time)"}),ui=_zod.z.enum(["count","count_distinct","count_distinct_approx","sum","avg","min","max","string","number","boolean","time"],{description:"Measure type - count (row count), count_distinct (unique values), sum (total), avg (average), min/max (extremes), string/number/boolean/time (calculated measures)"}),di=_zod.z.enum(["one_to_one","one_to_many","many_to_one"],{description:"Relationship type between tables - one_to_one (1:1), one_to_many (1:N parent to children), many_to_one (N:1 children to parent)"}),mi=_zod.z.enum(["redact","md5"],{description:"Data masking function - redact (replace with 'redact' string), md5 (hash with MD5 algorithm)"}),fi=_zod.z.object({includes:_zod.z.union([_zod.z.literal("*"),_zod.z.array(_zod.z.string())],{description:"User groups to include - '*' for all users, or array of group names like ['analyst', 'engineer']"}).optional(),excludes:_zod.z.array(_zod.z.string(),{description:"User groups to exclude from access. Examples: ['reader', 'guest']"}).optional()}),gi=_zod.z.object({func:mi.optional(),user_groups:_zod.z.union([_zod.z.literal("*"),fi],{description:"User groups for security policy - '*' for all users, or object with includes/excludes"}).optional()}),fe=_zod.z.object({secure:gi.optional(),additionalProperties:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional custom metadata as key-value pairs"}).optional()}),hi=_zod.z.object({name:_zod.z.string({description:"Target table name for the join. Examples: 'orders', 'customers', 'products'"}),relationship:di,sql:_zod.z.string({description:"Join condition using {TABLE} and {target_table} placeholders. Examples: '{TABLE.customer_id} = {orders.customer_id}', '{TABLE.product_id} = {inventory.product_id}'"})}),yi=_zod.z.object({name:_zod.z.string({description:"Dimension name in snake_case. Examples: 'customer_id', 'product_name', 'order_date', 'email_address'"}),title:_zod.z.string({description:"Human-readable title for the dimension. Examples: 'Customer ID', 'Product Name', 'Order Date'"}).optional(),description:_zod.z.string({description:"Business description of the dimension's purpose and usage"}),type:pi,sql:_zod.z.string({description:"SQL expression or column name for the dimension. Examples: 'customer_id', 'UPPER(product_name)', 'DATE(created_at)'"}),column:_zod.z.string({description:"References the column defined in the table's SQL. Examples: 'customer_id', 'product_name', 'order_date'"}).optional(),primary_key:_zod.z.boolean({description:"Whether this dimension is a primary key for the table"}).default(!1),public:_zod.z.boolean({description:"Whether the dimension is visible to all users (true) or hidden (false)"}).default(!0),sub_query:_zod.z.boolean({description:"Set to true to reference a measure from another table in this dimension"}).default(!1),meta:fe.optional()}),bi=_zod.z.object({sql:_zod.z.string({description:"SQL filter condition. Examples: '{TABLE}.status = \\'active\\'', '{TABLE}.amount > 100', '{TABLE}.category IN (\\'electronics\\', \\'books\\')'"})}),vi=_zod.z.object({name:_zod.z.string({description:"Measure name in snake_case. Examples: 'total_revenue', 'customer_count', 'avg_order_value', 'monthly_sales'"}),title:_zod.z.string({description:"Human-readable title for the measure. Examples: 'Total Revenue', 'Customer Count', 'Average Order Value'"}).optional(),description:_zod.z.string({description:"Business description of what this measure represents and how it's calculated"}),type:ui,sql:_zod.z.string({description:"SQL expression for the measure calculation. Examples: 'COUNT(customer_id)', 'SUM(order_amount)', 'AVG(product_price)', '{total_revenue} / {order_count}'"}),public:_zod.z.boolean({description:"Whether the measure is visible to all users (true) or hidden (false)"}).default(!0),filters:_zod.z.array(bi,{description:"Filters to apply to this measure for specific conditions"}).optional(),meta:fe.optional()}),Si=_zod.z.object({name:_zod.z.string({description:"Segment name in snake_case. Examples: 'active_customers', 'high_value_orders', 'premium_products'"}),description:_zod.z.string({description:"Business description of what this segment filters for"}).optional(),sql:_zod.z.string({description:"SQL filter condition using {TABLE} placeholder. Examples: '{TABLE}.status = \\'active\\'', '{TABLE}.amount > 1000', '{TABLE}.category = \\'premium\\' AND {TABLE}.stock > 0'"}),public:_zod.z.boolean({description:"Whether the segment is visible to all users (true) or hidden (false)"}).default(!0),meta:fe.optional()}),wi=_zod.z.object({name:_zod.z.string({description:"Table name in snake_case following pattern ^[a-zA-Z][a-zA-Z0-9_]*$. Examples: 'customers', 'sales_orders', 'product_inventory'"}),sql:_zod.z.string({description:"SQL reference using load_sql function. Examples: '{{ load_sql(\\'customers\\') }}', '{{ load_sql(\\'sales_data\\') }}'"}),description:_zod.z.string({description:"Business description of the table's purpose and the entity it represents"}),public:_zod.z.boolean({description:"Whether the table is visible to all users (true) or hidden (false). Use true for widely accessible tables, false for sensitive data"}).default(!0),joins:_zod.z.array(hi,{description:"Join relationships with other tables. All joins are LEFT JOINs in Lens"}).optional(),dimensions:_zod.z.array(yi,{description:"Dimensional attributes (descriptive columns) for slicing and dicing data"}),measures:_zod.z.array(vi,{description:"Aggregated metrics and calculated values for quantitative analysis"}).optional(),segments:_zod.z.array(Si,{description:"Predefined filters for common data subsets and business rules"}).optional()}),Ei={table:wi,path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-table.yaml"}),fileName:_zod.z.string({description:"File name for the lens table file. example: my-table.yaml"})},He=s=>{s.tool("create-lens-table",li,Ei,async({table:l,path:e,fileName:a})=>{let{name:c,sql:r,description:v,public:S,joins:y=[],dimensions:g,measures:d=[],segments:m=[]}=l,p={tables:[{name:c,sql:r,description:v,public:S,...y.length>0&&{joins:y.map(t=>({name:t.name,relationship:t.relationship,sql:t.sql}))},dimensions:g.map(t=>{let n={name:t.name,type:t.type,description:t.description,sql:t.sql,primary_key:t.primary_key,public:t.public};return t.title&&(n.title=t.title),t.column&&(n.column=t.column),t.sub_query&&(n.sub_query=t.sub_query),t.meta&&(n.meta=t.meta),n}),...d.length>0&&{measures:d.map(t=>{let n={name:t.name,type:t.type,description:t.description,sql:t.sql,public:t.public};return t.title&&(n.title=t.title),t.filters&&t.filters.length>0&&(n.filters=t.filters),t.meta&&(n.meta=t.meta),n})},...m.length>0&&{segments:m.map(t=>{let n={name:t.name,sql:t.sql,public:t.public};return t.description&&(n.description=t.description),t.meta&&(n.meta=t.meta),n})}}]},w=r.includes("load_sql")&&_optionalChain([r, 'access', _55 => _55.match, 'call', _56 => _56(/load_sql\(['"](.*?)['"]\)/), 'optionalAccess', _57 => _57[1]])||c,h=[`# Lens Table Definition: ${c}`,`# Description: ${v}`,`# Generated with ${g.length} dimensions, ${d.length} measures, ${m.length} segments`,y.length>0?`# Joins: ${y.map(t=>t.name).join(", ")}`:"",""].filter(t=>t!=="").join(`
|
|
249
|
+
`)+_yaml.stringify.call(void 0, p);try{let t=e.includes(".yaml")||e.includes(".yml")?e:e.endsWith("/")?e+a:`${e}/${a}`,n=_path2.default.dirname(t);return _fs.existsSync.call(void 0, n)||_fs.mkdirSync.call(void 0, n,{recursive:!0}),_fs.writeFileSync.call(void 0, t,h),{content:[{type:"text",text:`Lens table file created successfully at ${t}`},{type:"text",text:`File content: ${h}`}]}}catch(t){return{content:[{type:"text",text:`Error creating lens table file: ${t}`}]}}})};var Li=`Lens Views serve as a layer atop the data graph of tables, presenting an abstraction of the entire data model with which consumers can interact. Views are essential for defining metrics and providing a simplified interface for end-users to interact with key business metrics.
|
|
250
250
|
|
|
251
251
|
There are two main approaches for designing views:
|
|
252
252
|
|
|
@@ -254,10 +254,10 @@ There are two main approaches for designing views:
|
|
|
254
254
|
|
|
255
255
|
2. **Metrics-first approach**: Views focused on specific performance metrics, each containing one key measure with relevant dimensions for grouping and filtering. Each view represents a specific business metric over time.
|
|
256
256
|
|
|
257
|
-
Views reference dimensions, measures, and segments from multiple logical tables but don't have any measures, dimensions, or segments of their own.`,
|
|
258
|
-
`)},
|
|
257
|
+
Views reference dimensions, measures, and segments from multiple logical tables but don't have any measures, dimensions, or segments of their own.`,Ci=_zod.z.string({description:"Cron expression for scheduling (e.g., '*/5 * * * *' for every 5 minutes, '0 */6 * * *' for every 6 hours)"}).regex(/^(\*|([0-5]?\d)) (\*|([01]?\d|2[0-3])) (\*|([01]?\d|[12]\d|3[01])) (\*|([01]?\d)) (\*|[0-6])$/,"Invalid cron expression format"),qi=_zod.z.object({expression:Ci.describe("Cron expression for metric refresh schedule (e.g., '*/5 * * * *', '0 */6 * * *')"),timezone:_zod.z.string({description:"Timezone for metric calculations in TZ database format (e.g., 'UTC', 'America/Vancouver', 'America/Toronto')"}).default("UTC"),window:_zod.z.enum(["day","week","month","quarter","year"],{description:"Time window for metric aggregation - defines the granularity of metric calculation"}),excludes:_zod.z.array(_zod.z.string(),{description:"List of measures or dimensions to exclude from the metric view (e.g., ['purchases', 'source'])"}).optional()}),Di=_zod.z.object({timeseries:_zod.z.string({description:"Time dimension for Iris dashboard visualization (format: 'table.column_name', e.g., 'sales.invoice_date')"}),excludes:_zod.z.array(_zod.z.string(),{description:"List of fields to exclude from Iris dashboard (e.g., ['sales.source', 'sales.invoice_date'])"}).optional(),refresh:_zod.z.object({every:_zod.z.string({description:"Refresh interval for Iris dashboard (e.g., '24h', '12h', '6h', '1h')"}).regex(/^\d+[hmd]$/,"Format should be like '24h', '12h', '30m', '15m'")}).optional()}),Ke=_zod.z.object({title:_zod.z.string({description:"Human-readable title for the view/metric (e.g., 'Customer Spending by Product Category')"}).optional(),tags:_zod.z.array(_zod.z.string(),{description:"Tags for categorization and discovery. Common patterns: ['DPDomain.{Domain}', 'DPUsecase.{UseCase}', 'DPTier.{Tier}'] (e.g., ['DPDomain.Sales', 'DPUsecase.Revenue Analysis', 'DPTier.Consumer Aligned'])"}).optional(),export_to_iris:_zod.z.boolean({description:"Whether to export this view to Iris dashboard for visualization (mainly for entity-first views)"}).optional(),iris:Di.optional(),metric:qi.optional(),refresh:_zod.z.object({every:_zod.z.string({description:"General refresh interval for the view (e.g., '24h', '12h', '6h')"}).regex(/^\d+[hmd]$/,"Format should be like '24h', '12h', '30m'")}).optional()}),Ye=_zod.z.object({join_path:_zod.z.string({description:"Name of the logical table to include in the view. Must match a table defined in your Lens model (e.g., 'sales', 'customer', 'product', 'marketing_campaign')"}),prefix:_zod.z.boolean({description:"Whether to prefix the included fields with the table name. Set to true to avoid naming conflicts (e.g., 'customer_name' vs 'name')"}).default(!1),includes:_zod.z.array(_zod.z.string(),{description:"List of specific measures, dimensions, or segments to include from this table (e.g., ['customer_id', 'total_revenue', 'invoice_date', 'churn_rate'])"}),excludes:_zod.z.array(_zod.z.string(),{description:"List of fields to exclude from this table (alternative to includes for when you want most fields)"}).optional()}),Je=_zod.z.object({name:_zod.z.string({description:"View name - should be descriptive and follow naming conventions. For metrics-first: use metric name (e.g., 'customer_churn_rate', 'monthly_revenue'). For entity-first: use entity name (e.g., 'customer_360', 'product_analysis')"}),description:_zod.z.string({description:"Detailed description of the view's purpose and what insights it provides (e.g., 'This metric tracks customer churn rate over time, helping identify retention trends and at-risk customer segments')"}),public:_zod.z.boolean({description:"Whether the view is publicly accessible to all users or restricted"}).default(!0),meta:Ke.optional(),tables:_zod.z.array(Ye,{description:"List of tables to include in this view with their specific field selections"}).min(1,"At least one table must be included in the view")}),Hn=_zod.z.object({views:_zod.z.array(Je,{description:"Array of view definitions - you can define multiple views for different use cases or metrics"}).min(1,"At least one view must be defined")}),Ri={viewType:_zod.z.enum(["single","multiple"],{description:"Whether to create a single view or multiple views in one configuration"}).default("single"),name:_zod.z.string({description:"View name - descriptive identifier for the view (required for single view type)"}).optional(),description:_zod.z.string({description:"Detailed description of the view's purpose (required for single view type)"}).optional(),public:_zod.z.boolean({description:"Whether the view is publicly accessible"}).default(!0),approach:_zod.z.enum(["entity-first","metrics-first"],{description:"Design approach: 'entity-first' for comprehensive entity views, 'metrics-first' for specific business metrics"}).optional(),meta:Ke.optional(),tables:_zod.z.array(Ye,{description:"List of tables to include in this view (required for single view type)"}).optional(),views:_zod.z.array(Je,{description:"Array of view definitions (required for multiple view type)"}).optional(),comments:_zod.z.string({description:"Additional comments or notes about the view configuration"}).optional(),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-views.yaml"}),fileName:_zod.z.string({description:"File name for the lens views file. example: my-views.yaml"})},Ze=s=>{s.tool("create-lens-views",Li,Ri,async({viewType:l,name:e,description:a,public:c,approach:r,meta:v,tables:S,views:y,comments:g,path:d,fileName:m})=>{let p;if(l==="multiple"){if(!y||y.length===0)throw new Error("Views array is required for multiple view type");p={views:y}}else{if(!e||!a||!S||S.length===0)throw new Error("Name, description, and tables are required for single view type");let t={name:e,description:a,public:c,tables:S};v&&(t.meta=v),p={views:[t]}}let w=()=>{let t=[];return g&&t.push(`# ${g}`),r&&(r==="entity-first"?(t.push("# Entity-first approach: Comprehensive view of entity with related measures and dimensions"),t.push("# Use this for denormalized tables that describe entities fully")):(t.push("# Metrics-first approach: Focused on specific business metrics"),t.push("# Each view represents a key performance indicator with relevant dimensions"))),t.push("# Views reference dimensions, measures, and segments from logical tables"),t.push("# Views don't define their own measures/dimensions - they include them from tables"),l==="multiple"&&t.push("# Multiple views defined for different use cases or metrics"),t.join(`
|
|
258
|
+
`)},f=_yaml.stringify.call(void 0, p),h=w()+`
|
|
259
259
|
|
|
260
|
-
`+
|
|
260
|
+
`+f;try{let t=d.includes(".yaml")||d.includes(".yml")?d:d.endsWith("/")?d+m:`${d}/${m}`,n=_path2.default.dirname(t);return _fs.existsSync.call(void 0, n)||_fs.mkdirSync.call(void 0, n,{recursive:!0}),_fs.writeFileSync.call(void 0, t,h),{content:[{type:"text",text:`Lens views file created successfully at ${t}`},{type:"text",text:`File content: ${h}`}]}}catch(t){return{content:[{type:"text",text:`Error creating lens views file: ${t}`}]}}})};var Pi=`Lens User Groups are used to manage both data access and API scopes, which control access to specific functionalities and endpoints in the Lens semantic layer. This forms part of the access policy, ensuring users interact only with the data and features they are authorized to use.
|
|
261
261
|
|
|
262
262
|
User Groups provide:
|
|
263
263
|
- **API Scope Management**: Control access to specific REST API endpoints (meta, data, graphql)
|
|
@@ -273,20 +273,75 @@ Key Features:
|
|
|
273
273
|
- Integration with Lens Studio Interface
|
|
274
274
|
- Support for organizational and regulatory compliance
|
|
275
275
|
|
|
276
|
-
User groups extend governance to the Lens Studio Interface, where access to specific tabs and functionalities can be controlled, supporting compliance with organizational and regulatory standards.`,
|
|
276
|
+
User groups extend governance to the Lens Studio Interface, where access to specific tabs and functionalities can be controlled, supporting compliance with organizational and regulatory standards.`,K=_zod.z.enum(["meta","data","graphql","jobs","source"],{description:`API scopes control access to specific Lens endpoints:
|
|
277
277
|
- 'meta': Access to metadata endpoints (/v2/meta) - view sources, authors, timezones, security context
|
|
278
278
|
- 'data': Access to data query endpoints (/v2/load, /v2/sql) - retrieve and analyze data
|
|
279
279
|
- 'graphql': Access to GraphQL endpoint (/v2/graphql) - GraphQL-based queries
|
|
280
280
|
- 'jobs': Access to job-related endpoints (advanced functionality)
|
|
281
|
-
- 'source': Access to source-related endpoints (advanced functionality)`}),et=_zod.z.union([_zod.z.literal("*"),_zod.z.string().regex(/^users:id:.+$/,"User ID must follow pattern 'users:id:username'")],{description:"User specification pattern: '*' for all users or 'users:id:username' for specific users (e.g., 'users:id:johndoe', 'users:id:iamgroot')"}),
|
|
282
|
-
`),
|
|
281
|
+
- 'source': Access to source-related endpoints (advanced functionality)`}),et=_zod.z.union([_zod.z.literal("*"),_zod.z.string().regex(/^users:id:.+$/,"User ID must follow pattern 'users:id:username'")],{description:"User specification pattern: '*' for all users or 'users:id:username' for specific users (e.g., 'users:id:johndoe', 'users:id:iamgroot')"}),ne=_zod.z.union([_zod.z.literal("*"),_zod.z.array(et,{description:"List of users to include in this group. Use specific user IDs for small groups or '*' for all users"})],{description:"Users to include in this group. Use '*' to include all users or specify individual user IDs"}),se=_zod.z.array(et,{description:"List of users to exclude from this group (e.g., ['users:id:tempuser', 'users:id:guest'])"}),ea=_zod.z.object({name:_zod.z.string({description:"Unique group name - should be descriptive and follow naming conventions (e.g., 'data_analyst', 'data_engineer', 'business_user', 'admin')"}),description:_zod.z.string({description:"Brief description of the user group's purpose and the type of users it contains (e.g., 'Data analysts responsible for reporting and visualization tasks')"}).optional(),api_scopes:_zod.z.array(K,{description:"List of API scopes this group can access. Follow principle of least privilege - grant only necessary access"}).optional(),includes:ne,excludes:se.optional()}),he=_zod.z.enum(["data_analyst","data_engineer","data_scientist","business_user","admin","viewer","developer","custom"],{description:"Predefined role templates with typical API scope configurations. Choose 'custom' for fully custom groups"}),ta=_zod.z.object({template:he,name:_zod.z.string({description:"Group name - will be used as-is or can override template default"}).optional(),description:_zod.z.string({description:"Custom description for this group"}).optional(),customApiScopes:_zod.z.array(K,{description:"Override default API scopes for this template"}).optional(),includes:ne,excludes:se.optional(),additionalProperties:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional custom properties for the group"}).optional()}),Oi=s=>{let{type:l,group:e}=s;switch(l){case"custom":if(!e)return"Error: 'custom' type requires 'group' property with userGroupSchema configuration.";if(!e.name)return"Error: Custom user group requires 'name' property.";if(!e.includes)return"Error: Custom user group requires 'includes' property.";if(e.template||e.customApiScopes||e.additionalProperties)return"Error: Custom user group should not have template-specific properties ('template', 'customApiScopes', 'additionalProperties').";break;case"template":if(!e)return"Error: 'template' type requires 'group' property with templateUserGroupSchema configuration.";if(!e.template)return"Error: Template user group requires 'template' property.";if(!e.includes)return"Error: Template user group requires 'includes' property.";break;default:return`Error: Invalid user group configuration type '${l}'. Supported types are: custom, template.`}return null},ge=_zod.z.object({type:_zod.z.enum(["custom","template"],{description:"Type of user group configuration: 'custom' for fully custom groups, 'template' for template-based groups"}),group:_zod.z.object({name:_zod.z.string({description:"Group name - will be used as-is or can override template default"}).optional(),description:_zod.z.string({description:"Custom description for this group"}).optional(),includes:ne,excludes:se.optional(),api_scopes:_zod.z.array(K,{description:"List of API scopes this group can access. Follow principle of least privilege - grant only necessary access"}).optional(),template:he.optional(),customApiScopes:_zod.z.array(K,{description:"Override default API scopes for this template"}).optional(),additionalProperties:_zod.z.record(_zod.z.string(),_zod.z.any(),{description:"Additional custom properties for the group"}).optional()})}),ra=_zod.z.object({groups:_zod.z.array(ge,{description:"Array of user group configurations"}).min(1,"At least one user group must be defined")}),Mi=_zod.z.object({baseTemplate:he,variations:_zod.z.array(_zod.z.object({name:_zod.z.string({description:"Name for this variation (e.g., 'marketing_analyst', 'finance_analyst')"}),description:_zod.z.string({description:"Description for this variation"}).optional(),customApiScopes:_zod.z.array(K).optional(),includes:ne,excludes:se.optional()}),{description:"Variations of the base template to generate"}).min(1,"At least one variation must be specified"),commonProperties:_zod.z.object({apiScopes:_zod.z.array(K).optional(),excludes:se.optional()}).optional()}),Ui={configurationType:_zod.z.enum(["single","multiple","bulk_template"],{description:"Type of user group configuration: 'single' for one group, 'multiple' for several groups, 'bulk_template' for generating multiple groups from templates"}).default("single"),userGroup:ge.optional(),userGroups:_zod.z.array(ge,{description:"Array of user group configurations for multiple groups"}).optional(),bulkTemplate:Mi.optional(),includeDefaultGroup:_zod.z.boolean({description:"Whether to include a default group that gives all users basic access"}).default(!0),defaultGroupConfig:_zod.z.object({name:_zod.z.string().default("default"),description:_zod.z.string().default("Default user group with basic access"),api_scopes:_zod.z.array(K).default(["data","graphql"]),includes:ne.default("*")}).optional(),outputFormat:_zod.z.enum(["user_groups_only","complete_file"],{description:"Whether to output only user groups or include them in a complete user_groups.yaml file structure"}).default("complete_file"),comments:_zod.z.string({description:"Additional comments or notes about the user group configuration"}).optional(),path:_zod.z.string({description:"Absolute path where the file will be stored. example: ~/Documents/project-name/my-user-groups.yaml"}),fileName:_zod.z.string({description:"File name for the lens user groups file. example: my-user-groups.yaml"})},tt=s=>{s.tool("create-lens-user-groups",Pi,Ui,async({configurationType:l,userGroup:e,userGroups:a,bulkTemplate:c,includeDefaultGroup:r,defaultGroupConfig:v,outputFormat:S,comments:y,path:g,fileName:d})=>{let m=[],p=b=>{switch(b){case"data_analyst":return["meta","data","graphql"];case"data_engineer":return["meta","data","graphql","jobs"];case"data_scientist":return["meta","data","graphql","jobs","source"];case"business_user":return["meta","data"];case"admin":return["meta","data","graphql","jobs","source"];case"viewer":return["meta"];case"developer":return["meta","data","graphql","jobs"];default:return["meta","data"]}},w=b=>b==="custom"?"custom_group":b,f=b=>{switch(b){case"data_analyst":return"Data analysts responsible for reporting, visualization, and business intelligence tasks";case"data_engineer":return"Data engineers who build and maintain data pipelines, transformations, and infrastructure";case"data_scientist":return"Data scientists with full access to data, modeling capabilities, and source management";case"business_user":return"Business users who need access to view and analyze data for decision making";case"admin":return"Administrators with full access to all Lens functionality and management capabilities";case"viewer":return"Read-only users who can view metadata but cannot query data";case"developer":return"Developers who build applications and integrations using Lens APIs";default:return"Custom user group with specific access requirements"}},h=b=>{let x=Oi(b);if(x)throw new Error(x);if(b.type==="template"){let{group:q}=b,L=q.template;return{name:q.name||w(L),description:q.description||f(L),api_scopes:q.customApiScopes||p(L),includes:q.includes,...q.excludes&&{excludes:q.excludes},...q.additionalProperties&&q.additionalProperties}}else{let{group:q}=b;return{name:q.name,...q.description&&{description:q.description},...q.api_scopes&&{api_scopes:q.api_scopes},includes:q.includes,...q.excludes&&{excludes:q.excludes}}}};if(l==="single"){if(!e)throw new Error("User group configuration is required for single type");m.push(h(e))}else if(l==="multiple"){if(!a||a.length===0)throw new Error("User groups array is required for multiple type");m=a.map(h)}else if(l==="bulk_template"){if(!c)throw new Error("Bulk template configuration is required for bulk_template type");let{baseTemplate:b,variations:x,commonProperties:q}=c;m=x.map(L=>({name:L.name,description:L.description||f(b),api_scopes:L.customApiScopes||_optionalChain([q, 'optionalAccess', _58 => _58.apiScopes])||p(b),includes:L.includes,excludes:L.excludes||_optionalChain([q, 'optionalAccess', _59 => _59.excludes])}))}if(r){let b={name:_optionalChain([v, 'optionalAccess', _60 => _60.name])||"default",description:_optionalChain([v, 'optionalAccess', _61 => _61.description])||"Default user group with basic access",api_scopes:_optionalChain([v, 'optionalAccess', _62 => _62.api_scopes])||["data","graphql"],includes:_optionalChain([v, 'optionalAccess', _63 => _63.includes])||"*"};m.unshift(b)}let t=()=>["# Lens User Groups Configuration","# This file defines user groups for access control in Lens semantic layer","# ","# User groups control both API access and data visibility:","# - API Scopes: Control access to specific REST endpoints","# - Data Access: Define who can access what data through inclusion/exclusion","# - Priority: First-listed group takes precedence for users in multiple groups","# ","# API Scopes:","# - meta: Metadata endpoints (/v2/meta) - sources, authors, timezones","# - data: Data query endpoints (/v2/load, /v2/sql) - retrieve and analyze data","# - graphql: GraphQL endpoint (/v2/graphql) - GraphQL-based queries","# - jobs: Job-related endpoints (advanced functionality)","# - source: Source-related endpoints (advanced functionality)","# ","# User Patterns:","# - '*': All users","# - 'users:id:username': Specific user (e.g., 'users:id:johndoe')","# ",...y?["# Additional Notes:",`# ${y}`,"# "]:[]].join(`
|
|
282
|
+
`),n=S==="user_groups_only"?`${t()}
|
|
283
283
|
user_groups:
|
|
284
|
-
${_yaml.stringify.call(void 0,
|
|
285
|
-
`).map(
|
|
284
|
+
${_yaml.stringify.call(void 0, m).split(`
|
|
285
|
+
`).map(b=>` ${b}`).join(`
|
|
286
286
|
`)}`:`${t()}
|
|
287
287
|
user_groups:
|
|
288
|
-
${_yaml.stringify.call(void 0,
|
|
289
|
-
`).map(
|
|
290
|
-
`)}`;try{let
|
|
291
|
-
${
|
|
288
|
+
${_yaml.stringify.call(void 0, m).split(`
|
|
289
|
+
`).map(b=>` ${b}`).join(`
|
|
290
|
+
`)}`;try{let b=g.includes(".yaml")||g.includes(".yml")?g:g.endsWith("/")?g+d:`${g}/${d}`,x=_path2.default.dirname(b);return _fs.existsSync.call(void 0, x)||_fs.mkdirSync.call(void 0, x,{recursive:!0}),_fs.writeFileSync.call(void 0, b,n),{content:[{type:"text",text:`Lens user groups configuration created successfully at ${b}`},{type:"text",text:`Generated ${m.length} user groups with ${l} configuration type`},{type:"text",text:`File content:
|
|
291
|
+
${n}`}]}}catch(b){return{content:[{type:"text",text:`Error creating lens user groups configuration: ${b}`}]}}})};var _trinoclient = require('trino-client');function rt({fqdn:s,apiKey:l,userId:e}){return _trinoclient.Trino.create({server:`https://tcp.${s}:7432`,auth:new (0, _trinoclient.BasicAuth)(e,l),extraHeaders:{"cluster-name":"minervac"}})}var Wi=`Execute SQL queries across one or more semantic models using Trino's distributed query engine.
|
|
292
|
+
|
|
293
|
+
WHEN TO USE:
|
|
294
|
+
- Cross-model analysis: JOIN multiple semantic models (e.g., customer_360 + sales_360 + product_360)
|
|
295
|
+
- Advanced SQL: CTEs, window functions, subqueries, complex aggregations
|
|
296
|
+
- Multi-step transformations that exceed structured query API capabilities
|
|
297
|
+
|
|
298
|
+
WHEN NOT TO USE:
|
|
299
|
+
- Simple single-model queries \u2192 use query-lens-data (optimized and easier)
|
|
300
|
+
- Unknown schema \u2192 use query-lens-schema first to explore available tables/columns
|
|
301
|
+
|
|
302
|
+
TABLE NAMING:
|
|
303
|
+
Use two-part names: semantic_model_name.table_name
|
|
304
|
+
- Catalog 'icebase' is automatically set
|
|
305
|
+
- Example: sales_360.orders, customer_360.customer
|
|
306
|
+
|
|
307
|
+
EXAMPLE QUERIES:
|
|
308
|
+
|
|
309
|
+
1. Cross-model JOIN (Customer + Sales):
|
|
310
|
+
SELECT c.customer_name, c.country,
|
|
311
|
+
SUM(s.revenue) as total_revenue,
|
|
312
|
+
COUNT(s.order_id) as order_count
|
|
313
|
+
FROM customer_360.customer c
|
|
314
|
+
JOIN sales_360.orders s ON c.customer_id = s.customer_id
|
|
315
|
+
WHERE s.order_date >= DATE '2024-01-01'
|
|
316
|
+
GROUP BY c.customer_name, c.country
|
|
317
|
+
ORDER BY total_revenue DESC
|
|
318
|
+
LIMIT 20
|
|
319
|
+
|
|
320
|
+
2. Window functions for ranking:
|
|
321
|
+
SELECT product_name, category, revenue,
|
|
322
|
+
RANK() OVER (PARTITION BY category ORDER BY revenue DESC) as rank
|
|
323
|
+
FROM product_360.products
|
|
324
|
+
WHERE year = 2024
|
|
325
|
+
|
|
326
|
+
3. CTE with time-series analysis:
|
|
327
|
+
WITH monthly_sales AS (
|
|
328
|
+
SELECT DATE_TRUNC('month', order_date) as month,
|
|
329
|
+
SUM(revenue) as revenue
|
|
330
|
+
FROM sales_360.orders
|
|
331
|
+
WHERE order_date >= DATE '2024-01-01'
|
|
332
|
+
GROUP BY 1
|
|
333
|
+
)
|
|
334
|
+
SELECT month, revenue,
|
|
335
|
+
revenue - LAG(revenue) OVER (ORDER BY month) as change
|
|
336
|
+
FROM monthly_sales
|
|
337
|
+
|
|
338
|
+
BEST PRACTICES:
|
|
339
|
+
- Use WHERE clauses to filter early and reduce data scanned
|
|
340
|
+
- Add LIMIT for exploratory queries
|
|
341
|
+
- Explore schema first: query-lens-schema tool shows available tables/columns
|
|
342
|
+
- For performance, specify columns instead of SELECT *
|
|
343
|
+
|
|
344
|
+
Keywords: SQL, Trino, cross-model, JOIN, CTE, window functions, multi-model analytics`;async function ot({server:s,apiKey:l,fqdn:e,userId:a}){s.tool("trino-query",Wi,{sqlQuery:_zod2.default.string({description:"SQL query using two-part table names: semantic_model_name.table_name (e.g., sales_360.orders). Supports full Trino SQL syntax. Example: SELECT customer_name, SUM(revenue) FROM sales_360.orders WHERE order_date >= DATE '2024-01-01' GROUP BY customer_name LIMIT 10"})},async({sqlQuery:c})=>{try{let v=await rt({fqdn:e,apiKey:l,userId:a}).query({query:c,catalog:"icebase"}),S=0,y,g;return await v.forEach(d=>{if(S==0){let m=d.columns.map(({name:p,type:w})=>`${p}-${w}`);m.shift(),y=m.join("|")}d.data&&(g=d.data.map(p=>(p.shift(),p.join("|"))).join(`
|
|
345
|
+
`)),S++}),{content:[{type:"text",text:`${y}
|
|
346
|
+
${g}`}]}}catch(r){return{content:[{type:"text",text:`Error executing query: ${r.message}`}],isError:!0}}})}var{version:Vi}=Ie;async function Hi(){let s=process.env.API_KEY,l=process.env.FQDN,e=process.env.SLUG,a=process.env.DEV||!1,c=process.env.USERID,r=new (0, _mcpjs.McpServer)({name:"DataOS - Synthetic data",version:Vi,capabilities:{resources:{}}}),v=new _stdiojs.StdioServerTransport;a&&(be(r),we(r),ve(r),Se(r),ke(r),Ee(r),$e(r),Oe(r),Me(r),ze(r),Ve(r),He(r),Ze(r),tt(r)),s&&(Le({server:r,apiKey:s,fqdn:l,slug:e}),e||(Ce({server:r,apiKey:s,fqdn:l}),qe({server:r,apiKey:s,fqdn:l})),De({server:r,apiKey:s,fqdn:l,slug:e}),je({server:r,apiKey:s,fqdn:l,slug:e}),ot({server:r,apiKey:s,fqdn:l,userId:c})),await r.connect(v)}Hi().catch(console.error);
|
|
292
347
|
//# sourceMappingURL=stdio.cjs.map
|