vesper-wizard 2.0.5 → 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +300 -37
  3. package/build/cache/cdn.js +34 -0
  4. package/build/cache/service.js +63 -0
  5. package/build/cleaning/cleaner.js +81 -0
  6. package/build/cleaning/evaluator.js +89 -0
  7. package/build/cleaning/executor.js +62 -0
  8. package/build/cleaning/exporter.js +87 -0
  9. package/build/cleaning/planner.js +127 -0
  10. package/build/cleaning/rules.js +57 -0
  11. package/build/cleaning/types.js +1 -0
  12. package/build/cloud/adapters/local.js +37 -0
  13. package/build/cloud/adapters/s3.js +24 -0
  14. package/build/cloud/adapters/supabase.js +49 -0
  15. package/build/cloud/storage-manager.js +26 -0
  16. package/build/cloud/types.js +1 -0
  17. package/build/compliance/service.js +73 -0
  18. package/build/compliance/store.js +80 -0
  19. package/build/compliance/types.js +1 -0
  20. package/build/config/config-manager.js +221 -0
  21. package/build/config/secure-keys.js +51 -0
  22. package/build/config/user-config.js +48 -0
  23. package/build/data/processing-worker.js +23 -0
  24. package/build/data/streaming.js +38 -0
  25. package/build/data/worker-pool.js +39 -0
  26. package/build/export/exporter.js +69 -0
  27. package/build/export/packager.js +100 -0
  28. package/build/export/types.js +1 -0
  29. package/build/fusion/aligner.js +56 -0
  30. package/build/fusion/deduplicator.js +69 -0
  31. package/build/fusion/engine.js +69 -0
  32. package/build/fusion/harmonizer.js +39 -0
  33. package/build/fusion/orchestrator.js +86 -0
  34. package/build/fusion/types.js +1 -0
  35. package/build/gateway/unified-dataset-gateway.js +409 -0
  36. package/build/index.js +2704 -0
  37. package/build/ingestion/hf-downloader.js +171 -0
  38. package/build/ingestion/ingestor.js +271 -0
  39. package/build/ingestion/kaggle-downloader.js +102 -0
  40. package/build/install/install-service.js +41 -0
  41. package/build/jobs/manager.js +136 -0
  42. package/build/jobs/queue.js +59 -0
  43. package/build/jobs/types.js +1 -0
  44. package/build/lib/supabase.js +3 -0
  45. package/build/metadata/dataworld-source.js +89 -0
  46. package/build/metadata/domain.js +147 -0
  47. package/build/metadata/github-scraper.js +47 -0
  48. package/build/metadata/institutional-scrapers.js +49 -0
  49. package/build/metadata/kaggle-scraper.js +182 -0
  50. package/build/metadata/kaggle-source.js +70 -0
  51. package/build/metadata/license.js +68 -0
  52. package/build/metadata/monitoring-service.js +107 -0
  53. package/build/metadata/monitoring-store.js +78 -0
  54. package/build/metadata/monitoring-types.js +1 -0
  55. package/build/metadata/openml-source.js +87 -0
  56. package/build/metadata/quality.js +48 -0
  57. package/build/metadata/rate-limiter.js +128 -0
  58. package/build/metadata/scraper.js +377 -0
  59. package/build/metadata/store.js +340 -0
  60. package/build/metadata/types.js +1 -0
  61. package/build/metadata/uci-scraper.js +49 -0
  62. package/build/monitoring/observability.js +76 -0
  63. package/build/preparation/target-detector.js +75 -0
  64. package/build/python/__pycache__/config.cpython-312.pyc +0 -0
  65. package/build/python/asset_downloader_engine.py +92 -0
  66. package/build/python/cleaner.py +226 -0
  67. package/build/python/config.py +263 -0
  68. package/build/python/dataworld_engine.py +208 -0
  69. package/build/python/export_engine.py +243 -0
  70. package/build/python/framework_adapters.py +100 -0
  71. package/build/python/fusion_engine.py +368 -0
  72. package/build/python/github_adapter.py +106 -0
  73. package/build/python/hf_fallback.py +298 -0
  74. package/build/python/image_engine.py +86 -0
  75. package/build/python/kaggle_engine.py +295 -0
  76. package/build/python/media_engine.py +133 -0
  77. package/build/python/nasa_adapter.py +82 -0
  78. package/build/python/openml_engine.py +146 -0
  79. package/build/python/quality_engine.py +267 -0
  80. package/build/python/row_count.py +54 -0
  81. package/build/python/splitter_engine.py +283 -0
  82. package/build/python/target_engine.py +154 -0
  83. package/build/python/test_framework_adapters.py +61 -0
  84. package/build/python/test_fusion_engine.py +89 -0
  85. package/build/python/uci_adapter.py +94 -0
  86. package/build/python/vesper/__init__.py +1 -0
  87. package/build/python/vesper/__pycache__/__init__.cpython-312.pyc +0 -0
  88. package/build/python/vesper/core/__init__.py +1 -0
  89. package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc +0 -0
  90. package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc +0 -0
  91. package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc +0 -0
  92. package/build/python/vesper/core/asset_downloader.py +675 -0
  93. package/build/python/vesper/core/download_recipe.py +104 -0
  94. package/build/python/worldbank_adapter.py +99 -0
  95. package/build/quality/analyzer.js +93 -0
  96. package/build/quality/image-analyzer.js +114 -0
  97. package/build/quality/media-analyzer.js +115 -0
  98. package/build/quality/quality-orchestrator.js +162 -0
  99. package/build/quality/types.js +1 -0
  100. package/build/scripts/build-index.js +54 -0
  101. package/build/scripts/check-db.js +73 -0
  102. package/build/scripts/check-jobs.js +24 -0
  103. package/build/scripts/check-naruto.js +17 -0
  104. package/build/scripts/cleanup-kaggle.js +41 -0
  105. package/build/scripts/demo-full-pipeline.js +62 -0
  106. package/build/scripts/demo-ui.js +58 -0
  107. package/build/scripts/e2e-demo.js +72 -0
  108. package/build/scripts/massive-scrape.js +103 -0
  109. package/build/scripts/ops-dashboard.js +33 -0
  110. package/build/scripts/repro-bug.js +37 -0
  111. package/build/scripts/repro-export-bug.js +56 -0
  112. package/build/scripts/scrape-metadata.js +100 -0
  113. package/build/scripts/search-cli.js +26 -0
  114. package/build/scripts/test-bias.js +45 -0
  115. package/build/scripts/test-caching.js +51 -0
  116. package/build/scripts/test-cleaning.js +76 -0
  117. package/build/scripts/test-cloud-storage.js +48 -0
  118. package/build/scripts/test-compliance.js +58 -0
  119. package/build/scripts/test-conversion.js +64 -0
  120. package/build/scripts/test-custom-rules.js +58 -0
  121. package/build/scripts/test-db-opt.js +63 -0
  122. package/build/scripts/test-export-custom.js +33 -0
  123. package/build/scripts/test-exporter.js +53 -0
  124. package/build/scripts/test-fusion.js +61 -0
  125. package/build/scripts/test-github.js +27 -0
  126. package/build/scripts/test-group-split.js +52 -0
  127. package/build/scripts/test-hf-download.js +29 -0
  128. package/build/scripts/test-holdout-manager.js +61 -0
  129. package/build/scripts/test-hybrid-search.js +41 -0
  130. package/build/scripts/test-image-analysis.js +50 -0
  131. package/build/scripts/test-ingestion-infra.js +39 -0
  132. package/build/scripts/test-install.js +40 -0
  133. package/build/scripts/test-institutional.js +26 -0
  134. package/build/scripts/test-integrity.js +41 -0
  135. package/build/scripts/test-jit.js +42 -0
  136. package/build/scripts/test-job-queue.js +62 -0
  137. package/build/scripts/test-kaggle-download.js +34 -0
  138. package/build/scripts/test-large-data.js +50 -0
  139. package/build/scripts/test-mcp-v5.js +74 -0
  140. package/build/scripts/test-media-analysis.js +61 -0
  141. package/build/scripts/test-monitoring.js +91 -0
  142. package/build/scripts/test-observability.js +106 -0
  143. package/build/scripts/test-packager.js +55 -0
  144. package/build/scripts/test-pipeline.js +50 -0
  145. package/build/scripts/test-planning.js +64 -0
  146. package/build/scripts/test-privacy.js +38 -0
  147. package/build/scripts/test-production-sync.js +36 -0
  148. package/build/scripts/test-quality.js +43 -0
  149. package/build/scripts/test-robust-ingestion.js +41 -0
  150. package/build/scripts/test-schema.js +45 -0
  151. package/build/scripts/test-split-validation.js +40 -0
  152. package/build/scripts/test-splitter.js +93 -0
  153. package/build/scripts/test-target-detector.js +29 -0
  154. package/build/scripts/test-uci.js +27 -0
  155. package/build/scripts/test-unified-quality.js +86 -0
  156. package/build/scripts/test-write.js +14 -0
  157. package/build/scripts/verify-integration.js +57 -0
  158. package/build/scripts/verify-priority.js +33 -0
  159. package/build/search/embedder.js +34 -0
  160. package/build/search/engine.js +152 -0
  161. package/build/search/jit-orchestrator.js +258 -0
  162. package/build/search/vector-store.js +123 -0
  163. package/build/splitting/splitter.js +82 -0
  164. package/build/splitting/types.js +1 -0
  165. package/build/tools/formatter.js +251 -0
  166. package/build/utils/downloader.js +52 -0
  167. package/build/utils/selector.js +69 -0
  168. package/mcp-config-template.json +18 -0
  169. package/package.json +101 -29
  170. package/scripts/postinstall.cjs +114 -0
  171. package/scripts/preindex_registry.cjs +157 -0
  172. package/scripts/refresh-index.cjs +87 -0
  173. package/scripts/wizard.cjs +625 -0
  174. package/{wizard.js → scripts/wizard.js} +99 -21
  175. package/src/python/__pycache__/config.cpython-312.pyc +0 -0
  176. package/src/python/__pycache__/export_engine.cpython-312.pyc +0 -0
  177. package/src/python/__pycache__/framework_adapters.cpython-312.pyc +0 -0
  178. package/src/python/__pycache__/fusion_engine.cpython-312.pyc +0 -0
  179. package/src/python/__pycache__/kaggle_engine.cpython-312.pyc +0 -0
  180. package/src/python/asset_downloader_engine.py +92 -0
  181. package/src/python/cleaner.py +226 -0
  182. package/src/python/config.py +263 -0
  183. package/src/python/dataworld_engine.py +208 -0
  184. package/src/python/export_engine.py +243 -0
  185. package/src/python/framework_adapters.py +100 -0
  186. package/src/python/fusion_engine.py +368 -0
  187. package/src/python/github_adapter.py +106 -0
  188. package/src/python/hf_fallback.py +298 -0
  189. package/src/python/image_engine.py +86 -0
  190. package/src/python/kaggle_engine.py +295 -0
  191. package/src/python/media_engine.py +133 -0
  192. package/src/python/nasa_adapter.py +82 -0
  193. package/src/python/openml_engine.py +146 -0
  194. package/src/python/quality_engine.py +267 -0
  195. package/src/python/row_count.py +54 -0
  196. package/src/python/splitter_engine.py +283 -0
  197. package/src/python/target_engine.py +154 -0
  198. package/src/python/test_framework_adapters.py +61 -0
  199. package/src/python/test_fusion_engine.py +89 -0
  200. package/src/python/uci_adapter.py +94 -0
  201. package/src/python/vesper/__init__.py +1 -0
  202. package/src/python/vesper/core/__init__.py +1 -0
  203. package/src/python/vesper/core/asset_downloader.py +675 -0
  204. package/src/python/vesper/core/download_recipe.py +104 -0
  205. package/src/python/worldbank_adapter.py +99 -0
  206. package/vesper-mcp-config.json +0 -6
@@ -0,0 +1,62 @@
1
+ import { QualityAnalyzer } from "../quality/analyzer.js";
2
+ import { CleaningPlanner } from "./planner.js";
3
+ import { DataCleaner } from "./cleaner.js";
4
export class PipelineExecutor {
    /** Quality analysis backend (delegates to Python scripts). */
    analyzer;
    /** Produces a cleaning plan from a quality report. */
    planner;
    /** Executes cleaning operations / format conversion on data files. */
    cleaner;
    /**
     * @param {string} [dataRoot] - Root directory for data operations.
     * @param {string} [buildDir] - Directory containing the Python engine
     *   scripts; falls back to dataRoot when omitted.
     */
    constructor(dataRoot = process.cwd(), buildDir) {
        // Use buildDir for analyzer/planner/cleaner (where Python scripts are),
        // dataRoot for data operations.
        this.analyzer = new QualityAnalyzer(undefined, buildDir || dataRoot);
        this.planner = new CleaningPlanner(undefined, buildDir || dataRoot);
        this.cleaner = new DataCleaner(buildDir || dataRoot);
    }
    /**
     * Run the full Auto-Cleaning Pipeline on a dataset file.
     *
     * @param {string} datasetId - Identifier used for plan generation.
     * @param {string} filePath - Path to the input dataset file.
     * @param {string} [outputFormat="parquet"] - Desired output extension.
     * @param {(msg: string) => void} [onProgress] - Optional progress callback.
     * @returns {Promise<object>} { initial_quality, plan, cleaning_result,
     *   final_output_path } — final_output_path is present on every success path.
     */
    async runPipeline(datasetId, filePath, outputFormat = "parquet", onProgress) {
        // Log to stderr (stdout may be reserved for protocol traffic) and
        // mirror every message to the optional progress callback.
        const log = (m) => {
            console.error(`[Pipeline] ${m}`);
            if (onProgress)
                onProgress(m);
        };
        // 1. Analyze quality
        log(`Analyzing quality for ${datasetId}...`);
        const qualityReport = await this.analyzer.analyze(filePath);
        // 2. Generate Plan
        log(`Generating cleaning plan...`);
        const plan = await this.planner.generatePlan(datasetId, qualityReport);
        const rules = plan.operations;
        // If no cleaning is needed, we still might need format conversion.
        const needsConversion = !filePath.toLowerCase().endsWith(`.${outputFormat}`);
        if (rules.length === 0 && !needsConversion) {
            log(`No cleaning or conversion needed.`);
            return {
                initial_quality: qualityReport,
                plan,
                cleaning_result: {
                    success: true,
                    rows_affected: 0,
                    columns_affected: 0,
                    logs: ["No operations generated."]
                },
                // Fix: previously this branch returned no final_output_path,
                // leaving callers without a usable path even though the
                // (unchanged) input file is the valid output.
                final_output_path: filePath
            };
        }
        // 3. Execute Plan (includes conversion if requested)
        log(`Executing ${rules.length} operations (Format: ${outputFormat})...`);
        rules.forEach(op => console.error(`  - ${op.type}: ${op.reason}`));
        const cleaningResult = await this.cleaner.clean(filePath, rules, outputFormat);
        if (cleaningResult.success) {
            log(`Cleaning complete. Output: ${cleaningResult.output_path}`);
        }
        else {
            log(`Cleaning failed: ${cleaningResult.error}`);
        }
        return {
            initial_quality: qualityReport,
            plan,
            cleaning_result: cleaningResult,
            final_output_path: cleaningResult.output_path
        };
    }
}
@@ -0,0 +1,87 @@
1
export class ScriptGenerator {
    /**
     * Build a standalone Python (polars) script that reproduces the given
     * cleaning plan on a dataset file.
     *
     * @param {object} plan - Cleaning plan ({ dataset_id, operations }).
     * @param {string} inputPath - Dataset path baked into the script as INPUT_PATH.
     * @returns {string} Complete Python source code.
     */
    generatePythonScript(plan, inputPath) {
        const generatedOn = new Date().toISOString().split('T')[0];
        const parts = [];
        // Script preamble: docstring, imports, and the data-loading prologue.
        parts.push(`"""
Vesper Auto-Cleaning Script
Generated: ${generatedOn}
Dataset ID: ${plan.dataset_id}
"""

import polars as pl
import numpy as np

def clean_dataset(file_path):
    print(f"Loading {file_path}...")

    # Load Data
    if file_path.endswith(".csv"):
        df = pl.read_csv(file_path, ignore_errors=True)
    elif file_path.endswith(".parquet"):
        df = pl.read_parquet(file_path)
    else:
        raise ValueError("Unsupported format")

    print(f"Initial shape: {df.shape}")

`);
        // One numbered, commented section per planned operation.
        plan.operations.forEach((op, index) => {
            parts.push(`    # Step ${index + 1}: ${op.type}\n`);
            parts.push(`    # Reason: ${op.reason}\n`);
            parts.push(this.generateOpCode(op));
            parts.push(`    print(f"After Step ${index + 1} (${op.type}): {df.shape}")\n\n`);
        });
        // Epilogue: save next to the input and provide a runnable entry point.
        parts.push(`    # Save Output
    output_path = file_path.replace(".csv", "_cleaned_repro.csv").replace(".parquet", "_cleaned_repro.parquet")
    if file_path.endswith(".csv"):
        df.write_csv(output_path)
    else:
        df.write_parquet(output_path)

    print(f"Done! Saved to {output_path}")

if __name__ == "__main__":
    # Default input path from generation time, can be overridden
    INPUT_PATH = r"${inputPath}"
    clean_dataset(INPUT_PATH)
`);
        return parts.join("");
    }
    /**
     * Translate a single cleaning operation into the Python statements that
     * apply it to the `df` polars DataFrame.
     */
    generateOpCode(op) {
        const params = op.params;
        if (op.type === "RemoveDuplicates") {
            return `    df = df.unique()\n`;
        }
        if (op.type === "DropColumns") {
            // Only drop columns that still exist at this point in the script.
            return `    cols_to_drop = ${JSON.stringify(params.columns)}\n    existing_cols = [c for c in cols_to_drop if c in df.columns]\n    if existing_cols:\n        df = df.drop(existing_cols)\n`;
        }
        if (op.type === "FillMissing") {
            if (params.method === "constant") {
                const literal = typeof params.value === 'string' ? `"${params.value}"` : params.value;
                return `    df = df.with_columns(pl.col("${params.column}").fill_null(${literal}))\n`;
            }
            if (params.method === "mean") {
                return `    mean_val = df["${params.column}"].mean()\n    df = df.with_columns(pl.col("${params.column}").fill_null(mean_val))\n`;
            }
            if (params.method === "median") {
                return `    median_val = df["${params.column}"].median()\n    df = df.with_columns(pl.col("${params.column}").fill_null(median_val))\n`;
            }
            return `    # Unknown fill method for ${params.column}\n`;
        }
        if (op.type === "FixTypes") {
            if (params.type === "float")
                return `    df = df.with_columns(pl.col("${params.column}").cast(pl.Float64, strict=False))\n`;
            if (params.type === "int")
                return `    df = df.with_columns(pl.col("${params.column}").cast(pl.Int64, strict=False))\n`;
            if (params.type === "string")
                return `    df = df.with_columns(pl.col("${params.column}").cast(pl.Utf8))\n`;
            return `    # Unknown type conversion for ${params.column}\n`;
        }
        if (op.type === "RemoveOutliers") {
            // Inline IQR fence: keep rows within [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
            return `    q1 = df["${params.column}"].quantile(0.25)\n    q3 = df["${params.column}"].quantile(0.75)\n    iqr = q3 - q1\n    lower = q1 - (1.5 * iqr)\n    upper = q3 + (1.5 * iqr)\n    df = df.filter((pl.col("${params.column}") >= lower) & (pl.col("${params.column}") <= upper))\n`;
        }
        return `    # Operation ${op.type} not fully supported in export yet\n`;
    }
}
@@ -0,0 +1,127 @@
1
+ import { TargetDetector } from "../preparation/target-detector.js";
2
export class CleaningPlanner {
    /** Optional plan cache exposing getPlan/savePlan; may be undefined. */
    cache;
    /** Target-column detector (uses scripts under buildDir). */
    targetDetector;
    constructor(cache, buildDir = process.cwd()) {
        this.cache = cache;
        this.targetDetector = new TargetDetector(buildDir);
    }
    /**
     * Generate a cleaning plan based on the quality report and optional custom rules.
     *
     * @param {string} datasetId - Dataset identifier (also the cache namespace).
     * @param {object} report - Quality report with duplicate_rows and per-column stats.
     * @param {object} [ruleSet] - Optional custom rule set ({ rules: [...] }).
     * @param {object} [targetInfo] - Optional detected target ({ target, confidence }).
     * @returns {Promise<object>} Plan with dataset_id, operations, estimated_impact.
     */
    async generatePlan(datasetId, report, ruleSet, targetInfo) {
        // Fix: the cache key must include targetInfo. Previously it was only
        // { report, ruleSet }, so two calls that differed solely in the
        // detected target shared one cache entry and the RenameTarget
        // operation could be wrongly served or silently dropped.
        const cacheKey = { report, ruleSet, targetInfo };
        if (this.cache) {
            const cached = await this.cache.getPlan(datasetId, cacheKey);
            if (cached) {
                console.error(`[CleaningPlanner] Cache hit for ${datasetId}`);
                return cached;
            }
        }
        const ops = [];
        let estimatedRowsSaved = 0;
        let estimatedColsSaved = 0;
        // 1. Remove Duplicates (Global)
        if (report.duplicate_rows > 0) {
            ops.push({
                type: "RemoveDuplicates",
                params: {},
                reason: `Found ${report.duplicate_rows} exact duplicate rows`
            });
            estimatedRowsSaved += report.duplicate_rows;
        }
        // 2. Column-level operations
        for (const col of report.columns) {
            // A. Drop Empty / Useless Columns (mostly-missing or zero-variance)
            if (col.missing_percentage > 90 || col.is_constant) {
                ops.push({
                    type: "DropColumns",
                    params: { columns: [col.name] },
                    reason: col.is_constant ? "Column is constant (zero variance)" : `High missing values (${col.missing_percentage.toFixed(1)}%)`
                });
                estimatedColsSaved++;
                continue;
            }
            // B. Fix Types (numeric data stored as strings)
            if (this.shouldFixType(col)) {
                const targetType = col.inferred_type.toLowerCase().includes("numeric") ? "float" : "string";
                ops.push({
                    type: "FixTypes",
                    params: { column: col.name, type: targetType },
                    reason: `Inferred type is ${col.inferred_type} but stored as ${col.type}`
                });
            }
            // C. Impute Missing Values (median for numerics, sentinel otherwise)
            if (col.missing_count > 0) {
                let method = "constant";
                let value = "unknown";
                if (col.inferred_type.includes("Numeric") || col.type.includes("Int") || col.type.includes("Float")) {
                    method = "median";
                    value = 0;
                }
                else {
                    method = "constant";
                    value = "missing";
                }
                ops.push({
                    type: "FillMissing",
                    params: { column: col.name, method, value },
                    reason: `${col.missing_count} missing values`
                });
            }
        }
        // 3. Target Unification — rename a confidently-detected target to "target"
        if (targetInfo && targetInfo.target !== "target" && targetInfo.confidence > 0.7) {
            ops.push({
                type: "RenameTarget",
                params: { old_name: targetInfo.target, new_name: "target" },
                reason: `Detected target '${targetInfo.target}' with high confidence (${targetInfo.confidence.toFixed(2)})`
            });
            estimatedColsSaved++; // Standardizing the target column counts as a "saved" column
        }
        // 4. Apply Custom Rules ("*" fans the rule out to every reported column)
        if (ruleSet) {
            for (const rule of ruleSet.rules) {
                const targets = rule.condition.column === "*"
                    ? report.columns.map(c => c.name)
                    : [rule.condition.column];
                for (const targetCol of targets) {
                    const colStats = report.columns.find(c => c.name === targetCol);
                    if (!colStats)
                        continue;
                    ops.push({
                        type: rule.action.type,
                        params: { ...rule.action.params, column: targetCol },
                        reason: `Custom Rule: ${rule.name} - ${rule.description}`
                    });
                }
            }
        }
        const plan = {
            dataset_id: datasetId,
            operations: ops,
            estimated_impact: {
                rows_saved: estimatedRowsSaved,
                columns_saved: estimatedColsSaved,
                quality_score_improvement: 10 + (ops.length * 5)
            }
        };
        if (this.cache) {
            await this.cache.savePlan(datasetId, cacheKey, plan);
        }
        return plan;
    }
    /** Convenience wrapper: returns only the plan's operation list. */
    async generateRules(datasetId, report, ruleSet, targetInfo) {
        const plan = await this.generatePlan(datasetId, report, ruleSet, targetInfo);
        return plan.operations;
    }
    /** True when a column's data is numeric but stored under a string type. */
    shouldFixType(col) {
        if (col.inferred_type && col.inferred_type.includes("Numeric") && (col.type.includes("String") || col.type.includes("Utf8"))) {
            return true;
        }
        return false;
    }
    // NOTE(review): not referenced within this class — presumably used by
    // callers elsewhere; verify before removing.
    isNumeric(col) {
        const t = col.type.toLowerCase();
        return t.includes("int") || t.includes("float") || t.includes("numeric");
    }
}
@@ -0,0 +1,57 @@
1
// --- Domain Presets ---
// Built-in rule sets keyed by data domain. Each rule pairs a condition
// (evaluated per column by the rule evaluator) with a cleaning action.

/** Text-preparation rules for NLP workloads. */
export const NLP_PRESET = {
    id: "preset-nlp",
    name: "NLP Data Prep",
    domain: "nlp",
    rules: [
        {
            id: "nlp-1",
            name: "Normalize Case",
            description: "Convert all text to lowercase",
            // Applied to all non-null values; string-type filtering happens in the evaluator.
            condition: { column: "*", operator: "is_null", value: false },
            action: { type: "NormalizeText", params: { case: "lower" } }
        },
        {
            id: "nlp-2",
            name: "Remove URLs",
            description: "Strip http/https links",
            condition: { column: "*", operator: "contains", value: "http" },
            action: { type: "Replace", params: { pattern: "https?://\\S+", replacement: "" } }
        }
    ]
};

/** PII-minimization rules for medical data (HIPAA-oriented). */
export const HEALTHCARE_PRESET = {
    id: "preset-healthcare",
    name: "Healthcare (HIPAA) Prep",
    domain: "healthcare",
    rules: [
        {
            id: "hc-1",
            name: "Mask Emails",
            description: "Identify and mask email addresses",
            condition: { column: "*", operator: "contains", value: "@" },
            action: { type: "CustomMask", params: { method: "hash", salt: "vesper-pii" } }
        },
        {
            id: "hc-2",
            name: "Normalize Dates",
            description: "Ensure ISO-8601 for DOB/Admit dates",
            condition: { column: "date", operator: "is_null", value: false },
            action: { type: "FixTypes", params: { type: "date" } }
        }
    ]
};

/** Monetary-value normalization rules for financial data. */
export const FINANCE_PRESET = {
    id: "preset-finance",
    name: "Financial Data Prep",
    domain: "finance",
    rules: [
        {
            id: "fin-1",
            name: "Currency Cleanup",
            description: "Remove currency symbols and parse as float",
            condition: { column: "amount", operator: "matches_regex", value: "[\\$\\€\\£]" },
            action: { type: "FixTypes", params: { type: "float", strip: "[^0-9\\.]" } }
        }
    ]
};
@@ -0,0 +1 @@
// Type-declaration module: all content is erased at compile time, so this
// file intentionally emits an empty ES module at runtime.
export {};
@@ -0,0 +1,37 @@
1
import fs from "fs";
import path from "path";
/**
 * LocalAdapter mimics cloud storage by copying files to a local vault directory.
 */
export class LocalAdapter {
    /** Absolute path of the vault directory (created eagerly). */
    vaultPath;
    /**
     * @param {string} vaultPath - Directory used as the storage vault;
     *   created (recursively) if it does not exist.
     */
    constructor(vaultPath) {
        this.vaultPath = path.resolve(vaultPath);
        // Constructor must stay synchronous, so the vault is created with sync fs.
        if (!fs.existsSync(this.vaultPath)) {
            fs.mkdirSync(this.vaultPath, { recursive: true });
        }
    }
    /**
     * Copy a local file into the vault.
     *
     * @param {string} localPath - Source file; must exist.
     * @param {string} remotePath - Destination path relative to the vault.
     * @returns {Promise<string>} A file:// URI pointing at the stored copy.
     * @throws {Error} If the source file does not exist.
     */
    async upload(localPath, remotePath) {
        if (!fs.existsSync(localPath)) {
            throw new Error(`Local file not found: ${localPath}`);
        }
        const destPath = path.join(this.vaultPath, remotePath);
        // Fix: use the async fs API here — the previous sync copy blocked the
        // event loop for the duration of (potentially large) file copies.
        await fs.promises.mkdir(path.dirname(destPath), { recursive: true });
        await fs.promises.copyFile(localPath, destPath);
        // Returns a file URI as the "url"
        return `file://${destPath}`;
    }
    /**
     * Remove a file from the vault. Idempotent: deleting a missing file is a no-op.
     */
    async delete(remotePath) {
        const destPath = path.join(this.vaultPath, remotePath);
        try {
            await fs.promises.unlink(destPath);
        } catch (err) {
            // Preserve original semantics (missing file was silently skipped);
            // surface any other failure.
            if (err?.code !== "ENOENT") throw err;
        }
    }
    /**
     * Local storage has no signing; the expiry argument is ignored and a
     * plain file:// URI is returned.
     */
    async getSignedUrl(remotePath, expiresValue) {
        return `file://${path.join(this.vaultPath, remotePath)}`;
    }
}
@@ -0,0 +1,24 @@
1
/**
 * S3Adapter Stub.
 * Note: Requires @aws-sdk/client-s3 to be installed for full functionality.
 */
export class S3Adapter {
    bucket;
    region;
    credentials;
    /**
     * @param {string} bucket - Target S3 bucket name.
     * @param {string} region - AWS region of the bucket.
     * @param {object} credentials - AWS credentials (unused by the stub).
     */
    constructor(bucket, region, credentials) {
        this.bucket = bucket;
        this.region = region;
        this.credentials = credentials;
    }
    // Virtual-hosted-style object URL for this bucket/region.
    #objectUrl(remotePath) {
        return `https://${this.bucket}.s3.${this.region}.amazonaws.com/${remotePath}`;
    }
    /** Stubbed upload: warns and returns the would-be object URL. */
    async upload(localPath, remotePath) {
        console.warn("S3Adapter: Full implementation requires @aws-sdk/client-s3. This is a stub.");
        return this.#objectUrl(remotePath);
    }
    /** Stubbed delete: warns only. */
    async delete(remotePath) {
        console.warn("S3Adapter: Delete stub called.");
    }
    /** Stubbed signed URL: the object URL tagged with ?stub=true. */
    async getSignedUrl(remotePath) {
        return `${this.#objectUrl(remotePath)}?stub=true`;
    }
}
@@ -0,0 +1,49 @@
1
import fs from "fs";
import { readFile } from "fs/promises";
import { createClient } from "@supabase/supabase-js";
/**
 * Storage adapter backed by a Supabase Storage bucket.
 * Credentials fall back to SUPABASE_URL / SUPABASE_SERVICE_ROLE_KEY env vars.
 */
export class SupabaseAdapter {
    bucket;
    client;
    /**
     * @param {string} bucket - Supabase Storage bucket name.
     * @param {string} [supabaseUrl] - Project URL; defaults to SUPABASE_URL.
     * @param {string} [supabaseServiceRoleKey] - Service-role key; defaults to
     *   SUPABASE_SERVICE_ROLE_KEY.
     * @throws {Error} If no URL or service-role key can be resolved.
     */
    constructor(bucket, supabaseUrl, supabaseServiceRoleKey) {
        this.bucket = bucket;
        const url = supabaseUrl || process.env.SUPABASE_URL;
        const serviceKey = supabaseServiceRoleKey || process.env.SUPABASE_SERVICE_ROLE_KEY;
        if (!url || !serviceKey) {
            throw new Error("Supabase requires SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY");
        }
        // Server-side usage: session persistence is unnecessary.
        this.client = createClient(url, serviceKey, { auth: { persistSession: false } });
    }
    /**
     * Upload a local file (upserting) and return its public URL.
     * @throws {Error} If the local file is missing or the upload fails.
     */
    async upload(localPath, remotePath) {
        if (!fs.existsSync(localPath)) {
            throw new Error(`Local file not found: ${localPath}`);
        }
        const contents = await readFile(localPath);
        const uploadResult = await this.client.storage
            .from(this.bucket)
            .upload(remotePath, contents, { upsert: true });
        if (uploadResult.error) {
            throw new Error(`Supabase upload failed: ${uploadResult.error.message}`);
        }
        const { data } = this.client.storage.from(this.bucket).getPublicUrl(remotePath);
        return data.publicUrl;
    }
    /** Remove an object from the bucket. @throws {Error} on failure. */
    async delete(remotePath) {
        const removal = await this.client.storage.from(this.bucket).remove([remotePath]);
        if (removal.error) {
            throw new Error(`Supabase delete failed: ${removal.error.message}`);
        }
    }
    /**
     * Create a time-limited signed URL for an object.
     * @param {number} [expiresValue=3600] - Validity window in seconds.
     * @throws {Error} If signing fails or no URL is returned.
     */
    async getSignedUrl(remotePath, expiresValue = 3600) {
        const { data, error } = await this.client.storage
            .from(this.bucket)
            .createSignedUrl(remotePath, expiresValue);
        if (error || !data?.signedUrl) {
            throw new Error(`Supabase signed URL failed: ${error?.message || "No signed URL returned"}`);
        }
        return data.signedUrl;
    }
}
@@ -0,0 +1,26 @@
1
+ import { LocalAdapter } from "./adapters/local.js";
2
+ import { S3Adapter } from "./adapters/s3.js";
3
+ import { SupabaseAdapter } from "./adapters/supabase.js";
4
export class StorageManager {
    /**
     * Creates an adapter based on configuration.
     *
     * @param {{ type: string, options: object }} config - Storage configuration.
     * @returns {LocalAdapter|S3Adapter|SupabaseAdapter}
     * @throws {Error} On missing required options or an unknown type.
     */
    static createAdapter(config) {
        const { type, options } = config;
        if (type === "local") {
            // Default vault directory when none is configured.
            return new LocalAdapter(options.basePath || "./storage_vault");
        }
        if (type === "s3") {
            if (!options.bucket || !options.region) {
                throw new Error("S3 requires bucket and region");
            }
            return new S3Adapter(options.bucket, options.region, options.credentials);
        }
        if (type === "supabase") {
            if (!options.bucket) {
                throw new Error("Supabase requires bucket");
            }
            return new SupabaseAdapter(options.bucket, options.supabaseUrl, options.supabaseServiceRoleKey);
        }
        throw new Error(`Unsupported storage type: ${type}`);
    }
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,73 @@
1
+ import { v4 as uuidv4 } from "uuid";
2
export class ComplianceService {
    /** Persistence layer for compliance checks, consent records, audit logs. */
    store;
    constructor(store) {
        this.store = store;
    }
    // Build a check result for `standard`, persist it, and return it.
    #recordResult(datasetId, standard, issues) {
        const result = {
            dataset_id: datasetId,
            standard,
            passed: issues.length === 0,
            issues,
            timestamp: new Date().toISOString()
        };
        this.store.saveCheck(result);
        return result;
    }
    /**
     * Run a GDPR compliance check: PII flag, PII-related quality warnings,
     * and presence of a valid consent record. Result is persisted.
     */
    async verifyGDPR(dataset) {
        const issues = [];
        // 1. Explicit PII metadata flag
        if (dataset.has_personal_data) {
            issues.push("Dataset explicitly flagged as containing personal data.");
        }
        // 2. Quality warnings mentioning PII / personal data
        for (const warning of dataset.quality_warnings ?? []) {
            const lowered = warning.toLowerCase();
            if (lowered.includes("pii") || lowered.includes("personal")) {
                issues.push(warning);
            }
        }
        // 3. Consent record lookup
        const consent = this.store.getConsent(dataset.id);
        if (!consent || !consent.consent_obtained) {
            issues.push("No valid consent record found for this dataset.");
        }
        return this.#recordResult(dataset.id, "GDPR", issues);
    }
    /**
     * Run a (simulated) HIPAA check: a medical-domain dataset must not carry
     * personal data (i.e. must be de-identified). Result is persisted.
     */
    async verifyHIPAA(dataset) {
        const issues = [];
        const isMedical = dataset.domain === "healthcare" || dataset.domain === "medical";
        if (isMedical && dataset.has_personal_data) {
            issues.push("Medical dataset contains personal data (not de-identified).");
        }
        return this.#recordResult(dataset.id, "HIPAA", issues);
    }
    /** Persist an audit-trail entry for an operation on a dataset. */
    logOperation(userId, datasetId, operation, metadata = {}) {
        this.store.saveAudit({
            id: uuidv4(),
            user_id: userId,
            dataset_id: datasetId,
            operation,
            timestamp: new Date().toISOString(),
            details: JSON.stringify(metadata)
        });
    }
    /**
     * Export audit logs (optionally filtered by dataset) as CSV text.
     * Only the free-form details field is quoted/escaped.
     */
    exportAuditLog(datasetId) {
        const entries = this.store.getAuditLogs(datasetId);
        if (entries.length === 0) {
            return "No audit logs found.";
        }
        const lines = ["ID,Timestamp,User,Operation,Dataset,Details"];
        for (const entry of entries) {
            const quotedDetails = `"${entry.details.replace(/"/g, '""')}"`;
            lines.push(`${entry.id},${entry.timestamp},${entry.user_id},${entry.operation},${entry.dataset_id},${quotedDetails}`);
        }
        return lines.join("\n");
    }
}
@@ -0,0 +1,80 @@
1
export class ComplianceStore {
    /** better-sqlite3-style database handle (exec/prepare/run/all/get). */
    db;
    constructor(db) {
        this.db = db;
        this.init();
    }
    /** Create the compliance tables if they do not exist yet. */
    init() {
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS audit_logs (
        id TEXT PRIMARY KEY,
        user_id TEXT,
        dataset_id TEXT,
        operation TEXT,
        timestamp TEXT,
        details TEXT
      );

      CREATE TABLE IF NOT EXISTS compliance_checks (
        dataset_id TEXT,
        standard TEXT,
        passed BOOLEAN,
        issues TEXT, -- JSON array
        timestamp TEXT,
        PRIMARY KEY (dataset_id, standard)
      );

      CREATE TABLE IF NOT EXISTS consent_records (
        dataset_id TEXT PRIMARY KEY,
        consent_obtained BOOLEAN,
        source TEXT,
        last_verified TEXT
      );
    `);
    }
    /** Append one audit event (insert-only; ids are expected to be unique). */
    saveAudit(event) {
        const insert = this.db.prepare(`
      INSERT INTO audit_logs (id, user_id, dataset_id, operation, timestamp, details)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
        insert.run(event.id, event.user_id, event.dataset_id, event.operation, event.timestamp, event.details);
    }
    /**
     * Fetch audit events, newest first, optionally filtered to one dataset.
     */
    getAuditLogs(datasetId) {
        const whereClause = datasetId ? " WHERE dataset_id = ?" : "";
        const args = datasetId ? [datasetId] : [];
        const sql = `SELECT * FROM audit_logs${whereClause} ORDER BY timestamp DESC`;
        return this.db.prepare(sql).all(...args);
    }
    /**
     * Upsert a compliance check result; booleans are stored as 1/0 and the
     * issues list is serialized to JSON.
     */
    saveCheck(result) {
        const upsert = this.db.prepare(`
      INSERT INTO compliance_checks (dataset_id, standard, passed, issues, timestamp)
      VALUES (?, ?, ?, ?, ?)
      ON CONFLICT(dataset_id, standard) DO UPDATE SET
        passed=excluded.passed,
        issues=excluded.issues,
        timestamp=excluded.timestamp
    `);
        upsert.run(result.dataset_id, result.standard, result.passed ? 1 : 0, JSON.stringify(result.issues), result.timestamp);
    }
    /** Upsert a consent record (one per dataset). */
    saveConsent(record) {
        const upsert = this.db.prepare(`
      INSERT INTO consent_records (dataset_id, consent_obtained, source, last_verified)
      VALUES (?, ?, ?, ?)
      ON CONFLICT(dataset_id) DO UPDATE SET
        consent_obtained=excluded.consent_obtained,
        source=excluded.source,
        last_verified=excluded.last_verified
    `);
        upsert.run(record.dataset_id, record.consent_obtained ? 1 : 0, record.source, record.last_verified);
    }
    /**
     * Read a consent record, converting the stored 1/0 flag back to a boolean.
     * Returns null when no record exists.
     */
    getConsent(datasetId) {
        const row = this.db.prepare("SELECT * FROM consent_records WHERE dataset_id = ?").get(datasetId);
        if (!row)
            return null;
        return { ...row, consent_obtained: Boolean(row.consent_obtained) };
    }
}
@@ -0,0 +1 @@
// Type-declaration module: all content is erased at compile time, so this
// file intentionally emits an empty ES module at runtime.
export {};