PyPI - nuvu-scan - Versions diffs - 1.3.7__tar.gz → 2.0.0__tar.gz - Mend

nuvu-scan 1.3.7tar.gz → 2.0.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

{nuvu_scan-1.3.7 → nuvu_scan-2.0.0}/.github/workflows/ci.yml RENAMED Viewed

@@ -7,43 +7,58 @@ on:
     branches: [main, develop]
 jobs:
+  lint:  # Runs every pre-commit hook once; the test job below waits on it via `needs: lint`
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Set up Python
+        run: uv python install 3.12  # one fixed interpreter; this job has no version matrix
+      - name: Install dependencies
+        run: uv sync --dev
+      - name: Run pre-commit
+        run: uv run pre-commit run --all-files  # check the whole repository, not only changed files
   test:
     runs-on: ubuntu-latest
+    needs: lint
     strategy:
       matrix:
         python-version: ["3.10", "3.11", "3.12", "3.13"]
     steps:
       - uses: actions/checkout@v4
       - name: Install uv
         uses: astral-sh/setup-uv@v4
         with:
           version: "latest"
       - name: Set up Python ${{ matrix.python-version }}
         run: uv python install ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
           uv sync --dev
-      - name: Run linter
-        run: |
-          uv run ruff check .
-          uv run black --check .
       - name: Run type checker
         run: |
           uv run mypy nuvu_scan || true  # Allow failures for now
       - name: Run tests
         run: |
           uv run pytest --cov=nuvu_scan --cov-report=xml
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
       - name: Upload coverage
         uses: codecov/codecov-action@v3
         with:
@@ -54,22 +69,22 @@ jobs:
     runs-on: ubuntu-latest
     needs: test
     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
     steps:
       - uses: actions/checkout@v4
       - name: Install uv
         uses: astral-sh/setup-uv@v4
         with:
           version: "latest"
       - name: Set up Python
         run: uv python install 3.11
       - name: Build package
         run: |
           uv build
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:

{nuvu_scan-1.3.7 → nuvu_scan-2.0.0}/.github/workflows/release.yml RENAMED Viewed

@@ -18,21 +18,21 @@ jobs:
     permissions:
       contents: write  # Required to create tags and releases
       id-token: write  # Required for trusted publishing
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
         with:
           fetch-depth: 0  # Fetch full history for version comparison
       - name: Install uv
         uses: astral-sh/setup-uv@v4
         with:
           version: "latest"
       - name: Set up Python
         run: uv python install 3.12
       - name: Extract version from pyproject.toml
         id: get_version
         run: |
@@ -41,18 +41,18 @@ jobs:
           VERSION=$(grep -E '^\s*version\s*=' pyproject.toml | head -1 | sed -E "s/.*version\s*=\s*['\"]([^'\"]+)['\"].*/\1/" | tr -d ' ')
           echo "version=$VERSION" >> $GITHUB_OUTPUT
           echo "Current version: $VERSION"
       - name: Check if version was bumped
         id: check_version
         if: github.event_name == 'push'
         run: |
           CURRENT_VERSION="${{ steps.get_version.outputs.version }}"
           # Get the previous commit's version
           git checkout HEAD~1 pyproject.toml 2>/dev/null || echo "No previous commit found"
           PREVIOUS_VERSION=$(grep -E '^\s*version\s*=' pyproject.toml 2>/dev/null | head -1 | sed -E "s/.*version\s*=\s*['\"]([^'\"]+)['\"].*/\1/" | tr -d ' ' || echo "")
           git checkout HEAD pyproject.toml
           if [ -z "$PREVIOUS_VERSION" ]; then
             echo "No previous version found, assuming first release"
             echo "should_release=true" >> $GITHUB_OUTPUT
@@ -63,7 +63,7 @@ jobs:
             echo "Version unchanged ($CURRENT_VERSION), skipping release"
             echo "should_release=false" >> $GITHUB_OUTPUT
           fi
       - name: Check if tag already exists
         id: check_tag
         if: |
@@ -72,7 +72,7 @@ jobs:
         run: |
           VERSION="${{ steps.get_version.outputs.version }}"
           TAG="v${VERSION}"
           if git rev-parse "$TAG" >/dev/null 2>&1; then
             echo "Tag $TAG already exists, skipping release"
             echo "tag_exists=true" >> $GITHUB_OUTPUT
@@ -80,7 +80,7 @@ jobs:
             echo "Tag $TAG does not exist, will create release"
             echo "tag_exists=false" >> $GITHUB_OUTPUT
           fi
       - name: Create Git Tag
         if: |
           ((github.event_name == 'push' && steps.check_version.outputs.should_release == 'true' && steps.check_tag.outputs.tag_exists == 'false') ||
@@ -88,14 +88,14 @@ jobs:
         run: |
           VERSION="${{ steps.get_version.outputs.version }}"
           TAG="v${VERSION}"
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
           git tag -a "$TAG" -m "Release $TAG"
           git push origin "$TAG"
           echo "Created and pushed tag: $TAG"
       - name: Create GitHub Release
         if: |
           ((github.event_name == 'push' && steps.check_version.outputs.should_release == 'true' && steps.check_tag.outputs.tag_exists == 'false') ||
@@ -106,12 +106,12 @@ jobs:
           name: Release v${{ steps.get_version.outputs.version }}
           body: |
             ## Release v${{ steps.get_version.outputs.version }}
             Automated release created from merged PR.
             ### Changes
             See the merged PR for detailed changelog.
             ### Installation
             ```bash
             pip install --upgrade nuvu-scan==${{ steps.get_version.outputs.version }}
@@ -120,14 +120,14 @@ jobs:
           prerelease: false
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       - name: Build package
         if: |
           ((github.event_name == 'push' && steps.check_version.outputs.should_release == 'true' && steps.check_tag.outputs.tag_exists == 'false') ||
            (github.event_name == 'workflow_dispatch' && steps.check_tag.outputs.tag_exists == 'false'))
         run: |
           uv build
       - name: Publish to PyPI
         if: |
           ((github.event_name == 'push' && steps.check_version.outputs.should_release == 'true' && steps.check_tag.outputs.tag_exists == 'false') ||
@@ -136,7 +136,7 @@ jobs:
         with:
           packages-dir: dist/
           print-hash: true
       - name: Summary
         if: always()
         run: |

nuvu_scan-2.0.0/.pre-commit-config.yaml ADDED Viewed

@@ -0,0 +1,42 @@
+# Pre-commit hooks for nuvu-scan
+# Install the git hook: uv run pre-commit install
+# Run all hooks manually: uv run pre-commit run --all-files
+repos:
+  # Ruff - Fast Python linter and formatter
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.6
+    hooks:
+      # Linter: applies auto-fixes, then exits non-zero if anything was rewritten
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
+      # Formatter
+      - id: ruff-format
+  # General file hygiene checks (whitespace, YAML/JSON syntax, file size, leaked keys)
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-json
+      - id: check-added-large-files
+        args: ['--maxkb=1000']  # size limit for newly added files, in KB
+      - id: check-merge-conflict
+      - id: detect-private-key
+  # Bandit - security checks; reads its configuration from pyproject.toml (-c)
+  - repo: https://github.com/PyCQA/bandit
+    rev: 1.8.2
+    hooks:
+      - id: bandit
+        args: ["-c", "pyproject.toml"]
+        additional_dependencies: ["bandit[toml]"]
+# pre-commit.ci settings: commit messages used for its automated commits
+ci:
+  autofix_commit_msg: |
+    [pre-commit.ci] auto fixes from pre-commit hooks
+  autoupdate_commit_msg: |
+    [pre-commit.ci] pre-commit autoupdate

nuvu_scan-2.0.0/DEVELOPMENT_STATUS.md ADDED Viewed

@@ -0,0 +1,359 @@
+# Nuvu Scan - Development Status
+**Multi-Cloud Data Asset Control** - Designed from the ground up to support AWS, GCP, Azure, and Databricks.
+## ✅ Completed (v2.0.0)
+### Core Architecture
+- ✅ Cloud-agnostic base interface (`CloudProviderScan`)
+- ✅ Normalized asset categories enum (now includes `DATA_PIPELINE`, `DATA_SHARING`)
+- ✅ Cloud-agnostic data models (`Asset`, `ScanResult`, `ScanConfig`)
+- ✅ Provider module structure for future multi-cloud support
+- ✅ Modern Python packaging with `uv` and `pyproject.toml`
+- ✅ Python 3.10+ support (removed EOL versions 3.8, 3.9)
+### AWS Provider Implementation
+#### S3 Bucket Collector
+- ✅ Lists all buckets across all regions
+- ✅ Gets bucket metadata (size, storage class, tags)
+- ✅ Detects public access and policy status
+- ✅ Estimates costs (storage + requests)
+- ✅ Flags risks (empty buckets, PII naming, public access)
+- ✅ Infers ownership from tags
+- ✅ Last activity tracking via CloudTrail
+#### Glue Data Catalog Collector (Enhanced in v2.0.0)
+- ✅ **Databases & Tables**
+  - Lists databases and tables
+  - Detects empty tables and databases
+  - Links databases to their crawlers for activity tracking
+  - Table update time tracking
+  - External table (Spectrum) detection
+- ✅ **Glue Crawlers** (NEW)
+  - Lists all crawlers with status (READY, RUNNING)
+  - Schedule expression and state (SCHEDULED, unscheduled)
+  - Last crawl time and status
+  - Tables created/updated/deleted counts
+  - Risk flags: `stale_crawler` (>90 days), `no_schedule`, `never_run`
+- ✅ **Glue ETL Jobs** (NEW)
+  - Lists all ETL jobs
+  - Job type, Glue version, allocated capacity
+  - Last run status and time
+  - Cost estimation based on DPU hours
+  - Risk flags: `stale_job`, `never_run`, `failed_job`
+- ✅ **Glue Connections** (NEW)
+  - Lists JDBC connections
+  - Connection type and masked JDBC URLs
+  - Risk flags: `external_connection` (non-AWS databases)
+#### Athena Workgroup Collector
+- ✅ Lists workgroups
+- ✅ Analyzes query history (last 90 days)
+- ✅ Detects idle workgroups
+- ✅ Flags high failure rates
+- ✅ Last activity tracking from query stats
+#### Redshift Collector (Major Enhancement in v2.0.0)
+- ✅ **Provisioned Clusters** (Enhanced)
+  - Lists all clusters with detailed metrics
+  - Node type, count, encryption status
+  - CloudWatch-based activity tracking (DatabaseConnections, CPUUtilization)
+  - Cluster age calculation
+  - VPC and public accessibility detection
+  - **Reservation coverage analysis** - checks if covered by reserved nodes
+  - **WLM configuration analysis** - queue count, auto WLM, unlimited queues
+  - Potential reservation savings calculation (40% estimate)
+  - Risk flags: `publicly_accessible`, `unencrypted`, `low_activity`, `potentially_unused`, `no_reservation_long_running`, `default_wlm_only`, `unlimited_wlm_queue`
+- ✅ **Redshift Serverless**
+  - Namespaces with encryption status
+  - Workgroups with base capacity and cost estimation
+  - Risk flags: `publicly_accessible`
+- ✅ **Redshift Datashares** (NEW)
+  - Lists all datashares (inbound and outbound)
+  - Consumer account identification
+  - Cross-account and cross-region detection
+  - Public consumer allowance check
+  - Risk flags: `cross_account_sharing`, `cross_region_sharing`, `allows_public_consumers`
+- ✅ **Redshift Snapshots** (NEW)
+  - Lists all snapshots (manual and automated)
+  - Snapshot size and storage cost estimation
+  - Snapshot age tracking
+  - Orphan snapshot detection (source cluster deleted)
+  - Risk flags: `old_snapshot` (>90 days), `very_old_snapshot` (>365 days), `large_snapshot` (>1TB), `orphan_snapshot`
+- ✅ **Redshift Reserved Nodes** (NEW)
+  - Lists all reserved nodes (active and retired)
+  - Node type, count, offering type
+  - Remaining duration calculation
+  - Expiration tracking
+  - Annual and monthly cost calculation
+  - Risk flags: `reservation_expired`, `reservation_expiring_soon`, `reservation_retired`
+#### IAM Roles Collector
+- ✅ Lists IAM roles with data-access permissions
+- ✅ Detects unused roles (90+ days)
+- ✅ Flags overly permissive policies
+- ✅ Infers ownership from tags and role names
+- ✅ Last activity tracking from `RoleLastUsed`
+#### MWAA (Managed Workflows for Apache Airflow) Collector
+- ✅ Lists MWAA environments across regions
+- ✅ Collects environment details (status, version, worker counts)
+- ✅ Estimates costs based on environment class
+- ✅ Infers ownership from tags
+- ✅ Last activity tracking from `LastUpdate`
+#### Cost Explorer Integration
+- ✅ Retrieves actual costs from AWS Cost Explorer API
+- ✅ Service-level cost breakdown
+- ✅ Monthly cost estimates based on last 30 days
+- ✅ Cost summary asset in scan results
+### GCP Provider Implementation
+#### GCS (Google Cloud Storage) Collector
+- ✅ Lists all buckets
+- ✅ Gets bucket metadata (size, storage class, labels)
+- ✅ Detects public access
+- ✅ Estimates costs
+- ✅ Flags risks (empty buckets, public access)
+- ✅ Infers ownership from labels
+- ✅ Last activity tracking from bucket update time
+#### BigQuery Collector
+- ✅ Lists datasets and tables
+- ✅ Analyzes query job history (last 90 days)
+- ✅ Tracks query costs (including public datasets)
+- ✅ Creates dedicated asset for query costs
+- ✅ Estimates costs with 1 TB free tier consideration
+- ✅ Detailed usage metrics (TB processed, monthly estimates)
+- ✅ Last activity tracking from query stats
+#### Dataproc Collector
+- ✅ Lists Dataproc clusters
+- ✅ Collects cluster details and job history
+- ✅ Estimates costs
+- ✅ Last activity tracking from job stats
+#### Pub/Sub Collector
+- ✅ Lists topics and subscriptions
+- ✅ Collects topic metadata
+- ✅ Estimates costs
+- ✅ Last activity tracking
+#### IAM Service Accounts Collector
+- ✅ Lists service accounts
+- ✅ Checks for data-access roles
+- ✅ Flags overly permissive roles
+- ✅ Infers ownership from display names and email patterns
+- ✅ Last activity tracking from update time
+#### Gemini API Collector
+- ✅ Checks if Gemini API is enabled
+- ✅ Retrieves actual costs from BigQuery billing export
+- ✅ Fallback to Cloud Monitoring API for usage detection
+- ✅ Last activity tracking from billing data
+### CLI
+- ✅ Command-line interface with `nuvu scan --provider <aws|gcp>`
+- ✅ Support for multiple output formats:
+  - HTML (default) - Beautiful interactive report with governance insights
+  - JSON - Machine-readable format
+  - CSV - Spreadsheet-friendly format
+- ✅ Credential handling:
+  - AWS: env vars, CLI args, AWS profiles, IAM role assumption
+  - GCP: JSON key files, `GOOGLE_APPLICATION_CREDENTIALS`, JSON content
+- ✅ Region filtering support (AWS)
+- ✅ Project ID support (GCP)
+- ✅ **Nuvu Cloud API push** (`--push --api-key`)
+- ✅ **Collector Filtering** (NEW)
+  - `--collectors` / `-c` option to run specific collectors
+  - `--list-collectors` to show available collectors
+  - AWS collectors: `s3`, `glue`, `athena`, `redshift`, `iam`, `mwaa`
+  - GCP collectors: `gcs`, `bigquery`, `dataproc`, `pubsub`, `iam`, `gemini`
+  - Omit option for full scan (all collectors)
+- ✅ **Progress Logging** - Real-time status updates during collection
+### Enhanced HTML Reports (v2.0.0)
+- ✅ **Executive Summary** with key metrics
+- ✅ **Cost Optimization Section**
+  - Snapshot cost analysis with old snapshot flagging
+  - Reserved node status and expiration tracking
+  - Potential savings calculation
+- ✅ **Governance Insights Section**
+  - Stale/unused crawlers and ETL jobs
+  - Cross-account data sharing alerts
+  - WLM configuration review
+- ✅ Improved styling with insight boxes (warning, alert, info)
+- ✅ Potential savings card in summary
+### New Asset Categories (v2.0.0)
+- ✅ `DATA_PIPELINE` - ETL jobs, crawlers, workflows
+- ✅ `DATA_SHARING` - Datashares, cross-account sharing
+### New Asset Types (v2.0.0)
+| Asset Type | Service | Description |
+|------------|---------|-------------|
+| `glue_crawler` | Glue | Crawler status, schedule, last run |
+| `glue_job` | Glue | ETL job status, DPU allocation |
+| `glue_connection` | Glue | JDBC connections to external DBs |
+| `redshift_datashare` | Redshift | Cross-account data sharing |
+| `redshift_snapshot` | Redshift | Manual and automated snapshots |
+| `redshift_reserved_node` | Redshift | Reserved capacity purchases |
+| `redshift_serverless_workgroup` | Redshift | Serverless workgroup details |
+### New Risk Flags (v2.0.0)
+| Category | Flag | Description |
+|----------|------|-------------|
+| Glue | `stale_crawler` | Crawler hasn't run in 90+ days |
+| Glue | `no_schedule` | Crawler has no schedule configured |
+| Glue | `never_run` | Crawler or job has never been executed |
+| Glue | `stale_job` | ETL job hasn't run in 90+ days |
+| Glue | `failed_job` | Last job run failed |
+| Glue | `external_connection` | JDBC connection to non-AWS database |
+| Redshift | `cross_account_sharing` | Datashare shared to another AWS account |
+| Redshift | `cross_region_sharing` | Datashare shared across regions |
+| Redshift | `allows_public_consumers` | Datashare allows public consumers |
+| Redshift | `old_snapshot` | Snapshot older than 90 days |
+| Redshift | `very_old_snapshot` | Snapshot older than 365 days |
+| Redshift | `large_snapshot` | Snapshot larger than 1TB |
+| Redshift | `orphan_snapshot` | Source cluster no longer exists |
+| Redshift | `no_reservation_long_running` | Cluster running 90+ days without reservation |
+| Redshift | `reservation_expired` | Reserved node has expired |
+| Redshift | `reservation_expiring_soon` | Reserved node expires within 30 days |
+| Redshift | `default_wlm_only` | Cluster using only default WLM queue |
+| Redshift | `unlimited_wlm_queue` | WLM queue with no concurrency limit |
+### Cost Tracking & Reporting
+- ✅ Asset-level cost estimation for all resources
+- ✅ AWS Cost Explorer API integration for actual costs
+- ✅ GCP Cloud Billing API integration (Gemini costs)
+- ✅ BigQuery query cost tracking (including public datasets)
+- ✅ Redshift snapshot storage cost estimation
+- ✅ Potential reservation savings calculation
+- ✅ Cost summary assets showing service-level breakdowns
+### Usage & Activity Tracking
+- ✅ Last activity timestamp for all assets (`last_activity_at`)
+- ✅ Days since last use calculation
+- ✅ **CloudWatch metrics for Redshift** (DatabaseConnections, CPUUtilization)
+- ✅ CloudTrail integration for AWS (S3, Redshift)
+- ✅ Crawler run times for Glue database/table activity
+- ✅ Query history analysis (Athena, BigQuery)
+- ✅ Job history analysis (Dataproc, Glue ETL)
+### Package & Distribution
+- ✅ Modern Python packaging with `pyproject.toml` and `uv`
+- ✅ Comprehensive README.md with setup instructions
+- ✅ IAM policy file (`aws-iam-policy.json`) with 60+ read-only actions
+- ✅ GitHub Actions CI/CD workflows
+- ✅ Package structure ready for PyPI
+## 🧪 Tested
+### AWS (v2.0.0 Test Results)
+- ✅ Discovered 2,344 assets in single-region scan (us-west-2)
+  - 90 S3 buckets
+  - 1,013 Glue assets (94 databases, 904 tables, 10 crawlers, 2 jobs, 3 connections)
+  - 1 Athena workgroup
+  - 1,141 Redshift assets (5 clusters, 2 namespaces, 2 workgroups, 12 datashares, 1,096 snapshots, 24 reserved nodes)
+  - 95 IAM roles
+  - 3 MWAA environments
+- ✅ Snapshot cost totaling $88,684.92/month identified
+- ✅ Reserved node status correctly identified (active vs retired)
+- ✅ Cross-account datashares flagged correctly
+- ✅ WLM configuration analysis working
+- ✅ CloudWatch-based activity tracking working
+- ✅ HTML report with Cost Optimization and Governance sections
+### GCP
+- ✅ Discovered GCS buckets, BigQuery datasets, Dataproc clusters, Pub/Sub topics
+- ✅ IAM service accounts scanning
+- ✅ Gemini API cost tracking from billing export
+- ✅ BigQuery query cost tracking (including public datasets)
+## 🔒 IAM Permissions Required
+### AWS
+The complete IAM policy is available in `aws-iam-policy.json`. Key permission groups:
+| Permission Group | Actions | Purpose |
+|-----------------|---------|---------|
+| S3 | 9 actions | Bucket metadata, public access, encryption |
+| Glue Data Catalog | 6 actions | Databases, tables, partitions |
+| Glue Crawlers | 4 actions | Crawler status, metrics |
+| Glue ETL Jobs | 5 actions | Job status, run history |
+| Glue Connections | 2 actions | JDBC connections |
+| Athena | 4 actions | Workgroups, query history |
+| Redshift Clusters | 4 actions | Cluster metadata, logging |
+| Redshift Snapshots | 3 actions | Snapshot inventory |
+| Redshift Reserved Nodes | 3 actions | Reservation status |
+| Redshift WLM | 2 actions | Parameter groups |
+| Redshift Datashares | 3 actions | Cross-account sharing |
+| Redshift Serverless | 5 actions | Namespaces, workgroups |
+| IAM | 8 actions | Role policies, data access |
+| MWAA | 3 actions | Airflow environments |
+| CloudWatch | 3 actions | Metrics for activity tracking |
+| CloudTrail | 1 action | Last activity detection |
+| Cost Explorer | 5 actions | Actual cost reporting |
+| STS | 1 action | Account identity |
+**Total: 71 read-only actions** following the principle of least privilege.
+### GCP
+Required IAM roles for the service account:
+- `roles/storage.objectViewer` - Cloud Storage
+- `roles/bigquery.dataViewer` + `roles/bigquery.jobUser` - BigQuery
+- `roles/dataproc.viewer` - Dataproc
+- `roles/pubsub.subscriber` - Pub/Sub
+- `roles/iam.serviceAccountViewer` - IAM service accounts
+- `roles/serviceusage.serviceUsageViewer` - API status
+- `roles/billing.costsViewer` - Cloud Billing cost data (optional)
+- `roles/monitoring.viewer` - Cloud Monitoring
+## 📋 TODO for Full v2
+### Additional AWS Collectors
+- [ ] OpenSearch collector
+- [ ] EMR collector
+- [ ] SageMaker collector
+- [ ] Bedrock collector
+- [ ] MSK (Kafka) collector
+- [ ] Kinesis collector
+- [ ] DataSync/Transfer Family collector
+- [ ] EBS Volumes & Snapshots collector
+- [ ] VPC Endpoints collector
+- [ ] Lake Formation collector
+- [ ] Step Functions collector
+- [ ] EventBridge collector
+### Redshift Deep Governance (Phase 2)
+- [ ] Schema-level inventory via Redshift Data API
+- [ ] Table-level inventory with column metadata
+- [ ] PII detection via column naming heuristics
+- [ ] Permission matrix visualization
+- [ ] Usage-based stale table detection (STL_SCAN)
+### Additional GCP Collectors
+- [ ] Cloud SQL collector
+- [ ] Cloud Spanner collector
+- [ ] Bigtable collector
+- [ ] Firestore collector
+- [ ] Vertex AI collector
+- [ ] Dataflow collector
+- [ ] Cloud Composer collector
+### Enhancements
+- [ ] Parallel collection for faster scans
+- [ ] Progress bars with ETA
+- [ ] PDF report export
+- [ ] Cost alerts and thresholds
+- [ ] Asset dependency mapping
+- [ ] Realized savings tracking (scan-over-scan comparison)
+## 🚀 Next Steps
+1. **Redshift Deep Governance** - Schema/table level inventory without data access
+2. **Azure Provider** - Blob Storage, Data Lake, Synapse, Databricks
+3. **Databricks Provider** - Workspace discovery, Unity Catalog
+4. **Enterprise Features** - RBAC, audit logging, compliance reporting

nuvu_scan-2.0.0/Makefile ADDED Viewed

@@ -0,0 +1,39 @@
+# Developer task shortcuts for nuvu-scan. Run `make help` for the list.
+# Every target is a command, not a file, so all of them are declared phony;
+# otherwise a file or directory with the same name (e.g. `pre-commit`, `build`)
+# would make the target look up to date and silently skip its recipe.
+.PHONY: install lint format fix pre-commit test build clean help
+help:
+	@echo "Available commands:"
+	@echo "  make install    - Install dependencies (including dev)"
+	@echo "  make lint       - Run linting checks"
+	@echo "  make format     - Check code formatting"
+	@echo "  make fix        - Auto-fix linting and formatting issues"
+	@echo "  make pre-commit - Run all pre-commit hooks"
+	@echo "  make test       - Run tests with coverage"
+	@echo "  make build      - Build package"
+	@echo "  make clean      - Clean build artifacts"
+install:
+	uv sync --dev
+	uv run pre-commit install
+lint:
+	uv run ruff check .
+# Check only: reports formatting differences without rewriting files (use `make fix` to apply).
+format:
+	uv run ruff format --check .
+fix:
+	uv run ruff check --fix .
+	uv run ruff format .
+pre-commit:
+	uv run pre-commit run --all-files
+test:
+	uv run pytest --cov=nuvu_scan --cov-report=term-missing
+build:
+	uv build
+clean:
+	rm -rf dist/ build/ *.egg-info/ .coverage coverage.xml .pytest_cache/ .ruff_cache/
+	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true

nuvu-scan 1.3.7__tar.gz → 2.0.0__tar.gz

nuvu-scan 1.3.7tar.gz → 2.0.0tar.gz