nuvu-scan 2.0.2__tar.gz → 2.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. nuvu_scan-2.1.6/.cursorrules +103 -0
  2. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.gitignore +3 -0
  3. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.pre-commit-config.yaml +11 -0
  4. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/CONTRIBUTING.md +39 -12
  5. nuvu_scan-2.1.6/DEVELOPMENT_STATUS.md +216 -0
  6. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/PKG-INFO +45 -30
  7. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/README.md +44 -29
  8. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/commands/scan.py +10 -1
  9. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/html.py +141 -20
  10. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/base.py +44 -0
  11. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/aws_scanner.py +187 -42
  12. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/apigateway.py +197 -0
  13. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/athena.py +181 -0
  14. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/backup.py +252 -0
  15. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/cloudfront.py +132 -0
  16. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/cloudtrail.py +189 -0
  17. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/cloudwatch.py +163 -0
  18. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/cost_explorer.py +90 -0
  19. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/dynamodb.py +236 -0
  20. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/ec2.py +572 -0
  21. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/ecs.py +243 -0
  22. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/eks.py +246 -0
  23. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/elasticache.py +325 -0
  24. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/elb.py +198 -0
  25. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/glue.py +104 -34
  26. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/iam.py +593 -0
  27. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/kinesis.py +174 -0
  28. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/kms.py +186 -0
  29. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/lakeformation.py +303 -0
  30. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/lambda_collector.py +224 -0
  31. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/misc_services.py +320 -0
  32. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/mwaa.py +10 -5
  33. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/rds.py +405 -0
  34. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/redshift.py +381 -18
  35. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/route53.py +183 -0
  36. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/secrets.py +178 -0
  37. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/security_services.py +329 -0
  38. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/sns_sqs.py +284 -0
  39. nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/vpc_costs.py +296 -0
  40. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/pyproject.toml +1 -1
  41. nuvu_scan-2.1.6/tests/test_cli.py +135 -0
  42. nuvu_scan-2.1.6/tests/test_formatters.py +172 -0
  43. nuvu_scan-2.1.6/tests/test_push_payload.py +232 -0
  44. nuvu_scan-2.1.6/tests/test_scanners.py +149 -0
  45. nuvu_scan-2.0.2/DEVELOPMENT_STATUS.md +0 -359
  46. nuvu_scan-2.0.2/nuvu_scan/core/providers/aws/collectors/athena.py +0 -146
  47. nuvu_scan-2.0.2/nuvu_scan/core/providers/aws/collectors/iam.py +0 -277
  48. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  49. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.github/workflows/ci.yml +0 -0
  50. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.github/workflows/release.yml +0 -0
  51. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/Makefile +0 -0
  52. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/RELEASE.md +0 -0
  53. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/__init__.py +0 -0
  54. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/__init__.py +0 -0
  55. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/commands/__init__.py +0 -0
  56. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/__init__.py +0 -0
  57. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/csv.py +0 -0
  58. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/json.py +0 -0
  59. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/main.py +0 -0
  60. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/__init__.py +0 -0
  61. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/analyzers/__init__.py +0 -0
  62. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/models/__init__.py +0 -0
  63. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/__init__.py +0 -0
  64. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/__init__.py +0 -0
  65. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/__init__.py +0 -0
  66. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/s3.py +0 -0
  67. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/__init__.py +0 -0
  68. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/__init__.py +0 -0
  69. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/bigquery.py +0 -0
  70. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/billing.py +0 -0
  71. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/dataproc.py +0 -0
  72. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/gcs.py +0 -0
  73. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/gemini.py +0 -0
  74. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/iam.py +0 -0
  75. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/pubsub.py +0 -0
  76. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/gcp_scanner.py +0 -0
  77. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/tests/__init__.py +0 -0
  78. {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/tests/test_base.py +0 -0
@@ -0,0 +1,103 @@
1
+ # Nuvu Scan - AI Agent Instructions
2
+
3
+ ## Project Overview
4
+ Nuvu Scan is an open-source CLI tool for cloud data governance. It scans AWS and GCP to discover, govern, and optimize cloud data assets.
5
+
6
+ ## Critical Rules for AI Agents
7
+
8
+ ### 1. ALWAYS Write Tests for New Features
9
+ Every new feature, CLI option, or code change MUST include corresponding tests:
10
+
11
+ - **CLI changes**: Add tests in `tests/test_cli.py`
12
+ - **Formatter changes**: Add tests in `tests/test_formatters.py`
13
+ - **Scanner/collector changes**: Add tests in `tests/test_scanners.py`
14
+ - **API/push changes**: Add tests in `tests/test_push_payload.py`
15
+ - **New collectors**: Create `tests/test_<collector_name>.py`
16
+
17
+ Tests run automatically on every commit via pre-commit hooks. Commits will be blocked if tests fail.
18
+
19
+ ### 2. Code Quality Standards
20
+ - Use `ruff` for linting and formatting (NOT black)
21
+ - Run `uv run ruff format .` before committing
22
+ - Run `uv run ruff check .` to check for issues
23
+ - Pre-commit hooks will auto-run: ruff, ruff-format, bandit, pytest
24
+
25
+ ### 3. CLI Option Changes
26
+ When adding/modifying CLI options:
27
+ 1. Add `@click.option()` decorator in `nuvu_scan/cli/commands/scan.py`
28
+ 2. Add corresponding function parameter
29
+ 3. Add test in `tests/test_cli.py` to verify option exists
30
+ 4. Update `README.md` with usage examples
31
+
32
+ ### 4. Push Payload Format (API Compatibility)
33
+ When modifying push functionality:
34
+ - Payload MUST match the schema expected by `/api/scans/import`
35
+ - Required fields: `provider`, `account_id`, `scan_timestamp`, `assets`, `total_cost_estimate_usd`
36
+ - Asset fields: `provider`, `asset_type`, `normalized_category`, `region`, `arn`, `name`
37
+ - Add tests in `tests/test_push_payload.py` to verify format
38
+
39
+ ### 5. Normalized Categories
40
+ Use only these categories from `NormalizedCategory` enum:
41
+ - OBJECT_STORAGE, DATA_WAREHOUSE, STREAMING, COMPUTE, ML_TRAINING
42
+ - DATA_CATALOG, DATA_INTEGRATION, DATA_PIPELINE, DATA_SHARING
43
+ - QUERY_ENGINE, SEARCH, DATABASE, SECURITY, BILLING
44
+
45
+ ### 6. Adding New Collectors
46
+ When adding a new AWS/GCP collector:
47
+ 1. Create collector in `nuvu_scan/core/providers/<provider>/collectors/<name>.py`
48
+ 2. Register in the scanner's collector list
49
+ 3. Add to `--list-collectors` output
50
+ 4. Update `README.md` with new service coverage
51
+ 5. Create tests with mocked API responses
52
+ 6. Update IAM policy if new permissions needed
53
+
54
+ ### 7. Dependencies
55
+ - Use `uv` for package management (NOT pip directly)
56
+ - Add dependencies to `pyproject.toml`
57
+ - Run `uv sync --dev` after adding dependencies
58
+
59
+ ### 8. Testing Commands
60
+ ```bash
61
+ # Run all tests
62
+ uv run pytest
63
+
64
+ # Run with coverage
65
+ uv run pytest --cov=nuvu_scan
66
+
67
+ # Run specific test file
68
+ uv run pytest tests/test_cli.py
69
+
70
+ # Run pre-commit checks (includes tests)
71
+ uv run pre-commit run --all-files
72
+ ```
73
+
74
+ ### 9. File Structure
75
+ ```
76
+ nuvu_scan/
77
+ ├── cli/
78
+ │ ├── commands/scan.py # CLI commands and options
79
+ │ └── formatters/ # HTML, JSON, CSV output
80
+ ├── core/
81
+ │ ├── base.py # Asset, ScanResult, NormalizedCategory
82
+ │ └── providers/
83
+ │ ├── aws/collectors/ # S3, Glue, Redshift, etc.
84
+ │ └── gcp/collectors/ # GCS, BigQuery, etc.
85
+ tests/
86
+ ├── test_cli.py # CLI option tests
87
+ ├── test_formatters.py # Output format tests
88
+ ├── test_scanners.py # Scanner tests
89
+ └── test_push_payload.py # API payload tests
90
+ ```
91
+
92
+ ### 10. Commit Guidelines
93
+ - Pre-commit hooks will run automatically
94
+ - All tests must pass before commit is accepted
95
+ - Use conventional commit messages: `feat:`, `fix:`, `test:`, `docs:`, `chore:`
96
+
97
+ ## Summary
98
+ **Before any code change is complete, ensure:**
99
+ 1. ✅ Tests are written/updated
100
+ 2. ✅ `uv run pytest` passes
101
+ 3. ✅ `uv run ruff check .` passes
102
+ 4. ✅ README.md is updated (if user-facing)
103
+ 5. ✅ Pre-commit hooks pass on commit
@@ -61,6 +61,9 @@ test-scan*.html
61
61
  *.json
62
62
  !tests/**/*.json
63
63
 
64
+ # Raw scan data (contains sensitive AWS data)
65
+ raws/
66
+
64
67
  # Environment variables
65
68
  .env
66
69
  .env.local
@@ -34,6 +34,17 @@ repos:
34
34
  args: ["-c", "pyproject.toml"]
35
35
  additional_dependencies: ["bandit[toml]"]
36
36
 
37
+ # Run tests before commit (only when Python files change)
38
+ - repo: local
39
+ hooks:
40
+ - id: pytest
41
+ name: pytest
42
+ entry: uv run pytest tests/ -x -q --tb=no
43
+ language: system
44
+ pass_filenames: false
45
+ types: [python]
46
+ stages: [pre-commit]
47
+
37
48
  # Configuration
38
49
  ci:
39
50
  autofix_commit_msg: |
@@ -30,14 +30,22 @@ cd nuvu-scan
30
30
  uv sync --dev # Creates .venv automatically, no activation needed!
31
31
  ```
32
32
 
33
- ### 2. Make Changes
33
+ ### 2. Install Pre-commit Hooks
34
+
35
+ ```bash
36
+ uv run pre-commit install
37
+ ```
38
+
39
+ This ensures tests and linting run automatically on every commit.
40
+
41
+ ### 3. Make Changes
34
42
 
35
43
  - Write clear, readable code
36
- - Follow existing code style (enforced by black and ruff)
37
- - Add tests for new functionality
44
+ - Follow existing code style (enforced by ruff)
45
+ - **⚠️ Add tests for new functionality** (required - pre-commit hooks run the test suite and block the commit if any test fails)
38
46
  - Update documentation
39
47
 
40
- ### 3. Test Your Changes
48
+ ### 4. Test Your Changes
41
49
 
42
50
  ```bash
43
51
  # Run all tests (uv automatically uses .venv)
@@ -47,23 +55,30 @@ uv run pytest
47
55
  uv run pytest --cov=nuvu_scan
48
56
 
49
57
  # Check code quality
50
- uv run black .
58
+ uv run ruff format .
51
59
  uv run ruff check .
52
60
  uv run mypy nuvu_scan
61
+
62
+ # Run all pre-commit checks (recommended)
63
+ uv run pre-commit run --all-files
53
64
  ```
54
65
 
55
66
  **Note**: No need to activate `.venv` - `uv run` handles it automatically!
56
67
 
57
- ### 4. Commit
68
+ ### 5. Commit
69
+
70
+ Pre-commit hooks will automatically run ruff, bandit, and pytest. If any check fails, the commit will be blocked.
58
71
 
59
- Use clear, descriptive commit messages:
72
+ Use conventional commit messages:
60
73
 
61
74
  ```bash
62
- git commit -m "Add GCP BigQuery collector"
63
- git commit -m "Fix S3 bucket size calculation"
75
+ git commit -m "feat: add GCP BigQuery collector"
76
+ git commit -m "fix: correct S3 bucket size calculation"
77
+ git commit -m "test: add tests for Redshift collector"
78
+ git commit -m "docs: update CLI options in README"
64
79
  ```
65
80
 
66
- ### 5. Push and Create PR
81
+ ### 6. Push and Create PR
67
82
 
68
83
  ```bash
69
84
  git push origin feature/your-feature
@@ -93,11 +108,23 @@ See the detailed guide in README.md under "Adding a New Cloud Provider".
93
108
 
94
109
  ## Code Style
95
110
 
96
- - **Formatting**: Use `black` (line length: 100)
97
- - **Linting**: Use `ruff`
111
+ - **Formatting**: Use `ruff format`
112
+ - **Linting**: Use `ruff check`
98
113
  - **Type hints**: Add type hints where helpful
99
114
  - **Docstrings**: Add docstrings for public functions/classes
100
115
 
116
+ ## Testing Requirements
117
+
118
+ **Every new feature MUST include tests.** Pre-commit hooks run `pytest` automatically.
119
+
120
+ | Change Type | Test File |
121
+ |-------------|-----------|
122
+ | CLI options | `tests/test_cli.py` |
123
+ | Formatters (HTML/JSON/CSV) | `tests/test_formatters.py` |
124
+ | Scanners/Collectors | `tests/test_scanners.py` |
125
+ | Push/API changes | `tests/test_push_payload.py` |
126
+ | New collector | `tests/test_<collector>.py` |
127
+
101
128
  ## Pull Request Process
102
129
 
103
130
  1. Ensure all tests pass
@@ -0,0 +1,216 @@
1
+ # Nuvu Scan - Development Status
2
+
3
+ **Multi-Cloud Data Asset Scanner** - Open-source scanner that discovers and inventories cloud data assets across AWS and GCP, with Azure and Databricks support planned (see Roadmap).
4
+
5
+ > **Note**: nuvu-scan is a read-only scanner that collects asset metadata. For governance, policy enforcement, and decision-making, see [nuvu-cloud](https://github.com/nuvudev/nuvu-cloud).
6
+
7
+ ## ✅ Available Collectors (v2.1.0)
8
+
9
+ ### AWS Collectors (18 collectors)
10
+
11
+ | Collector | Command Flag | What It Scans | Key Metrics |
12
+ |-----------|--------------|---------------|-------------|
13
+ | **S3** | `s3` | Buckets, policies, encryption | Size, public access, versioning |
14
+ | **Glue** | `glue` | Databases, tables, crawlers, jobs, connections | Table counts, crawl status, job runs |
15
+ | **Athena** | `athena` | Workgroups, query history | Query stats, failure rates |
16
+ | **Redshift** | `redshift` | Clusters, serverless, snapshots, datashares, reserved nodes | CPU, connections, WLM, costs |
17
+ | **IAM** | `iam` | Roles, users, groups, access keys | Permissions, MFA, key age, last used |
18
+ | **MWAA** | `mwaa` | Apache Airflow environments | Environment class, worker counts |
19
+ | **EC2/VPC** | `ec2` | Security groups, VPCs, instances, EBS volumes, Elastic IPs | Open ports, public IPs, volume encryption |
20
+ | **KMS** | `kms` | Customer-managed encryption keys | Rotation status, key state |
21
+ | **RDS** | `rds` | RDS instances, Aurora clusters, snapshots | Encryption, multi-AZ, backup retention |
22
+ | **DynamoDB** | `dynamodb` | DynamoDB tables | PITR, encryption, capacity mode |
23
+ | **Lambda** | `lambda` | Lambda functions | Runtime, code size, VPC config |
24
+ | **Secrets Manager** | `secrets` | Secrets | Rotation, last accessed, age |
25
+ | **AWS Backup** | `backup` | Backup vaults, backup plans | Recovery points, lifecycle |
26
+ | **EKS** | `eks` | EKS clusters, node groups | K8s version, endpoint access |
27
+ | **SNS/SQS** | `sns_sqs` | SNS topics, SQS queues | Encryption, DLQ, message counts |
28
+ | **Lake Formation** | `lakeformation` | Data lake settings, permissions, LF-Tags | Permission grants, admin count |
29
+ | **CloudTrail** | `cloudtrail` | CloudTrail trails | Multi-region, encryption, logging status |
30
+ | **CloudWatch** | `cloudwatch` | CloudWatch log groups | Retention, encryption, size |
31
+
32
+ ### GCP Collectors (6 collectors)
33
+
34
+ | Collector | Command Flag | What It Scans | Key Metrics |
35
+ |-----------|--------------|---------------|-------------|
36
+ | **GCS** | `gcs` | Cloud Storage buckets | Size, public access, lifecycle |
37
+ | **BigQuery** | `bigquery` | Datasets, tables, query history | Table sizes, query costs |
38
+ | **Dataproc** | `dataproc` | Dataproc clusters | Cluster config, job history |
39
+ | **Pub/Sub** | `pubsub` | Topics, subscriptions | Message counts |
40
+ | **IAM** | `iam` | Service accounts | Roles, permissions |
41
+ | **Gemini** | `gemini` | Gemini API usage | API costs |
42
+
43
+ ## 📋 Usage
44
+
45
+ ### Basic Scan
46
+ ```bash
47
+ # Scan all AWS collectors
48
+ nuvu-scan aws
49
+
50
+ # Scan all GCP collectors
51
+ nuvu-scan gcp --credentials /path/to/key.json
52
+ ```
53
+
54
+ ### Selective Scanning
55
+ ```bash
56
+ # Scan specific collectors
57
+ nuvu-scan aws --collectors s3,rds,iam,kms
58
+
59
+ # List available collectors
60
+ nuvu-scan aws --list-collectors
61
+ ```
62
+
63
+ ### Output Formats
64
+ ```bash
65
+ # HTML report (default)
66
+ nuvu-scan aws -o report.html
67
+
68
+ # JSON output
69
+ nuvu-scan aws -o assets.json
70
+
71
+ # CSV output
72
+ nuvu-scan aws -o assets.csv
73
+ ```
74
+
75
+ ### Push to Nuvu Cloud
76
+ ```bash
77
+ # Push results to nuvu-cloud for governance
78
+ nuvu-scan aws --push --api-key YOUR_API_KEY
79
+ ```
80
+
81
+ ## 🔒 IAM Permissions
82
+
83
+ The complete IAM policy is in `aws-iam-policy.json` (60+ permission statements).
84
+
85
+ ### Permission Categories
86
+
87
+ | Category | Services | Example Actions |
88
+ |----------|----------|-----------------|
89
+ | **Storage** | S3, EBS | `s3:GetBucket*`, `ec2:DescribeVolumes` |
90
+ | **Compute** | EC2, Lambda, EKS | `ec2:DescribeInstances`, `lambda:ListFunctions` |
91
+ | **Database** | RDS, DynamoDB, Redshift | `rds:DescribeDB*`, `dynamodb:DescribeTable` |
92
+ | **Data Analytics** | Glue, Athena, Lake Formation | `glue:GetTables`, `athena:ListWorkGroups` |
93
+ | **Security** | IAM, KMS, Secrets Manager | `iam:ListRoles`, `kms:DescribeKey` |
94
+ | **Networking** | VPC, Security Groups | `ec2:DescribeSecurityGroups`, `ec2:DescribeVpcs` |
95
+ | **Messaging** | SNS, SQS | `sns:GetTopicAttributes`, `sqs:GetQueueAttributes` |
96
+ | **Observability** | CloudWatch, CloudTrail | `logs:DescribeLogGroups`, `cloudtrail:DescribeTrails` |
97
+ | **Resilience** | AWS Backup | `backup:ListBackupVaults`, `backup:ListBackupPlans` |
98
+ | **Cost** | Cost Explorer | `ce:GetCostAndUsage` |
99
+
100
+ All permissions are **read-only** following the principle of least privilege.
101
+
102
+ ## 📊 Asset Types Collected
103
+
104
+ ### Compute
105
+ - EC2 instances, EBS volumes, Elastic IPs
106
+ - Lambda functions
107
+ - EKS clusters, node groups
108
+
109
+ ### Storage
110
+ - S3 buckets
111
+ - EBS volumes
112
+
113
+ ### Databases
114
+ - RDS instances, Aurora clusters
115
+ - DynamoDB tables
116
+ - Redshift clusters (provisioned & serverless)
117
+
118
+ ### Data Catalog
119
+ - Glue databases, tables, crawlers, jobs
120
+ - Lake Formation settings, permissions, LF-Tags
121
+
122
+ ### Security
123
+ - IAM roles, users, groups, access keys
124
+ - KMS keys
125
+ - Secrets Manager secrets
126
+ - Security groups, VPCs
127
+
128
+ ### Observability
129
+ - CloudWatch log groups
130
+ - CloudTrail trails
131
+
132
+ ### Messaging
133
+ - SNS topics
134
+ - SQS queues
135
+
136
+ ### Backup
137
+ - AWS Backup vaults and plans
138
+
139
+ ## 🏷️ Risk Flags
140
+
141
+ nuvu-scan identifies potential issues by flagging assets:
142
+
143
+ | Category | Example Flags |
144
+ |----------|---------------|
145
+ | **Security** | `unencrypted`, `publicly_accessible`, `mfa_disabled`, `open_to_internet` |
146
+ | **Access** | `unused_role`, `old_key`, `overly_permissive`, `public_access` |
147
+ | **Operations** | `no_backups`, `stale_crawler`, `deprecated_runtime`, `logging_disabled` |
148
+ | **Cost** | `unattached_volume`, `old_snapshot`, `unused_eip` |
149
+ | **Compliance** | `no_retention_policy`, `rotation_disabled`, `pitr_disabled` |
150
+
151
+ ## 🧪 Testing
152
+
153
+ ```bash
154
+ # Run tests
155
+ uv run pytest
156
+
157
+ # Run with coverage
158
+ uv run pytest --cov=nuvu_scan
159
+ ```
160
+
161
+ ## 📦 Installation
162
+
163
+ ```bash
164
+ # From PyPI
165
+ pip install nuvu-scan
166
+
167
+ # From source
168
+ git clone https://github.com/nuvudev/nuvu-scan
169
+ cd nuvu-scan
170
+ uv sync
171
+ ```
172
+
173
+ ## 📋 Roadmap
174
+
175
+ ### Additional AWS Collectors
176
+ - [ ] OpenSearch collector
177
+ - [ ] EMR collector
178
+ - [ ] SageMaker collector
179
+ - [ ] Bedrock collector
180
+ - [ ] MSK (Kafka) collector
181
+ - [ ] Kinesis collector
182
+ - [ ] Step Functions collector
183
+ - [ ] EventBridge collector
184
+
185
+ ### Additional GCP Collectors
186
+ - [ ] Cloud SQL collector
187
+ - [ ] Cloud Spanner collector
188
+ - [ ] Bigtable collector
189
+ - [ ] Firestore collector
190
+ - [ ] Vertex AI collector
191
+ - [ ] Dataflow collector
192
+ - [ ] Cloud Composer collector
193
+
194
+ ### Azure Provider
195
+ - [ ] Blob Storage collector
196
+ - [ ] Data Lake collector
197
+ - [ ] Synapse collector
198
+ - [ ] Azure Databricks collector
199
+
200
+ ### Databricks Provider
201
+ - [ ] Workspace discovery
202
+ - [ ] Unity Catalog
203
+
204
+ ### Enhancements
205
+ - [ ] Parallel collection for faster scans
206
+ - [ ] Progress bars with ETA
207
+ - [ ] Incremental scanning (delta detection)
208
+ - [ ] Schema-level inventory (Redshift Data API)
209
+
210
+ ## 🤝 Contributing
211
+
212
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
213
+
214
+ ## 📄 License
215
+
216
+ Apache 2.0 License - see [LICENSE](LICENSE) for details.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nuvu-scan
3
- Version: 2.0.2
3
+ Version: 2.1.6
4
4
  Summary: Multi-Cloud Data Asset Control - Discover, govern, and optimize your cloud data assets across AWS and GCP
5
5
  Project-URL: Homepage, https://nuvu.dev
6
6
  Project-URL: Documentation, https://github.com/nuvudev/nuvu-scan#readme
@@ -48,6 +48,10 @@ Description-Content-Type: text/markdown
48
48
 
49
49
  # Nuvu Scan
50
50
 
51
+ [![PyPI Downloads](https://img.shields.io/pypi/dm/nuvu-scan?style=flat-square&label=downloads&color=667eea)](https://pypistats.org/packages/nuvu-scan)
52
+ [![GitHub Stars](https://img.shields.io/github/stars/nuvudev/nuvu-scan?style=flat-square&label=stars&color=667eea)](https://github.com/nuvudev/nuvu-scan)
53
+ [![PyPI Version](https://img.shields.io/pypi/v/nuvu-scan?style=flat-square&label=version&color=764ba2)](https://pypi.org/project/nuvu-scan/)
54
+
51
55
  **Take Control of Your Cloud Data Estate**
52
56
  Discover, govern, and optimize your cloud data assets across **AWS and GCP** — reduce wasted spend, enforce compliance, and gain full visibility into unused, idle, or risky resources.
53
57
 
@@ -79,28 +83,6 @@ pip install nuvu-scan
79
83
 
80
84
  ## Usage
81
85
 
82
- ### Optional: Push results to Nuvu Cloud
83
-
84
- Nuvu Scan is fully open-source and runs standalone — no account required.
85
- If you want dashboards, team workflows, and long‑term history, you can optionally push results to Nuvu Cloud.
86
-
87
- ```bash
88
- # Push results to Nuvu Cloud (optional)
89
- nuvu scan --provider aws --push --api-key your_nuvu_api_key
90
-
91
- # Or use environment variable
92
- export NUVU_API_KEY=your_nuvu_api_key
93
- nuvu scan --provider aws --push
94
-
95
- # Custom cloud URL (defaults to https://nuvu.dev)
96
- nuvu scan --provider aws --push --nuvu-cloud-url https://nuvu.dev
97
- ```
98
-
99
- What this means for open‑source users:
100
- - You can keep everything local and export JSON/CSV/HTML.
101
- - No cloud credentials are ever sent to Nuvu Cloud — only scan results.
102
- - The data collected is identical whether you run locally or push.
103
-
104
86
  ### AWS Scanning
105
87
 
106
88
  **Prerequisites:** Create an IAM user or role with the read-only policy from `aws-iam-policy.json`. See the [AWS Setup](#aws-v1---available-now) section below for detailed instructions.
@@ -120,11 +102,14 @@ nuvu scan --provider aws \
120
102
  --access-key-id your-key \
121
103
  --secret-access-key your-secret
122
104
 
123
- # Output to JSON
124
- nuvu scan --provider aws --output-format json --output-file report.json
105
+ # Output to HTML/JSON/CSV
106
+ nuvu scan --provider aws --output-format html --output-file report.html
125
107
 
126
108
  # Scan specific regions
127
109
  nuvu scan --provider aws --region us-east-1 --region eu-west-1
110
+
111
+ # Scan specific collector
112
+ nuvu scan --provider aws --output-format html --collectors redshift --region us-west-1
128
113
  ```
129
114
 
130
115
  #### 2. Access Key + Secret Key + Session Token (Temporary Credentials)
@@ -203,6 +188,10 @@ You can optionally push scan results to a remote API for centralized tracking:
203
188
  ```bash
204
189
  # Push results to a remote endpoint
205
190
  nuvu scan --provider aws --push --api-key your-api-key --api-url https://your-api.example.com
191
+
192
+ # Push results to Nuvu Cloud for the data governance layer
193
+ nuvu scan --provider aws --push --api-key your-api-key
194
+
206
195
  ```
207
196
 
208
197
  This is useful for integrating with your own data governance platforms or CI/CD pipelines.
@@ -364,6 +353,29 @@ Nuvu requires read-only access to your GCP project via a Service Account. The to
364
353
  ### Azure, Databricks (Coming Soon)
365
354
  Multi-cloud support is built into the architecture. Additional providers will be added in future releases.
366
355
 
356
+ ### Optional: Push results to Nuvu Cloud
357
+
358
+ Nuvu Scan is fully open-source and runs standalone — no account required.
359
+ If you want dashboards, team workflows, data estate time travel, and long‑term history, you can optionally push results to Nuvu Cloud.
360
+
361
+ ```bash
362
+ # Push results to Nuvu Cloud (optional)
363
+ nuvu scan --provider aws --push --api-key your_nuvu_api_key
364
+
365
+ # Or use environment variable
366
+ export NUVU_API_KEY=your_nuvu_api_key
367
+ nuvu scan --provider aws --push
368
+
369
+ # Custom API URL (defaults to https://nuvu.dev)
370
+ nuvu scan --provider aws --push --api-url https://your-api.example.com
371
+ ```
372
+
373
+ What this means for open‑source users:
374
+ - You can keep everything local and export JSON/CSV/HTML.
375
+ - No cloud credentials are ever sent to Nuvu Cloud — only scan results.
376
+ - The data collected is identical whether you run locally or push.
377
+
378
+
367
379
  ## License
368
380
 
369
381
  Apache 2.0
@@ -413,14 +425,17 @@ uv run pytest tests/test_s3_collector.py
413
425
  ### Code Quality
414
426
 
415
427
  ```bash
416
- # Format code with black
417
- uv run black .
428
+ # Format code with ruff
429
+ uv run ruff format .
418
430
 
419
431
  # Lint with ruff
420
432
  uv run ruff check .
421
433
 
422
434
  # Type checking with mypy
423
435
  uv run mypy nuvu_scan
436
+
437
+ # Run all pre-commit checks (including tests)
438
+ uv run pre-commit run --all-files
424
439
  ```
425
440
 
426
441
  ### Building the Package
@@ -503,11 +518,11 @@ git checkout -b fix/your-bug-description
503
518
 
504
519
  ### 3. Make Changes
505
520
 
506
- - Follow the existing code style (enforced by black and ruff)
507
- - Add tests for new features
521
+ - Follow the existing code style (enforced by ruff)
522
+ - **Add tests for new features** (required - pre-commit runs tests)
508
523
  - Update documentation as needed
509
524
  - Ensure all tests pass: `uv run pytest`
510
- - Run code quality checks: `uv run black . && uv run ruff check .`
525
+ - Run code quality checks: `uv run ruff format . && uv run ruff check .`
511
526
 
512
527
  ### 4. Commit and Push
513
528