nuvu-scan 2.0.2__tar.gz → 2.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nuvu_scan-2.1.6/.cursorrules +103 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.gitignore +3 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.pre-commit-config.yaml +11 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/CONTRIBUTING.md +39 -12
- nuvu_scan-2.1.6/DEVELOPMENT_STATUS.md +216 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/PKG-INFO +45 -30
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/README.md +44 -29
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/commands/scan.py +10 -1
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/html.py +141 -20
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/base.py +44 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/aws_scanner.py +187 -42
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/apigateway.py +197 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/athena.py +181 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/backup.py +252 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/cloudfront.py +132 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/cloudtrail.py +189 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/cloudwatch.py +163 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/cost_explorer.py +90 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/dynamodb.py +236 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/ec2.py +572 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/ecs.py +243 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/eks.py +246 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/elasticache.py +325 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/elb.py +198 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/glue.py +104 -34
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/iam.py +593 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/kinesis.py +174 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/kms.py +186 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/lakeformation.py +303 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/lambda_collector.py +224 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/misc_services.py +320 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/mwaa.py +10 -5
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/rds.py +405 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/redshift.py +381 -18
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/route53.py +183 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/secrets.py +178 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/security_services.py +329 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/sns_sqs.py +284 -0
- nuvu_scan-2.1.6/nuvu_scan/core/providers/aws/collectors/vpc_costs.py +296 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/pyproject.toml +1 -1
- nuvu_scan-2.1.6/tests/test_cli.py +135 -0
- nuvu_scan-2.1.6/tests/test_formatters.py +172 -0
- nuvu_scan-2.1.6/tests/test_push_payload.py +232 -0
- nuvu_scan-2.1.6/tests/test_scanners.py +149 -0
- nuvu_scan-2.0.2/DEVELOPMENT_STATUS.md +0 -359
- nuvu_scan-2.0.2/nuvu_scan/core/providers/aws/collectors/athena.py +0 -146
- nuvu_scan-2.0.2/nuvu_scan/core/providers/aws/collectors/iam.py +0 -277
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.github/workflows/ci.yml +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/.github/workflows/release.yml +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/Makefile +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/RELEASE.md +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/commands/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/csv.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/formatters/json.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/cli/main.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/analyzers/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/models/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/aws/collectors/s3.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/bigquery.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/billing.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/dataproc.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/gcs.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/gemini.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/iam.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/collectors/pubsub.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/nuvu_scan/core/providers/gcp/gcp_scanner.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/tests/__init__.py +0 -0
- {nuvu_scan-2.0.2 → nuvu_scan-2.1.6}/tests/test_base.py +0 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Nuvu Scan - AI Agent Instructions
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
Nuvu Scan is an open-source CLI tool for cloud data governance. It scans AWS and GCP to discover, govern, and optimize cloud data assets.
|
|
5
|
+
|
|
6
|
+
## Critical Rules for AI Agents
|
|
7
|
+
|
|
8
|
+
### 1. ALWAYS Write Tests for New Features
|
|
9
|
+
Every new feature, CLI option, or code change MUST include corresponding tests:
|
|
10
|
+
|
|
11
|
+
- **CLI changes**: Add tests in `tests/test_cli.py`
|
|
12
|
+
- **Formatter changes**: Add tests in `tests/test_formatters.py`
|
|
13
|
+
- **Scanner/collector changes**: Add tests in `tests/test_scanners.py`
|
|
14
|
+
- **API/push changes**: Add tests in `tests/test_push_payload.py`
|
|
15
|
+
- **New collectors**: Create `tests/test_<collector_name>.py`
|
|
16
|
+
|
|
17
|
+
Tests run automatically via pre-commit hooks on every commit that touches Python files. Commits will be blocked if tests fail.
|
|
18
|
+
|
|
19
|
+
### 2. Code Quality Standards
|
|
20
|
+
- Use `ruff` for linting and formatting (NOT black)
|
|
21
|
+
- Run `uv run ruff format .` before committing
|
|
22
|
+
- Run `uv run ruff check .` to check for issues
|
|
23
|
+
- Pre-commit hooks will auto-run: ruff, ruff-format, bandit, pytest
|
|
24
|
+
|
|
25
|
+
### 3. CLI Option Changes
|
|
26
|
+
When adding/modifying CLI options:
|
|
27
|
+
1. Add `@click.option()` decorator in `nuvu_scan/cli/commands/scan.py`
|
|
28
|
+
2. Add corresponding function parameter
|
|
29
|
+
3. Add test in `tests/test_cli.py` to verify option exists
|
|
30
|
+
4. Update `README.md` with usage examples
|
|
31
|
+
|
|
32
|
+
### 4. Push Payload Format (API Compatibility)
|
|
33
|
+
When modifying push functionality:
|
|
34
|
+
- Payload MUST match the schema expected by `/api/scans/import`
|
|
35
|
+
- Required fields: `provider`, `account_id`, `scan_timestamp`, `assets`, `total_cost_estimate_usd`
|
|
36
|
+
- Asset fields: `provider`, `asset_type`, `normalized_category`, `region`, `arn`, `name`
|
|
37
|
+
- Add tests in `tests/test_push_payload.py` to verify format
|
|
38
|
+
|
|
39
|
+
### 5. Normalized Categories
|
|
40
|
+
Use only these categories from `NormalizedCategory` enum:
|
|
41
|
+
- OBJECT_STORAGE, DATA_WAREHOUSE, STREAMING, COMPUTE, ML_TRAINING
|
|
42
|
+
- DATA_CATALOG, DATA_INTEGRATION, DATA_PIPELINE, DATA_SHARING
|
|
43
|
+
- QUERY_ENGINE, SEARCH, DATABASE, SECURITY, BILLING
|
|
44
|
+
|
|
45
|
+
### 6. Adding New Collectors
|
|
46
|
+
When adding a new AWS/GCP collector:
|
|
47
|
+
1. Create collector in `nuvu_scan/core/providers/<provider>/collectors/<name>.py`
|
|
48
|
+
2. Register in the scanner's collector list
|
|
49
|
+
3. Add to `--list-collectors` output
|
|
50
|
+
4. Update `README.md` with new service coverage
|
|
51
|
+
5. Create tests with mocked API responses
|
|
52
|
+
6. Update IAM policy if new permissions needed
|
|
53
|
+
|
|
54
|
+
### 7. Dependencies
|
|
55
|
+
- Use `uv` for package management (NOT pip directly)
|
|
56
|
+
- Add dependencies to `pyproject.toml`
|
|
57
|
+
- Run `uv sync --dev` after adding dependencies
|
|
58
|
+
|
|
59
|
+
### 8. Testing Commands
|
|
60
|
+
```bash
|
|
61
|
+
# Run all tests
|
|
62
|
+
uv run pytest
|
|
63
|
+
|
|
64
|
+
# Run with coverage
|
|
65
|
+
uv run pytest --cov=nuvu_scan
|
|
66
|
+
|
|
67
|
+
# Run specific test file
|
|
68
|
+
uv run pytest tests/test_cli.py
|
|
69
|
+
|
|
70
|
+
# Run pre-commit checks (includes tests)
|
|
71
|
+
uv run pre-commit run --all-files
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 9. File Structure
|
|
75
|
+
```
|
|
76
|
+
nuvu_scan/
|
|
77
|
+
├── cli/
|
|
78
|
+
│ ├── commands/scan.py # CLI commands and options
|
|
79
|
+
│ └── formatters/ # HTML, JSON, CSV output
|
|
80
|
+
├── core/
|
|
81
|
+
│ ├── base.py # Asset, ScanResult, NormalizedCategory
|
|
82
|
+
│ └── providers/
|
|
83
|
+
│ ├── aws/collectors/ # S3, Glue, Redshift, etc.
|
|
84
|
+
│ └── gcp/collectors/ # GCS, BigQuery, etc.
|
|
85
|
+
tests/
|
|
86
|
+
├── test_cli.py # CLI option tests
|
|
87
|
+
├── test_formatters.py # Output format tests
|
|
88
|
+
├── test_scanners.py # Scanner tests
|
|
89
|
+
└── test_push_payload.py # API payload tests
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 10. Commit Guidelines
|
|
93
|
+
- Pre-commit hooks will run automatically
|
|
94
|
+
- All tests must pass before commit is accepted
|
|
95
|
+
- Use conventional commit messages: `feat:`, `fix:`, `test:`, `docs:`, `chore:`
|
|
96
|
+
|
|
97
|
+
## Summary
|
|
98
|
+
**Before any code change is complete, ensure:**
|
|
99
|
+
1. ✅ Tests are written/updated
|
|
100
|
+
2. ✅ `uv run pytest` passes
|
|
101
|
+
3. ✅ `uv run ruff check .` passes
|
|
102
|
+
4. ✅ README.md is updated (if user-facing)
|
|
103
|
+
5. ✅ Pre-commit hooks pass on commit
|
|
@@ -34,6 +34,17 @@ repos:
|
|
|
34
34
|
args: ["-c", "pyproject.toml"]
|
|
35
35
|
additional_dependencies: ["bandit[toml]"]
|
|
36
36
|
|
|
37
|
+
# Run tests before commit (only when Python files change)
|
|
38
|
+
- repo: local
|
|
39
|
+
hooks:
|
|
40
|
+
- id: pytest
|
|
41
|
+
name: pytest
|
|
42
|
+
entry: uv run pytest tests/ -x -q --tb=no
|
|
43
|
+
language: system
|
|
44
|
+
pass_filenames: false
|
|
45
|
+
types: [python]
|
|
46
|
+
stages: [pre-commit]
|
|
47
|
+
|
|
37
48
|
# Configuration
|
|
38
49
|
ci:
|
|
39
50
|
autofix_commit_msg: |
|
|
@@ -30,14 +30,22 @@ cd nuvu-scan
|
|
|
30
30
|
uv sync --dev # Creates .venv automatically, no activation needed!
|
|
31
31
|
```
|
|
32
32
|
|
|
33
|
-
### 2.
|
|
33
|
+
### 2. Install Pre-commit Hooks
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
uv run pre-commit install
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
This ensures tests and linting run automatically on every commit.
|
|
40
|
+
|
|
41
|
+
### 3. Make Changes
|
|
34
42
|
|
|
35
43
|
- Write clear, readable code
|
|
36
|
-
- Follow existing code style (enforced by
|
|
37
|
-
- Add tests for new functionality
|
|
44
|
+
- Follow existing code style (enforced by ruff)
|
|
45
|
+
- **⚠️ Add tests for new functionality** (required - pre-commit runs the test suite and blocks the commit if any test fails)
|
|
38
46
|
- Update documentation
|
|
39
47
|
|
|
40
|
-
###
|
|
48
|
+
### 4. Test Your Changes
|
|
41
49
|
|
|
42
50
|
```bash
|
|
43
51
|
# Run all tests (uv automatically uses .venv)
|
|
@@ -47,23 +55,30 @@ uv run pytest
|
|
|
47
55
|
uv run pytest --cov=nuvu_scan
|
|
48
56
|
|
|
49
57
|
# Check code quality
|
|
50
|
-
uv run
|
|
58
|
+
uv run ruff format .
|
|
51
59
|
uv run ruff check .
|
|
52
60
|
uv run mypy nuvu_scan
|
|
61
|
+
|
|
62
|
+
# Run all pre-commit checks (recommended)
|
|
63
|
+
uv run pre-commit run --all-files
|
|
53
64
|
```
|
|
54
65
|
|
|
55
66
|
**Note**: No need to activate `.venv` - `uv run` handles it automatically!
|
|
56
67
|
|
|
57
|
-
###
|
|
68
|
+
### 5. Commit
|
|
69
|
+
|
|
70
|
+
Pre-commit hooks will automatically run ruff, bandit, and pytest. If any check fails, the commit will be blocked.
|
|
58
71
|
|
|
59
|
-
Use
|
|
72
|
+
Use conventional commit messages:
|
|
60
73
|
|
|
61
74
|
```bash
|
|
62
|
-
git commit -m "
|
|
63
|
-
git commit -m "
|
|
75
|
+
git commit -m "feat: add GCP BigQuery collector"
|
|
76
|
+
git commit -m "fix: correct S3 bucket size calculation"
|
|
77
|
+
git commit -m "test: add tests for Redshift collector"
|
|
78
|
+
git commit -m "docs: update CLI options in README"
|
|
64
79
|
```
|
|
65
80
|
|
|
66
|
-
###
|
|
81
|
+
### 6. Push and Create PR
|
|
67
82
|
|
|
68
83
|
```bash
|
|
69
84
|
git push origin feature/your-feature
|
|
@@ -93,11 +108,23 @@ See the detailed guide in README.md under "Adding a New Cloud Provider".
|
|
|
93
108
|
|
|
94
109
|
## Code Style
|
|
95
110
|
|
|
96
|
-
- **Formatting**: Use `
|
|
97
|
-
- **Linting**: Use `ruff`
|
|
111
|
+
- **Formatting**: Use `ruff format`
|
|
112
|
+
- **Linting**: Use `ruff check`
|
|
98
113
|
- **Type hints**: Add type hints where helpful
|
|
99
114
|
- **Docstrings**: Add docstrings for public functions/classes
|
|
100
115
|
|
|
116
|
+
## Testing Requirements
|
|
117
|
+
|
|
118
|
+
**Every new feature MUST include tests.** Pre-commit hooks run `pytest` automatically.
|
|
119
|
+
|
|
120
|
+
| Change Type | Test File |
|
|
121
|
+
|-------------|-----------|
|
|
122
|
+
| CLI options | `tests/test_cli.py` |
|
|
123
|
+
| Formatters (HTML/JSON/CSV) | `tests/test_formatters.py` |
|
|
124
|
+
| Scanners/Collectors | `tests/test_scanners.py` |
|
|
125
|
+
| Push/API changes | `tests/test_push_payload.py` |
|
|
126
|
+
| New collector | `tests/test_<collector>.py` |
|
|
127
|
+
|
|
101
128
|
## Pull Request Process
|
|
102
129
|
|
|
103
130
|
1. Ensure all tests pass
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# Nuvu Scan - Development Status
|
|
2
|
+
|
|
3
|
+
**Multi-Cloud Data Asset Scanner** - Open-source scanner designed to discover and inventory cloud data assets across AWS, GCP, Azure, and Databricks.
|
|
4
|
+
|
|
5
|
+
> **Note**: nuvu-scan is a read-only scanner that collects asset metadata. For governance, policy enforcement, and decision-making, see [nuvu-cloud](https://github.com/nuvudev/nuvu-cloud).
|
|
6
|
+
|
|
7
|
+
## ✅ Available Collectors (v2.1.0)
|
|
8
|
+
|
|
9
|
+
### AWS Collectors (18 collectors)
|
|
10
|
+
|
|
11
|
+
| Collector | Command Flag | What It Scans | Key Metrics |
|
|
12
|
+
|-----------|--------------|---------------|-------------|
|
|
13
|
+
| **S3** | `s3` | Buckets, policies, encryption | Size, public access, versioning |
|
|
14
|
+
| **Glue** | `glue` | Databases, tables, crawlers, jobs, connections | Table counts, crawl status, job runs |
|
|
15
|
+
| **Athena** | `athena` | Workgroups, query history | Query stats, failure rates |
|
|
16
|
+
| **Redshift** | `redshift` | Clusters, serverless, snapshots, datashares, reserved nodes | CPU, connections, WLM, costs |
|
|
17
|
+
| **IAM** | `iam` | Roles, users, groups, access keys | Permissions, MFA, key age, last used |
|
|
18
|
+
| **MWAA** | `mwaa` | Apache Airflow environments | Environment class, worker counts |
|
|
19
|
+
| **EC2/VPC** | `ec2` | Security groups, VPCs, instances, EBS volumes, Elastic IPs | Open ports, public IPs, volume encryption |
|
|
20
|
+
| **KMS** | `kms` | Customer-managed encryption keys | Rotation status, key state |
|
|
21
|
+
| **RDS** | `rds` | RDS instances, Aurora clusters, snapshots | Encryption, multi-AZ, backup retention |
|
|
22
|
+
| **DynamoDB** | `dynamodb` | DynamoDB tables | PITR, encryption, capacity mode |
|
|
23
|
+
| **Lambda** | `lambda` | Lambda functions | Runtime, code size, VPC config |
|
|
24
|
+
| **Secrets Manager** | `secrets` | Secrets | Rotation, last accessed, age |
|
|
25
|
+
| **AWS Backup** | `backup` | Backup vaults, backup plans | Recovery points, lifecycle |
|
|
26
|
+
| **EKS** | `eks` | EKS clusters, node groups | K8s version, endpoint access |
|
|
27
|
+
| **SNS/SQS** | `sns_sqs` | SNS topics, SQS queues | Encryption, DLQ, message counts |
|
|
28
|
+
| **Lake Formation** | `lakeformation` | Data lake settings, permissions, LF-Tags | Permission grants, admin count |
|
|
29
|
+
| **CloudTrail** | `cloudtrail` | CloudTrail trails | Multi-region, encryption, logging status |
|
|
30
|
+
| **CloudWatch** | `cloudwatch` | CloudWatch log groups | Retention, encryption, size |
|
|
31
|
+
|
|
32
|
+
### GCP Collectors (6 collectors)
|
|
33
|
+
|
|
34
|
+
| Collector | Command Flag | What It Scans | Key Metrics |
|
|
35
|
+
|-----------|--------------|---------------|-------------|
|
|
36
|
+
| **GCS** | `gcs` | Cloud Storage buckets | Size, public access, lifecycle |
|
|
37
|
+
| **BigQuery** | `bigquery` | Datasets, tables, query history | Table sizes, query costs |
|
|
38
|
+
| **Dataproc** | `dataproc` | Dataproc clusters | Cluster config, job history |
|
|
39
|
+
| **Pub/Sub** | `pubsub` | Topics, subscriptions | Message counts |
|
|
40
|
+
| **IAM** | `iam` | Service accounts | Roles, permissions |
|
|
41
|
+
| **Gemini** | `gemini` | Gemini API usage | API costs |
|
|
42
|
+
|
|
43
|
+
## 📋 Usage
|
|
44
|
+
|
|
45
|
+
### Basic Scan
|
|
46
|
+
```bash
|
|
47
|
+
# Scan all AWS collectors
|
|
48
|
+
nuvu-scan aws
|
|
49
|
+
|
|
50
|
+
# Scan all GCP collectors
|
|
51
|
+
nuvu-scan gcp --credentials /path/to/key.json
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Selective Scanning
|
|
55
|
+
```bash
|
|
56
|
+
# Scan specific collectors
|
|
57
|
+
nuvu-scan aws --collectors s3,rds,iam,kms
|
|
58
|
+
|
|
59
|
+
# List available collectors
|
|
60
|
+
nuvu-scan aws --list-collectors
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Output Formats
|
|
64
|
+
```bash
|
|
65
|
+
# HTML report (default)
|
|
66
|
+
nuvu-scan aws -o report.html
|
|
67
|
+
|
|
68
|
+
# JSON output
|
|
69
|
+
nuvu-scan aws -o assets.json
|
|
70
|
+
|
|
71
|
+
# CSV output
|
|
72
|
+
nuvu-scan aws -o assets.csv
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Push to Nuvu Cloud
|
|
76
|
+
```bash
|
|
77
|
+
# Push results to nuvu-cloud for governance
|
|
78
|
+
nuvu-scan aws --push --api-key YOUR_API_KEY
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## 🔒 IAM Permissions
|
|
82
|
+
|
|
83
|
+
The complete IAM policy is in `aws-iam-policy.json` (60+ permission statements).
|
|
84
|
+
|
|
85
|
+
### Permission Categories
|
|
86
|
+
|
|
87
|
+
| Category | Services | Example Actions |
|
|
88
|
+
|----------|----------|-----------------|
|
|
89
|
+
| **Storage** | S3, EBS | `s3:GetBucket*`, `ec2:DescribeVolumes` |
|
|
90
|
+
| **Compute** | EC2, Lambda, EKS | `ec2:DescribeInstances`, `lambda:ListFunctions` |
|
|
91
|
+
| **Database** | RDS, DynamoDB, Redshift | `rds:DescribeDB*`, `dynamodb:DescribeTable` |
|
|
92
|
+
| **Data Analytics** | Glue, Athena, Lake Formation | `glue:GetTables`, `athena:ListWorkGroups` |
|
|
93
|
+
| **Security** | IAM, KMS, Secrets Manager | `iam:ListRoles`, `kms:DescribeKey` |
|
|
94
|
+
| **Networking** | VPC, Security Groups | `ec2:DescribeSecurityGroups`, `ec2:DescribeVpcs` |
|
|
95
|
+
| **Messaging** | SNS, SQS | `sns:GetTopicAttributes`, `sqs:GetQueueAttributes` |
|
|
96
|
+
| **Observability** | CloudWatch, CloudTrail | `logs:DescribeLogGroups`, `cloudtrail:DescribeTrails` |
|
|
97
|
+
| **Resilience** | AWS Backup | `backup:ListBackupVaults`, `backup:ListBackupPlans` |
|
|
98
|
+
| **Cost** | Cost Explorer | `ce:GetCostAndUsage` |
|
|
99
|
+
|
|
100
|
+
All permissions are **read-only** following the principle of least privilege.
|
|
101
|
+
|
|
102
|
+
## 📊 Asset Types Collected
|
|
103
|
+
|
|
104
|
+
### Compute
|
|
105
|
+
- EC2 instances, EBS volumes, Elastic IPs
|
|
106
|
+
- Lambda functions
|
|
107
|
+
- EKS clusters, node groups
|
|
108
|
+
|
|
109
|
+
### Storage
|
|
110
|
+
- S3 buckets
|
|
111
|
+
- EBS volumes
|
|
112
|
+
|
|
113
|
+
### Databases
|
|
114
|
+
- RDS instances, Aurora clusters
|
|
115
|
+
- DynamoDB tables
|
|
116
|
+
- Redshift clusters (provisioned & serverless)
|
|
117
|
+
|
|
118
|
+
### Data Catalog
|
|
119
|
+
- Glue databases, tables, crawlers, jobs
|
|
120
|
+
- Lake Formation settings, permissions, LF-Tags
|
|
121
|
+
|
|
122
|
+
### Security
|
|
123
|
+
- IAM roles, users, groups, access keys
|
|
124
|
+
- KMS keys
|
|
125
|
+
- Secrets Manager secrets
|
|
126
|
+
- Security groups, VPCs
|
|
127
|
+
|
|
128
|
+
### Observability
|
|
129
|
+
- CloudWatch log groups
|
|
130
|
+
- CloudTrail trails
|
|
131
|
+
|
|
132
|
+
### Messaging
|
|
133
|
+
- SNS topics
|
|
134
|
+
- SQS queues
|
|
135
|
+
|
|
136
|
+
### Backup
|
|
137
|
+
- AWS Backup vaults and plans
|
|
138
|
+
|
|
139
|
+
## 🏷️ Risk Flags
|
|
140
|
+
|
|
141
|
+
nuvu-scan identifies potential issues by flagging assets:
|
|
142
|
+
|
|
143
|
+
| Category | Example Flags |
|
|
144
|
+
|----------|---------------|
|
|
145
|
+
| **Security** | `unencrypted`, `publicly_accessible`, `mfa_disabled`, `open_to_internet` |
|
|
146
|
+
| **Access** | `unused_role`, `old_key`, `overly_permissive`, `public_access` |
|
|
147
|
+
| **Operations** | `no_backups`, `stale_crawler`, `deprecated_runtime`, `logging_disabled` |
|
|
148
|
+
| **Cost** | `unattached_volume`, `old_snapshot`, `unused_eip` |
|
|
149
|
+
| **Compliance** | `no_retention_policy`, `rotation_disabled`, `pitr_disabled` |
|
|
150
|
+
|
|
151
|
+
## 🧪 Testing
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Run tests
|
|
155
|
+
uv run pytest
|
|
156
|
+
|
|
157
|
+
# Run with coverage
|
|
158
|
+
uv run pytest --cov=nuvu_scan
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## 📦 Installation
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# From PyPI
|
|
165
|
+
pip install nuvu-scan
|
|
166
|
+
|
|
167
|
+
# From source
|
|
168
|
+
git clone https://github.com/nuvudev/nuvu-scan
|
|
169
|
+
cd nuvu-scan
|
|
170
|
+
uv sync
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## 📋 Roadmap
|
|
174
|
+
|
|
175
|
+
### Additional AWS Collectors
|
|
176
|
+
- [ ] OpenSearch collector
|
|
177
|
+
- [ ] EMR collector
|
|
178
|
+
- [ ] SageMaker collector
|
|
179
|
+
- [ ] Bedrock collector
|
|
180
|
+
- [ ] MSK (Kafka) collector
|
|
181
|
+
- [ ] Kinesis collector
|
|
182
|
+
- [ ] Step Functions collector
|
|
183
|
+
- [ ] EventBridge collector
|
|
184
|
+
|
|
185
|
+
### Additional GCP Collectors
|
|
186
|
+
- [ ] Cloud SQL collector
|
|
187
|
+
- [ ] Cloud Spanner collector
|
|
188
|
+
- [ ] Bigtable collector
|
|
189
|
+
- [ ] Firestore collector
|
|
190
|
+
- [ ] Vertex AI collector
|
|
191
|
+
- [ ] Dataflow collector
|
|
192
|
+
- [ ] Cloud Composer collector
|
|
193
|
+
|
|
194
|
+
### Azure Provider
|
|
195
|
+
- [ ] Blob Storage collector
|
|
196
|
+
- [ ] Data Lake collector
|
|
197
|
+
- [ ] Synapse collector
|
|
198
|
+
- [ ] Azure Databricks collector
|
|
199
|
+
|
|
200
|
+
### Databricks Provider
|
|
201
|
+
- [ ] Workspace discovery
|
|
202
|
+
- [ ] Unity Catalog
|
|
203
|
+
|
|
204
|
+
### Enhancements
|
|
205
|
+
- [ ] Parallel collection for faster scans
|
|
206
|
+
- [ ] Progress bars with ETA
|
|
207
|
+
- [ ] Incremental scanning (delta detection)
|
|
208
|
+
- [ ] Schema-level inventory (Redshift Data API)
|
|
209
|
+
|
|
210
|
+
## 🤝 Contributing
|
|
211
|
+
|
|
212
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
213
|
+
|
|
214
|
+
## 📄 License
|
|
215
|
+
|
|
216
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nuvu-scan
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.1.6
|
|
4
4
|
Summary: Multi-Cloud Data Asset Control - Discover, govern, and optimize your cloud data assets across AWS and GCP
|
|
5
5
|
Project-URL: Homepage, https://nuvu.dev
|
|
6
6
|
Project-URL: Documentation, https://github.com/nuvudev/nuvu-scan#readme
|
|
@@ -48,6 +48,10 @@ Description-Content-Type: text/markdown
|
|
|
48
48
|
|
|
49
49
|
# Nuvu Scan
|
|
50
50
|
|
|
51
|
+
[PyPI download statistics](https://pypistats.org/packages/nuvu-scan)
|
|
52
|
+
[GitHub repository](https://github.com/nuvudev/nuvu-scan)
|
|
53
|
+
[PyPI package](https://pypi.org/project/nuvu-scan/)
|
|
54
|
+
|
|
51
55
|
**Take Control of Your Cloud Data Estate**
|
|
52
56
|
Discover, govern, and optimize your cloud data assets across **AWS and GCP** — reduce wasted spend, enforce compliance, and gain full visibility into unused, idle, or risky resources.
|
|
53
57
|
|
|
@@ -79,28 +83,6 @@ pip install nuvu-scan
|
|
|
79
83
|
|
|
80
84
|
## Usage
|
|
81
85
|
|
|
82
|
-
### Optional: Push results to Nuvu Cloud
|
|
83
|
-
|
|
84
|
-
Nuvu Scan is fully open-source and runs standalone — no account required.
|
|
85
|
-
If you want dashboards, team workflows, and long‑term history, you can optionally push results to Nuvu Cloud.
|
|
86
|
-
|
|
87
|
-
```bash
|
|
88
|
-
# Push results to Nuvu Cloud (optional)
|
|
89
|
-
nuvu scan --provider aws --push --api-key your_nuvu_api_key
|
|
90
|
-
|
|
91
|
-
# Or use environment variable
|
|
92
|
-
export NUVU_API_KEY=your_nuvu_api_key
|
|
93
|
-
nuvu scan --provider aws --push
|
|
94
|
-
|
|
95
|
-
# Custom cloud URL (defaults to https://nuvu.dev)
|
|
96
|
-
nuvu scan --provider aws --push --nuvu-cloud-url https://nuvu.dev
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
What this means for open‑source users:
|
|
100
|
-
- You can keep everything local and export JSON/CSV/HTML.
|
|
101
|
-
- No cloud credentials are ever sent to Nuvu Cloud — only scan results.
|
|
102
|
-
- The data collected is identical whether you run locally or push.
|
|
103
|
-
|
|
104
86
|
### AWS Scanning
|
|
105
87
|
|
|
106
88
|
**Prerequisites:** Create an IAM user or role with the read-only policy from `aws-iam-policy.json`. See the [AWS Setup](#aws-v1---available-now) section below for detailed instructions.
|
|
@@ -120,11 +102,14 @@ nuvu scan --provider aws \
|
|
|
120
102
|
--access-key-id your-key \
|
|
121
103
|
--secret-access-key your-secret
|
|
122
104
|
|
|
123
|
-
# Output to JSON
|
|
124
|
-
nuvu scan --provider aws --output-format
|
|
105
|
+
# Output to HTML/JSON/CSV
|
|
106
|
+
nuvu scan --provider aws --output-format html --output-file report.html
|
|
125
107
|
|
|
126
108
|
# Scan specific regions
|
|
127
109
|
nuvu scan --provider aws --region us-east-1 --region eu-west-1
|
|
110
|
+
|
|
111
|
+
# Scan specific collector
|
|
112
|
+
nuvu scan --provider aws --output-format html --collectors redshift --region us-west-1
|
|
128
113
|
```
|
|
129
114
|
|
|
130
115
|
#### 2. Access Key + Secret Key + Session Token (Temporary Credentials)
|
|
@@ -203,6 +188,10 @@ You can optionally push scan results to a remote API for centralized tracking:
|
|
|
203
188
|
```bash
|
|
204
189
|
# Push results to a remote endpoint
|
|
205
190
|
nuvu scan --provider aws --push --api-key your-api-key --api-url https://your-api.example.com
|
|
191
|
+
|
|
192
|
+
# Push results to Nuvu Cloud for the data governance layer
|
|
193
|
+
nuvu scan --provider aws --push --api-key your-api-key
|
|
194
|
+
|
|
206
195
|
```
|
|
207
196
|
|
|
208
197
|
This is useful for integrating with your own data governance platforms or CI/CD pipelines.
|
|
@@ -364,6 +353,29 @@ Nuvu requires read-only access to your GCP project via a Service Account. The to
|
|
|
364
353
|
### Azure, Databricks (Coming Soon)
|
|
365
354
|
Multi-cloud support is built into the architecture. Additional providers will be added in future releases.
|
|
366
355
|
|
|
356
|
+
### Optional: Push results to Nuvu Cloud
|
|
357
|
+
|
|
358
|
+
Nuvu Scan is fully open-source and runs standalone — no account required.
|
|
359
|
+
If you want dashboards, team workflows, data estate time travel, and long‑term history, you can optionally push results to Nuvu Cloud.
|
|
360
|
+
|
|
361
|
+
```bash
|
|
362
|
+
# Push results to Nuvu Cloud (optional)
|
|
363
|
+
nuvu scan --provider aws --push --api-key your_nuvu_api_key
|
|
364
|
+
|
|
365
|
+
# Or use environment variable
|
|
366
|
+
export NUVU_API_KEY=your_nuvu_api_key
|
|
367
|
+
nuvu scan --provider aws --push
|
|
368
|
+
|
|
369
|
+
# Custom API URL (defaults to https://nuvu.dev)
|
|
370
|
+
nuvu scan --provider aws --push --api-url https://your-api.example.com
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
What this means for open‑source users:
|
|
374
|
+
- You can keep everything local and export JSON/CSV/HTML.
|
|
375
|
+
- No cloud credentials are ever sent to Nuvu Cloud — only scan results.
|
|
376
|
+
- The data collected is identical whether you run locally or push.
|
|
377
|
+
|
|
378
|
+
|
|
367
379
|
## License
|
|
368
380
|
|
|
369
381
|
Apache 2.0
|
|
@@ -413,14 +425,17 @@ uv run pytest tests/test_s3_collector.py
|
|
|
413
425
|
### Code Quality
|
|
414
426
|
|
|
415
427
|
```bash
|
|
416
|
-
# Format code with
|
|
417
|
-
uv run
|
|
428
|
+
# Format code with ruff
|
|
429
|
+
uv run ruff format .
|
|
418
430
|
|
|
419
431
|
# Lint with ruff
|
|
420
432
|
uv run ruff check .
|
|
421
433
|
|
|
422
434
|
# Type checking with mypy
|
|
423
435
|
uv run mypy nuvu_scan
|
|
436
|
+
|
|
437
|
+
# Run all pre-commit checks (including tests)
|
|
438
|
+
uv run pre-commit run --all-files
|
|
424
439
|
```
|
|
425
440
|
|
|
426
441
|
### Building the Package
|
|
@@ -503,11 +518,11 @@ git checkout -b fix/your-bug-description
|
|
|
503
518
|
|
|
504
519
|
### 3. Make Changes
|
|
505
520
|
|
|
506
|
-
- Follow the existing code style (enforced by
|
|
507
|
-
- Add tests for new features
|
|
521
|
+
- Follow the existing code style (enforced by ruff)
|
|
522
|
+
- **Add tests for new features** (required - pre-commit runs tests)
|
|
508
523
|
- Update documentation as needed
|
|
509
524
|
- Ensure all tests pass: `uv run pytest`
|
|
510
|
-
- Run code quality checks: `uv run
|
|
525
|
+
- Run code quality checks: `uv run ruff format . && uv run ruff check .`
|
|
511
526
|
|
|
512
527
|
### 4. Commit and Push
|
|
513
528
|
|