nuvu-scan 2.0.1__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nuvu_scan-2.1.2/.cursorrules +103 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.github/workflows/ci.yml +14 -14
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.pre-commit-config.yaml +11 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/CONTRIBUTING.md +39 -12
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/DEVELOPMENT_STATUS.md +22 -4
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/PKG-INFO +41 -30
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/README.md +40 -29
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/commands/scan.py +18 -7
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/html.py +141 -20
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/base.py +34 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/aws_scanner.py +52 -37
- nuvu_scan-2.1.2/nuvu_scan/core/providers/aws/collectors/athena.py +181 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/glue.py +104 -34
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/mwaa.py +10 -5
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/redshift.py +381 -18
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/pyproject.toml +1 -1
- nuvu_scan-2.1.2/tests/test_cli.py +135 -0
- nuvu_scan-2.1.2/tests/test_formatters.py +172 -0
- nuvu_scan-2.1.2/tests/test_push_payload.py +232 -0
- nuvu_scan-2.1.2/tests/test_scanners.py +149 -0
- nuvu_scan-2.0.1/nuvu_scan/core/providers/aws/collectors/athena.py +0 -146
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.github/workflows/release.yml +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.gitignore +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/Makefile +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/RELEASE.md +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/commands/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/csv.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/json.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/main.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/analyzers/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/models/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/cost_explorer.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/iam.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/s3.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/bigquery.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/billing.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/dataproc.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/gcs.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/gemini.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/iam.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/pubsub.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/gcp_scanner.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/tests/__init__.py +0 -0
- {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/tests/test_base.py +0 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Nuvu Scan - AI Agent Instructions
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
Nuvu Scan is an open-source CLI tool for cloud data governance. It scans AWS and GCP to discover, govern, and optimize cloud data assets.
|
|
5
|
+
|
|
6
|
+
## Critical Rules for AI Agents
|
|
7
|
+
|
|
8
|
+
### 1. ALWAYS Write Tests for New Features
|
|
9
|
+
Every new feature, CLI option, or code change MUST include corresponding tests:
|
|
10
|
+
|
|
11
|
+
- **CLI changes**: Add tests in `tests/test_cli.py`
|
|
12
|
+
- **Formatter changes**: Add tests in `tests/test_formatters.py`
|
|
13
|
+
- **Scanner/collector changes**: Add tests in `tests/test_scanners.py`
|
|
14
|
+
- **API/push changes**: Add tests in `tests/test_push_payload.py`
|
|
15
|
+
- **New collectors**: Create `tests/test_<collector_name>.py`
|
|
16
|
+
|
|
17
|
+
Tests run automatically on every commit via pre-commit hooks. Commits will be blocked if tests fail.
|
|
18
|
+
|
|
19
|
+
### 2. Code Quality Standards
|
|
20
|
+
- Use `ruff` for linting and formatting (NOT black)
|
|
21
|
+
- Run `uv run ruff format .` before committing
|
|
22
|
+
- Run `uv run ruff check .` to check for issues
|
|
23
|
+
- Pre-commit hooks will auto-run: ruff, ruff-format, bandit, pytest
|
|
24
|
+
|
|
25
|
+
### 3. CLI Option Changes
|
|
26
|
+
When adding/modifying CLI options:
|
|
27
|
+
1. Add `@click.option()` decorator in `nuvu_scan/cli/commands/scan.py`
|
|
28
|
+
2. Add corresponding function parameter
|
|
29
|
+
3. Add test in `tests/test_cli.py` to verify option exists
|
|
30
|
+
4. Update `README.md` with usage examples
|
|
31
|
+
|
|
32
|
+
### 4. Push Payload Format (API Compatibility)
|
|
33
|
+
When modifying push functionality:
|
|
34
|
+
- Payload MUST match the schema expected by `/api/scans/import`
|
|
35
|
+
- Required fields: `provider`, `account_id`, `scan_timestamp`, `assets`, `total_cost_estimate_usd`
|
|
36
|
+
- Asset fields: `provider`, `asset_type`, `normalized_category`, `region`, `arn`, `name`
|
|
37
|
+
- Add tests in `tests/test_push_payload.py` to verify format
|
|
38
|
+
|
|
39
|
+
### 5. Normalized Categories
|
|
40
|
+
Use only these categories from `NormalizedCategory` enum:
|
|
41
|
+
- OBJECT_STORAGE, DATA_WAREHOUSE, STREAMING, COMPUTE, ML_TRAINING
|
|
42
|
+
- DATA_CATALOG, DATA_INTEGRATION, DATA_PIPELINE, DATA_SHARING
|
|
43
|
+
- QUERY_ENGINE, SEARCH, DATABASE, SECURITY, BILLING
|
|
44
|
+
|
|
45
|
+
### 6. Adding New Collectors
|
|
46
|
+
When adding a new AWS/GCP collector:
|
|
47
|
+
1. Create collector in `nuvu_scan/core/providers/<provider>/collectors/<name>.py`
|
|
48
|
+
2. Register in the scanner's collector list
|
|
49
|
+
3. Add to `--list-collectors` output
|
|
50
|
+
4. Update `README.md` with new service coverage
|
|
51
|
+
5. Create tests with mocked API responses
|
|
52
|
+
6. Update IAM policy if new permissions needed
|
|
53
|
+
|
|
54
|
+
### 7. Dependencies
|
|
55
|
+
- Use `uv` for package management (NOT pip directly)
|
|
56
|
+
- Add dependencies to `pyproject.toml`
|
|
57
|
+
- Run `uv sync --dev` after adding dependencies
|
|
58
|
+
|
|
59
|
+
### 8. Testing Commands
|
|
60
|
+
```bash
|
|
61
|
+
# Run all tests
|
|
62
|
+
uv run pytest
|
|
63
|
+
|
|
64
|
+
# Run with coverage
|
|
65
|
+
uv run pytest --cov=nuvu_scan
|
|
66
|
+
|
|
67
|
+
# Run specific test file
|
|
68
|
+
uv run pytest tests/test_cli.py
|
|
69
|
+
|
|
70
|
+
# Run pre-commit checks (includes tests)
|
|
71
|
+
uv run pre-commit run --all-files
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 9. File Structure
|
|
75
|
+
```
|
|
76
|
+
nuvu_scan/
|
|
77
|
+
├── cli/
|
|
78
|
+
│ ├── commands/scan.py # CLI commands and options
|
|
79
|
+
│ └── formatters/ # HTML, JSON, CSV output
|
|
80
|
+
├── core/
|
|
81
|
+
│ ├── base.py # Asset, ScanResult, NormalizedCategory
|
|
82
|
+
│ └── providers/
|
|
83
|
+
│ ├── aws/collectors/ # S3, Glue, Redshift, etc.
|
|
84
|
+
│ └── gcp/collectors/ # GCS, BigQuery, etc.
|
|
85
|
+
tests/
|
|
86
|
+
├── test_cli.py # CLI option tests
|
|
87
|
+
├── test_formatters.py # Output format tests
|
|
88
|
+
├── test_scanners.py # Scanner tests
|
|
89
|
+
└── test_push_payload.py # API payload tests
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 10. Commit Guidelines
|
|
93
|
+
- Pre-commit hooks will run automatically
|
|
94
|
+
- All tests must pass before commit is accepted
|
|
95
|
+
- Use conventional commit messages: `feat:`, `fix:`, `test:`, `docs:`, `chore:`
|
|
96
|
+
|
|
97
|
+
## Summary
|
|
98
|
+
**Before any code change is complete, ensure:**
|
|
99
|
+
1. ✅ Tests are written/updated
|
|
100
|
+
2. ✅ `uv run pytest` passes
|
|
101
|
+
3. ✅ `uv run ruff check .` passes
|
|
102
|
+
4. ✅ README.md is updated (if user-facing)
|
|
103
|
+
5. ✅ Pre-commit hooks pass on commit
|
|
@@ -36,38 +36,38 @@ jobs:
|
|
|
36
36
|
strategy:
|
|
37
37
|
matrix:
|
|
38
38
|
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
39
|
-
|
|
39
|
+
|
|
40
40
|
steps:
|
|
41
41
|
- uses: actions/checkout@v4
|
|
42
|
-
|
|
42
|
+
|
|
43
43
|
- name: Install uv
|
|
44
44
|
uses: astral-sh/setup-uv@v4
|
|
45
45
|
with:
|
|
46
46
|
version: "latest"
|
|
47
|
-
|
|
47
|
+
|
|
48
48
|
- name: Set up Python ${{ matrix.python-version }}
|
|
49
49
|
run: uv python install ${{ matrix.python-version }}
|
|
50
|
-
|
|
50
|
+
|
|
51
51
|
- name: Install dependencies
|
|
52
52
|
run: |
|
|
53
53
|
uv sync --dev
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
- name: Run linter
|
|
56
56
|
run: |
|
|
57
57
|
uv run ruff check .
|
|
58
|
-
uv run
|
|
59
|
-
|
|
58
|
+
uv run ruff format --check .
|
|
59
|
+
|
|
60
60
|
- name: Run type checker
|
|
61
61
|
run: |
|
|
62
62
|
uv run mypy nuvu_scan || true # Allow failures for now
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
- name: Run tests
|
|
65
65
|
run: |
|
|
66
66
|
uv run pytest --cov=nuvu_scan --cov-report=xml
|
|
67
67
|
env:
|
|
68
68
|
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
|
69
69
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
- name: Upload coverage
|
|
72
72
|
uses: codecov/codecov-action@v3
|
|
73
73
|
with:
|
|
@@ -78,22 +78,22 @@ jobs:
|
|
|
78
78
|
runs-on: ubuntu-latest
|
|
79
79
|
needs: test
|
|
80
80
|
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
81
|
-
|
|
81
|
+
|
|
82
82
|
steps:
|
|
83
83
|
- uses: actions/checkout@v4
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
- name: Install uv
|
|
86
86
|
uses: astral-sh/setup-uv@v4
|
|
87
87
|
with:
|
|
88
88
|
version: "latest"
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
- name: Set up Python
|
|
91
91
|
run: uv python install 3.11
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
- name: Build package
|
|
94
94
|
run: |
|
|
95
95
|
uv build
|
|
96
|
-
|
|
96
|
+
|
|
97
97
|
- name: Upload artifacts
|
|
98
98
|
uses: actions/upload-artifact@v4
|
|
99
99
|
with:
|
|
@@ -34,6 +34,17 @@ repos:
|
|
|
34
34
|
args: ["-c", "pyproject.toml"]
|
|
35
35
|
additional_dependencies: ["bandit[toml]"]
|
|
36
36
|
|
|
37
|
+
# Run tests before commit (only when Python files change)
|
|
38
|
+
- repo: local
|
|
39
|
+
hooks:
|
|
40
|
+
- id: pytest
|
|
41
|
+
name: pytest
|
|
42
|
+
entry: uv run pytest tests/ -x -q --tb=no
|
|
43
|
+
language: system
|
|
44
|
+
pass_filenames: false
|
|
45
|
+
types: [python]
|
|
46
|
+
stages: [pre-commit]
|
|
47
|
+
|
|
37
48
|
# Configuration
|
|
38
49
|
ci:
|
|
39
50
|
autofix_commit_msg: |
|
|
@@ -30,14 +30,22 @@ cd nuvu-scan
|
|
|
30
30
|
uv sync --dev # Creates .venv automatically, no activation needed!
|
|
31
31
|
```
|
|
32
32
|
|
|
33
|
-
### 2.
|
|
33
|
+
### 2. Install Pre-commit Hooks
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
uv run pre-commit install
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
This ensures tests and linting run automatically on every commit.
|
|
40
|
+
|
|
41
|
+
### 3. Make Changes
|
|
34
42
|
|
|
35
43
|
- Write clear, readable code
|
|
36
|
-
- Follow existing code style (enforced by
|
|
37
|
-
- Add tests for new functionality
|
|
44
|
+
- Follow existing code style (enforced by ruff)
|
|
45
|
+
- **⚠️ Add tests for new functionality** (required - pre-commit runs the test suite, and commits are blocked if any test fails)
|
|
38
46
|
- Update documentation
|
|
39
47
|
|
|
40
|
-
###
|
|
48
|
+
### 4. Test Your Changes
|
|
41
49
|
|
|
42
50
|
```bash
|
|
43
51
|
# Run all tests (uv automatically uses .venv)
|
|
@@ -47,23 +55,30 @@ uv run pytest
|
|
|
47
55
|
uv run pytest --cov=nuvu_scan
|
|
48
56
|
|
|
49
57
|
# Check code quality
|
|
50
|
-
uv run
|
|
58
|
+
uv run ruff format .
|
|
51
59
|
uv run ruff check .
|
|
52
60
|
uv run mypy nuvu_scan
|
|
61
|
+
|
|
62
|
+
# Run all pre-commit checks (recommended)
|
|
63
|
+
uv run pre-commit run --all-files
|
|
53
64
|
```
|
|
54
65
|
|
|
55
66
|
**Note**: No need to activate `.venv` - `uv run` handles it automatically!
|
|
56
67
|
|
|
57
|
-
###
|
|
68
|
+
### 5. Commit
|
|
69
|
+
|
|
70
|
+
Pre-commit hooks will automatically run ruff, bandit, and pytest. If any check fails, the commit will be blocked.
|
|
58
71
|
|
|
59
|
-
Use
|
|
72
|
+
Use conventional commit messages:
|
|
60
73
|
|
|
61
74
|
```bash
|
|
62
|
-
git commit -m "
|
|
63
|
-
git commit -m "
|
|
75
|
+
git commit -m "feat: add GCP BigQuery collector"
|
|
76
|
+
git commit -m "fix: correct S3 bucket size calculation"
|
|
77
|
+
git commit -m "test: add tests for Redshift collector"
|
|
78
|
+
git commit -m "docs: update CLI options in README"
|
|
64
79
|
```
|
|
65
80
|
|
|
66
|
-
###
|
|
81
|
+
### 6. Push and Create PR
|
|
67
82
|
|
|
68
83
|
```bash
|
|
69
84
|
git push origin feature/your-feature
|
|
@@ -93,11 +108,23 @@ See the detailed guide in README.md under "Adding a New Cloud Provider".
|
|
|
93
108
|
|
|
94
109
|
## Code Style
|
|
95
110
|
|
|
96
|
-
- **Formatting**: Use `
|
|
97
|
-
- **Linting**: Use `ruff`
|
|
111
|
+
- **Formatting**: Use `ruff format`
|
|
112
|
+
- **Linting**: Use `ruff check`
|
|
98
113
|
- **Type hints**: Add type hints where helpful
|
|
99
114
|
- **Docstrings**: Add docstrings for public functions/classes
|
|
100
115
|
|
|
116
|
+
## Testing Requirements
|
|
117
|
+
|
|
118
|
+
**Every new feature MUST include tests.** Pre-commit hooks run `pytest` automatically.
|
|
119
|
+
|
|
120
|
+
| Change Type | Test File |
|
|
121
|
+
|-------------|-----------|
|
|
122
|
+
| CLI options | `tests/test_cli.py` |
|
|
123
|
+
| Formatters (HTML/JSON/CSV) | `tests/test_formatters.py` |
|
|
124
|
+
| Scanners/Collectors | `tests/test_scanners.py` |
|
|
125
|
+
| Push/API changes | `tests/test_push_payload.py` |
|
|
126
|
+
| New collector | `tests/test_<collector>.py` |
|
|
127
|
+
|
|
101
128
|
## Pull Request Process
|
|
102
129
|
|
|
103
130
|
1. Ensure all tests pass
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
|
|
57
57
|
#### Redshift Collector (Major Enhancement in v2.0.0)
|
|
58
58
|
- ✅ **Provisioned Clusters** (Enhanced)
|
|
59
|
-
- Lists all clusters
|
|
59
|
+
- Lists all clusters across ALL regions (auto-discovery)
|
|
60
60
|
- Node type, count, encryption status
|
|
61
61
|
- CloudWatch-based activity tracking (DatabaseConnections, CPUUtilization)
|
|
62
62
|
- Cluster age calculation
|
|
@@ -64,11 +64,22 @@
|
|
|
64
64
|
- **Reservation coverage analysis** - checks if covered by reserved nodes
|
|
65
65
|
- **WLM configuration analysis** - queue count, auto WLM, unlimited queues
|
|
66
66
|
- Potential reservation savings calculation (40% estimate)
|
|
67
|
-
-
|
|
68
|
-
-
|
|
67
|
+
- **Performance metrics from CloudWatch** (NEW)
|
|
68
|
+
- CPU utilization max/avg (24h)
|
|
69
|
+
- Queries completed (24h)
|
|
70
|
+
- Disk space usage percentage
|
|
71
|
+
- Query duration and queue time
|
|
72
|
+
- Performance recommendations (right-sizing)
|
|
73
|
+
- Risk flags: `publicly_accessible`, `unencrypted`, `low_activity`, `potentially_unused`, `no_reservation_long_running`, `default_wlm_only`, `unlimited_wlm_queue`, `low_cpu_utilization`
|
|
74
|
+
- ✅ **Redshift Serverless** (Enhanced)
|
|
69
75
|
- Namespaces with encryption status
|
|
70
76
|
- Workgroups with base capacity and cost estimation
|
|
71
|
-
-
|
|
77
|
+
- **RPU utilization metrics from CloudWatch** (NEW)
|
|
78
|
+
- RPU max/avg (24h and 7d)
|
|
79
|
+
- Queries completed/failed (24h)
|
|
80
|
+
- Query duration metrics
|
|
81
|
+
- Utilization recommendations for capacity right-sizing
|
|
82
|
+
- Risk flags: `publicly_accessible`, `low_rpu_utilization`, `high_query_failure_rate`
|
|
72
83
|
- ✅ **Redshift Datashares** (NEW)
|
|
73
84
|
- Lists all datashares (inbound and outbound)
|
|
74
85
|
- Consumer account identification
|
|
@@ -175,6 +186,7 @@
|
|
|
175
186
|
- ✅ **Progress Logging** - Real-time status updates during collection
|
|
176
187
|
|
|
177
188
|
### Enhanced HTML Reports (v2.0.0)
|
|
189
|
+
- ✅ **Scan Scope Section** (NEW) - Shows which collectors and regions were scanned
|
|
178
190
|
- ✅ **Executive Summary** with key metrics
|
|
179
191
|
- ✅ **Cost Optimization Section**
|
|
180
192
|
- Snapshot cost analysis with old snapshot flagging
|
|
@@ -184,8 +196,11 @@
|
|
|
184
196
|
- Stale/unused crawlers and ETL jobs
|
|
185
197
|
- Cross-account data sharing alerts
|
|
186
198
|
- WLM configuration review
|
|
199
|
+
- **Cluster Performance table** (NEW) - CPU, queries, disk, recommendations
|
|
200
|
+
- **Serverless Workgroup Utilization table** (NEW) - RPU metrics, recommendations
|
|
187
201
|
- ✅ Improved styling with insight boxes (warning, alert, info)
|
|
188
202
|
- ✅ Potential savings card in summary
|
|
203
|
+
- ✅ **Footer with nuvu-scan attribution** and GitHub repository link
|
|
189
204
|
|
|
190
205
|
### New Asset Categories (v2.0.0)
|
|
191
206
|
- ✅ `DATA_PIPELINE` - ETL jobs, crawlers, workflows
|
|
@@ -223,6 +238,9 @@
|
|
|
223
238
|
| Redshift | `reservation_expiring_soon` | Reserved node expires within 30 days |
|
|
224
239
|
| Redshift | `default_wlm_only` | Cluster using only default WLM queue |
|
|
225
240
|
| Redshift | `unlimited_wlm_queue` | WLM queue with no concurrency limit |
|
|
241
|
+
| Redshift | `low_cpu_utilization` | Cluster CPU never exceeds 20% (right-sizing opportunity) |
|
|
242
|
+
| Redshift Serverless | `low_rpu_utilization` | RPU usage below 50% of base capacity |
|
|
243
|
+
| Redshift Serverless | `high_query_failure_rate` | >10% of queries failing |
|
|
226
244
|
|
|
227
245
|
### Cost Tracking & Reporting
|
|
228
246
|
- ✅ Asset-level cost estimation for all resources
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nuvu-scan
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: Multi-Cloud Data Asset Control - Discover, govern, and optimize your cloud data assets across AWS and GCP
|
|
5
5
|
Project-URL: Homepage, https://nuvu.dev
|
|
6
6
|
Project-URL: Documentation, https://github.com/nuvudev/nuvu-scan#readme
|
|
@@ -79,28 +79,6 @@ pip install nuvu-scan
|
|
|
79
79
|
|
|
80
80
|
## Usage
|
|
81
81
|
|
|
82
|
-
### Optional: Push results to Nuvu Cloud
|
|
83
|
-
|
|
84
|
-
Nuvu Scan is fully open-source and runs standalone — no account required.
|
|
85
|
-
If you want dashboards, team workflows, and long‑term history, you can optionally push results to Nuvu Cloud.
|
|
86
|
-
|
|
87
|
-
```bash
|
|
88
|
-
# Push results to Nuvu Cloud (optional)
|
|
89
|
-
nuvu scan --provider aws --push --api-key your_nuvu_api_key
|
|
90
|
-
|
|
91
|
-
# Or use environment variable
|
|
92
|
-
export NUVU_API_KEY=your_nuvu_api_key
|
|
93
|
-
nuvu scan --provider aws --push
|
|
94
|
-
|
|
95
|
-
# Custom cloud URL (defaults to https://nuvu.dev)
|
|
96
|
-
nuvu scan --provider aws --push --nuvu-cloud-url https://nuvu.dev
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
What this means for open‑source users:
|
|
100
|
-
- You can keep everything local and export JSON/CSV/HTML.
|
|
101
|
-
- No cloud credentials are ever sent to Nuvu Cloud — only scan results.
|
|
102
|
-
- The data collected is identical whether you run locally or push.
|
|
103
|
-
|
|
104
82
|
### AWS Scanning
|
|
105
83
|
|
|
106
84
|
**Prerequisites:** Create an IAM user or role with the read-only policy from `aws-iam-policy.json`. See the [AWS Setup](#aws-v1---available-now) section below for detailed instructions.
|
|
@@ -120,11 +98,14 @@ nuvu scan --provider aws \
|
|
|
120
98
|
--access-key-id your-key \
|
|
121
99
|
--secret-access-key your-secret
|
|
122
100
|
|
|
123
|
-
# Output to JSON
|
|
124
|
-
nuvu scan --provider aws --output-format
|
|
101
|
+
# Output to HTML/JSON/CSV
|
|
102
|
+
nuvu scan --provider aws --output-format html --output-file report.html
|
|
125
103
|
|
|
126
104
|
# Scan specific regions
|
|
127
105
|
nuvu scan --provider aws --region us-east-1 --region eu-west-1
|
|
106
|
+
|
|
107
|
+
# Scan specific collector
|
|
108
|
+
nuvu scan --provider aws --output-format html --collectors redshift --region us-west-1
|
|
128
109
|
```
|
|
129
110
|
|
|
130
111
|
#### 2. Access Key + Secret Key + Session Token (Temporary Credentials)
|
|
@@ -203,6 +184,10 @@ You can optionally push scan results to a remote API for centralized tracking:
|
|
|
203
184
|
```bash
|
|
204
185
|
# Push results to a remote endpoint
|
|
205
186
|
nuvu scan --provider aws --push --api-key your-api-key --api-url https://your-api.example.com
|
|
187
|
+
|
|
188
|
+
# Push results to Nuvu Cloud for the Data Governance layer
|
|
189
|
+
nuvu scan --provider aws --push --api-key your-api-key
|
|
190
|
+
|
|
206
191
|
```
|
|
207
192
|
|
|
208
193
|
This is useful for integrating with your own data governance platforms or CI/CD pipelines.
|
|
@@ -364,6 +349,29 @@ Nuvu requires read-only access to your GCP project via a Service Account. The to
|
|
|
364
349
|
### Azure, Databricks (Coming Soon)
|
|
365
350
|
Multi-cloud support is built into the architecture. Additional providers will be added in future releases.
|
|
366
351
|
|
|
352
|
+
### Optional: Push results to Nuvu Cloud
|
|
353
|
+
|
|
354
|
+
Nuvu Scan is fully open-source and runs standalone — no account required.
|
|
355
|
+
If you want dashboards, team workflows, data estate time travel, and long‑term history, you can optionally push results to Nuvu Cloud.
|
|
356
|
+
|
|
357
|
+
```bash
|
|
358
|
+
# Push results to Nuvu Cloud (optional)
|
|
359
|
+
nuvu scan --provider aws --push --api-key your_nuvu_api_key
|
|
360
|
+
|
|
361
|
+
# Or use environment variable
|
|
362
|
+
export NUVU_API_KEY=your_nuvu_api_key
|
|
363
|
+
nuvu scan --provider aws --push
|
|
364
|
+
|
|
365
|
+
# Custom API URL (defaults to https://nuvu.dev)
|
|
366
|
+
nuvu scan --provider aws --push --api-url https://your-api.example.com
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
What this means for open‑source users:
|
|
370
|
+
- You can keep everything local and export JSON/CSV/HTML.
|
|
371
|
+
- No cloud credentials are ever sent to Nuvu Cloud — only scan results.
|
|
372
|
+
- The data collected is identical whether you run locally or push.
|
|
373
|
+
|
|
374
|
+
|
|
367
375
|
## License
|
|
368
376
|
|
|
369
377
|
Apache 2.0
|
|
@@ -413,14 +421,17 @@ uv run pytest tests/test_s3_collector.py
|
|
|
413
421
|
### Code Quality
|
|
414
422
|
|
|
415
423
|
```bash
|
|
416
|
-
# Format code with
|
|
417
|
-
uv run
|
|
424
|
+
# Format code with ruff
|
|
425
|
+
uv run ruff format .
|
|
418
426
|
|
|
419
427
|
# Lint with ruff
|
|
420
428
|
uv run ruff check .
|
|
421
429
|
|
|
422
430
|
# Type checking with mypy
|
|
423
431
|
uv run mypy nuvu_scan
|
|
432
|
+
|
|
433
|
+
# Run all pre-commit checks (including tests)
|
|
434
|
+
uv run pre-commit run --all-files
|
|
424
435
|
```
|
|
425
436
|
|
|
426
437
|
### Building the Package
|
|
@@ -503,11 +514,11 @@ git checkout -b fix/your-bug-description
|
|
|
503
514
|
|
|
504
515
|
### 3. Make Changes
|
|
505
516
|
|
|
506
|
-
- Follow the existing code style (enforced by
|
|
507
|
-
- Add tests for new features
|
|
517
|
+
- Follow the existing code style (enforced by ruff)
|
|
518
|
+
- **Add tests for new features** (required - pre-commit runs tests)
|
|
508
519
|
- Update documentation as needed
|
|
509
520
|
- Ensure all tests pass: `uv run pytest`
|
|
510
|
-
- Run code quality checks: `uv run
|
|
521
|
+
- Run code quality checks: `uv run ruff format . && uv run ruff check .`
|
|
511
522
|
|
|
512
523
|
### 4. Commit and Push
|
|
513
524
|
|
|
@@ -31,28 +31,6 @@ pip install nuvu-scan
|
|
|
31
31
|
|
|
32
32
|
## Usage
|
|
33
33
|
|
|
34
|
-
### Optional: Push results to Nuvu Cloud
|
|
35
|
-
|
|
36
|
-
Nuvu Scan is fully open-source and runs standalone — no account required.
|
|
37
|
-
If you want dashboards, team workflows, and long‑term history, you can optionally push results to Nuvu Cloud.
|
|
38
|
-
|
|
39
|
-
```bash
|
|
40
|
-
# Push results to Nuvu Cloud (optional)
|
|
41
|
-
nuvu scan --provider aws --push --api-key your_nuvu_api_key
|
|
42
|
-
|
|
43
|
-
# Or use environment variable
|
|
44
|
-
export NUVU_API_KEY=your_nuvu_api_key
|
|
45
|
-
nuvu scan --provider aws --push
|
|
46
|
-
|
|
47
|
-
# Custom cloud URL (defaults to https://nuvu.dev)
|
|
48
|
-
nuvu scan --provider aws --push --nuvu-cloud-url https://nuvu.dev
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
What this means for open‑source users:
|
|
52
|
-
- You can keep everything local and export JSON/CSV/HTML.
|
|
53
|
-
- No cloud credentials are ever sent to Nuvu Cloud — only scan results.
|
|
54
|
-
- The data collected is identical whether you run locally or push.
|
|
55
|
-
|
|
56
34
|
### AWS Scanning
|
|
57
35
|
|
|
58
36
|
**Prerequisites:** Create an IAM user or role with the read-only policy from `aws-iam-policy.json`. See the [AWS Setup](#aws-v1---available-now) section below for detailed instructions.
|
|
@@ -72,11 +50,14 @@ nuvu scan --provider aws \
|
|
|
72
50
|
--access-key-id your-key \
|
|
73
51
|
--secret-access-key your-secret
|
|
74
52
|
|
|
75
|
-
# Output to JSON
|
|
76
|
-
nuvu scan --provider aws --output-format
|
|
53
|
+
# Output to HTML/JSON/CSV
|
|
54
|
+
nuvu scan --provider aws --output-format html --output-file report.html
|
|
77
55
|
|
|
78
56
|
# Scan specific regions
|
|
79
57
|
nuvu scan --provider aws --region us-east-1 --region eu-west-1
|
|
58
|
+
|
|
59
|
+
# Scan specific collector
|
|
60
|
+
nuvu scan --provider aws --output-format html --collectors redshift --region us-west-1
|
|
80
61
|
```
|
|
81
62
|
|
|
82
63
|
#### 2. Access Key + Secret Key + Session Token (Temporary Credentials)
|
|
@@ -155,6 +136,10 @@ You can optionally push scan results to a remote API for centralized tracking:
|
|
|
155
136
|
```bash
|
|
156
137
|
# Push results to a remote endpoint
|
|
157
138
|
nuvu scan --provider aws --push --api-key your-api-key --api-url https://your-api.example.com
|
|
139
|
+
|
|
140
|
+
# Push results to Nuvu Cloud for the Data Governance layer
|
|
141
|
+
nuvu scan --provider aws --push --api-key your-api-key
|
|
142
|
+
|
|
158
143
|
```
|
|
159
144
|
|
|
160
145
|
This is useful for integrating with your own data governance platforms or CI/CD pipelines.
|
|
@@ -316,6 +301,29 @@ Nuvu requires read-only access to your GCP project via a Service Account. The to
|
|
|
316
301
|
### Azure, Databricks (Coming Soon)
|
|
317
302
|
Multi-cloud support is built into the architecture. Additional providers will be added in future releases.
|
|
318
303
|
|
|
304
|
+
### Optional: Push results to Nuvu Cloud
|
|
305
|
+
|
|
306
|
+
Nuvu Scan is fully open-source and runs standalone — no account required.
|
|
307
|
+
If you want dashboards, team workflows, data estate time travel, and long‑term history, you can optionally push results to Nuvu Cloud.
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
# Push results to Nuvu Cloud (optional)
|
|
311
|
+
nuvu scan --provider aws --push --api-key your_nuvu_api_key
|
|
312
|
+
|
|
313
|
+
# Or use environment variable
|
|
314
|
+
export NUVU_API_KEY=your_nuvu_api_key
|
|
315
|
+
nuvu scan --provider aws --push
|
|
316
|
+
|
|
317
|
+
# Custom API URL (defaults to https://nuvu.dev)
|
|
318
|
+
nuvu scan --provider aws --push --api-url https://your-api.example.com
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
What this means for open‑source users:
|
|
322
|
+
- You can keep everything local and export JSON/CSV/HTML.
|
|
323
|
+
- No cloud credentials are ever sent to Nuvu Cloud — only scan results.
|
|
324
|
+
- The data collected is identical whether you run locally or push.
|
|
325
|
+
|
|
326
|
+
|
|
319
327
|
## License
|
|
320
328
|
|
|
321
329
|
Apache 2.0
|
|
@@ -365,14 +373,17 @@ uv run pytest tests/test_s3_collector.py
|
|
|
365
373
|
### Code Quality
|
|
366
374
|
|
|
367
375
|
```bash
|
|
368
|
-
# Format code with
|
|
369
|
-
uv run
|
|
376
|
+
# Format code with ruff
|
|
377
|
+
uv run ruff format .
|
|
370
378
|
|
|
371
379
|
# Lint with ruff
|
|
372
380
|
uv run ruff check .
|
|
373
381
|
|
|
374
382
|
# Type checking with mypy
|
|
375
383
|
uv run mypy nuvu_scan
|
|
384
|
+
|
|
385
|
+
# Run all pre-commit checks (including tests)
|
|
386
|
+
uv run pre-commit run --all-files
|
|
376
387
|
```
|
|
377
388
|
|
|
378
389
|
### Building the Package
|
|
@@ -455,11 +466,11 @@ git checkout -b fix/your-bug-description
|
|
|
455
466
|
|
|
456
467
|
### 3. Make Changes
|
|
457
468
|
|
|
458
|
-
- Follow the existing code style (enforced by
|
|
459
|
-
- Add tests for new features
|
|
469
|
+
- Follow the existing code style (enforced by ruff)
|
|
470
|
+
- **Add tests for new features** (required - pre-commit runs tests)
|
|
460
471
|
- Update documentation as needed
|
|
461
472
|
- Ensure all tests pass: `uv run pytest`
|
|
462
|
-
- Run code quality checks: `uv run
|
|
473
|
+
- Run code quality checks: `uv run ruff format . && uv run ruff check .`
|
|
463
474
|
|
|
464
475
|
### 4. Commit and Push
|
|
465
476
|
|
|
@@ -2,12 +2,9 @@
|
|
|
2
2
|
Scan command for Nuvu CLI.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
import json
|
|
6
5
|
import os
|
|
7
6
|
import sys
|
|
8
7
|
from datetime import datetime
|
|
9
|
-
from urllib.error import HTTPError, URLError
|
|
10
|
-
from urllib.request import Request, urlopen
|
|
11
8
|
|
|
12
9
|
import click
|
|
13
10
|
|
|
@@ -111,6 +108,11 @@ from ..formatters.json import JSONFormatter
|
|
|
111
108
|
default="https://nuvu.dev",
|
|
112
109
|
help="Nuvu Cloud API URL (default: https://nuvu.dev)",
|
|
113
110
|
)
|
|
111
|
+
@click.option(
|
|
112
|
+
"--list-collectors",
|
|
113
|
+
is_flag=True,
|
|
114
|
+
help="List available collectors for the specified provider and exit.",
|
|
115
|
+
)
|
|
114
116
|
def scan_command(
|
|
115
117
|
provider: str,
|
|
116
118
|
output_format: str,
|
|
@@ -333,8 +335,12 @@ def scan_command(
|
|
|
333
335
|
{
|
|
334
336
|
"provider": asset.provider,
|
|
335
337
|
"asset_type": asset.asset_type,
|
|
336
|
-
"normalized_category": asset.normalized_category.value
|
|
337
|
-
|
|
338
|
+
"normalized_category": asset.normalized_category.value
|
|
339
|
+
if asset.normalized_category
|
|
340
|
+
else "unknown",
|
|
341
|
+
"service": asset.service or asset.asset_type.split("_")[0]
|
|
342
|
+
if asset.asset_type
|
|
343
|
+
else "unknown",
|
|
338
344
|
"region": asset.region,
|
|
339
345
|
"arn": asset.arn,
|
|
340
346
|
"name": asset.name,
|
|
@@ -343,6 +349,7 @@ def scan_command(
|
|
|
343
349
|
"size_bytes": asset.size_bytes,
|
|
344
350
|
"tags": asset.tags,
|
|
345
351
|
"cost_estimate_usd": asset.cost_estimate_usd,
|
|
352
|
+
"usage_metrics": asset.usage_metrics, # Include all usage metrics
|
|
346
353
|
"risk_flags": asset.risk_flags,
|
|
347
354
|
"ownership_confidence": asset.ownership_confidence or "unknown",
|
|
348
355
|
"suggested_owner": asset.suggested_owner,
|
|
@@ -352,7 +359,8 @@ def scan_command(
|
|
|
352
359
|
}
|
|
353
360
|
|
|
354
361
|
# Push to API using the /api/scans/import endpoint
|
|
355
|
-
|
|
362
|
+
# Use longer timeout for large scans (2000+ assets can take minutes)
|
|
363
|
+
with httpx.Client(timeout=300) as client:
|
|
356
364
|
response = client.post(
|
|
357
365
|
f"{api_url.rstrip('/')}/api/scans/import",
|
|
358
366
|
json=payload,
|
|
@@ -368,7 +376,10 @@ def scan_command(
|
|
|
368
376
|
err=True,
|
|
369
377
|
)
|
|
370
378
|
except httpx.HTTPStatusError as e:
|
|
371
|
-
click.echo(
|
|
379
|
+
click.echo(
|
|
380
|
+
f"Error pushing to Nuvu Cloud: {e.response.status_code} - {e.response.text}",
|
|
381
|
+
err=True,
|
|
382
|
+
)
|
|
372
383
|
sys.exit(1)
|
|
373
384
|
except Exception as e:
|
|
374
385
|
click.echo(f"Error pushing to Nuvu Cloud: {e}", err=True)
|