nuvu-scan 2.0.1__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. nuvu_scan-2.1.2/.cursorrules +103 -0
  2. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.github/workflows/ci.yml +14 -14
  3. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.pre-commit-config.yaml +11 -0
  4. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/CONTRIBUTING.md +39 -12
  5. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/DEVELOPMENT_STATUS.md +22 -4
  6. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/PKG-INFO +41 -30
  7. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/README.md +40 -29
  8. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/commands/scan.py +18 -7
  9. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/html.py +141 -20
  10. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/base.py +34 -0
  11. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/aws_scanner.py +52 -37
  12. nuvu_scan-2.1.2/nuvu_scan/core/providers/aws/collectors/athena.py +181 -0
  13. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/glue.py +104 -34
  14. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/mwaa.py +10 -5
  15. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/redshift.py +381 -18
  16. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/pyproject.toml +1 -1
  17. nuvu_scan-2.1.2/tests/test_cli.py +135 -0
  18. nuvu_scan-2.1.2/tests/test_formatters.py +172 -0
  19. nuvu_scan-2.1.2/tests/test_push_payload.py +232 -0
  20. nuvu_scan-2.1.2/tests/test_scanners.py +149 -0
  21. nuvu_scan-2.0.1/nuvu_scan/core/providers/aws/collectors/athena.py +0 -146
  22. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  23. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.github/workflows/release.yml +0 -0
  24. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/.gitignore +0 -0
  25. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/Makefile +0 -0
  26. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/RELEASE.md +0 -0
  27. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/__init__.py +0 -0
  28. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/__init__.py +0 -0
  29. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/commands/__init__.py +0 -0
  30. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/__init__.py +0 -0
  31. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/csv.py +0 -0
  32. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/formatters/json.py +0 -0
  33. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/cli/main.py +0 -0
  34. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/__init__.py +0 -0
  35. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/analyzers/__init__.py +0 -0
  36. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/models/__init__.py +0 -0
  37. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/__init__.py +0 -0
  38. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/__init__.py +0 -0
  39. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/__init__.py +0 -0
  40. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/cost_explorer.py +0 -0
  41. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/iam.py +0 -0
  42. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/aws/collectors/s3.py +0 -0
  43. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/__init__.py +0 -0
  44. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/__init__.py +0 -0
  45. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/bigquery.py +0 -0
  46. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/billing.py +0 -0
  47. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/dataproc.py +0 -0
  48. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/gcs.py +0 -0
  49. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/gemini.py +0 -0
  50. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/iam.py +0 -0
  51. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/collectors/pubsub.py +0 -0
  52. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/nuvu_scan/core/providers/gcp/gcp_scanner.py +0 -0
  53. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/tests/__init__.py +0 -0
  54. {nuvu_scan-2.0.1 → nuvu_scan-2.1.2}/tests/test_base.py +0 -0
@@ -0,0 +1,103 @@
1
+ # Nuvu Scan - AI Agent Instructions
2
+
3
+ ## Project Overview
4
+ Nuvu Scan is an open-source CLI tool for cloud data governance. It scans AWS and GCP to discover, govern, and optimize cloud data assets.
5
+
6
+ ## Critical Rules for AI Agents
7
+
8
+ ### 1. ALWAYS Write Tests for New Features
9
+ Every new feature, CLI option, or code change MUST include corresponding tests:
10
+
11
+ - **CLI changes**: Add tests in `tests/test_cli.py`
12
+ - **Formatter changes**: Add tests in `tests/test_formatters.py`
13
+ - **Scanner/collector changes**: Add tests in `tests/test_scanners.py`
14
+ - **API/push changes**: Add tests in `tests/test_push_payload.py`
15
+ - **New collectors**: Create `tests/test_<collector_name>.py`
16
+
17
+ Tests run automatically on every commit via pre-commit hooks. Commits will be blocked if tests fail.
18
+
19
+ ### 2. Code Quality Standards
20
+ - Use `ruff` for linting and formatting (NOT black)
21
+ - Run `uv run ruff format .` before committing
22
+ - Run `uv run ruff check .` to check for issues
23
+ - Pre-commit hooks will auto-run: ruff, ruff-format, bandit, pytest
24
+
25
+ ### 3. CLI Option Changes
26
+ When adding/modifying CLI options:
27
+ 1. Add `@click.option()` decorator in `nuvu_scan/cli/commands/scan.py`
28
+ 2. Add corresponding function parameter
29
+ 3. Add test in `tests/test_cli.py` to verify option exists
30
+ 4. Update `README.md` with usage examples
31
+
32
+ ### 4. Push Payload Format (API Compatibility)
33
+ When modifying push functionality:
34
+ - Payload MUST match the schema expected by `/api/scans/import`
35
+ - Required fields: `provider`, `account_id`, `scan_timestamp`, `assets`, `total_cost_estimate_usd`
36
+ - Asset fields: `provider`, `asset_type`, `normalized_category`, `region`, `arn`, `name`
37
+ - Add tests in `tests/test_push_payload.py` to verify format
38
+
39
+ ### 5. Normalized Categories
40
+ Use only these categories from `NormalizedCategory` enum:
41
+ - OBJECT_STORAGE, DATA_WAREHOUSE, STREAMING, COMPUTE, ML_TRAINING
42
+ - DATA_CATALOG, DATA_INTEGRATION, DATA_PIPELINE, DATA_SHARING
43
+ - QUERY_ENGINE, SEARCH, DATABASE, SECURITY, BILLING
44
+
45
+ ### 6. Adding New Collectors
46
+ When adding a new AWS/GCP collector:
47
+ 1. Create collector in `nuvu_scan/core/providers/<provider>/collectors/<name>.py`
48
+ 2. Register in the scanner's collector list
49
+ 3. Add to `--list-collectors` output
50
+ 4. Update `README.md` with new service coverage
51
+ 5. Create tests with mocked API responses
52
+ 6. Update IAM policy if new permissions needed
53
+
54
+ ### 7. Dependencies
55
+ - Use `uv` for package management (NOT pip directly)
56
+ - Add dependencies to `pyproject.toml`
57
+ - Run `uv sync --dev` after adding dependencies
58
+
59
+ ### 8. Testing Commands
60
+ ```bash
61
+ # Run all tests
62
+ uv run pytest
63
+
64
+ # Run with coverage
65
+ uv run pytest --cov=nuvu_scan
66
+
67
+ # Run specific test file
68
+ uv run pytest tests/test_cli.py
69
+
70
+ # Run pre-commit checks (includes tests)
71
+ uv run pre-commit run --all-files
72
+ ```
73
+
74
+ ### 9. File Structure
75
+ ```
76
+ nuvu_scan/
77
+ ├── cli/
78
+ │ ├── commands/scan.py # CLI commands and options
79
+ │ └── formatters/ # HTML, JSON, CSV output
80
+ ├── core/
81
+ │ ├── base.py # Asset, ScanResult, NormalizedCategory
82
+ │ └── providers/
83
+ │ ├── aws/collectors/ # S3, Glue, Redshift, etc.
84
+ │ └── gcp/collectors/ # GCS, BigQuery, etc.
85
+ tests/
86
+ ├── test_cli.py # CLI option tests
87
+ ├── test_formatters.py # Output format tests
88
+ ├── test_scanners.py # Scanner tests
89
+ └── test_push_payload.py # API payload tests
90
+ ```
91
+
92
+ ### 10. Commit Guidelines
93
+ - Pre-commit hooks will run automatically
94
+ - All tests must pass before commit is accepted
95
+ - Use conventional commit messages: `feat:`, `fix:`, `test:`, `docs:`, `chore:`
96
+
97
+ ## Summary
98
+ **Before any code change is complete, ensure:**
99
+ 1. ✅ Tests are written/updated
100
+ 2. ✅ `uv run pytest` passes
101
+ 3. ✅ `uv run ruff check .` passes
102
+ 4. ✅ README.md is updated (if user-facing)
103
+ 5. ✅ Pre-commit hooks pass on commit
@@ -36,38 +36,38 @@ jobs:
36
36
  strategy:
37
37
  matrix:
38
38
  python-version: ["3.10", "3.11", "3.12", "3.13"]
39
-
39
+
40
40
  steps:
41
41
  - uses: actions/checkout@v4
42
-
42
+
43
43
  - name: Install uv
44
44
  uses: astral-sh/setup-uv@v4
45
45
  with:
46
46
  version: "latest"
47
-
47
+
48
48
  - name: Set up Python ${{ matrix.python-version }}
49
49
  run: uv python install ${{ matrix.python-version }}
50
-
50
+
51
51
  - name: Install dependencies
52
52
  run: |
53
53
  uv sync --dev
54
-
54
+
55
55
  - name: Run linter
56
56
  run: |
57
57
  uv run ruff check .
58
- uv run black --check .
59
-
58
+ uv run ruff format --check .
59
+
60
60
  - name: Run type checker
61
61
  run: |
62
62
  uv run mypy nuvu_scan || true # Allow failures for now
63
-
63
+
64
64
  - name: Run tests
65
65
  run: |
66
66
  uv run pytest --cov=nuvu_scan --cov-report=xml
67
67
  env:
68
68
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
69
69
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
70
-
70
+
71
71
  - name: Upload coverage
72
72
  uses: codecov/codecov-action@v3
73
73
  with:
@@ -78,22 +78,22 @@ jobs:
78
78
  runs-on: ubuntu-latest
79
79
  needs: test
80
80
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
81
-
81
+
82
82
  steps:
83
83
  - uses: actions/checkout@v4
84
-
84
+
85
85
  - name: Install uv
86
86
  uses: astral-sh/setup-uv@v4
87
87
  with:
88
88
  version: "latest"
89
-
89
+
90
90
  - name: Set up Python
91
91
  run: uv python install 3.11
92
-
92
+
93
93
  - name: Build package
94
94
  run: |
95
95
  uv build
96
-
96
+
97
97
  - name: Upload artifacts
98
98
  uses: actions/upload-artifact@v4
99
99
  with:
@@ -34,6 +34,17 @@ repos:
34
34
  args: ["-c", "pyproject.toml"]
35
35
  additional_dependencies: ["bandit[toml]"]
36
36
 
37
+ # Run tests before commit (only when Python files change)
38
+ - repo: local
39
+ hooks:
40
+ - id: pytest
41
+ name: pytest
42
+ entry: uv run pytest tests/ -x -q --tb=no
43
+ language: system
44
+ pass_filenames: false
45
+ types: [python]
46
+ stages: [pre-commit]
47
+
37
48
  # Configuration
38
49
  ci:
39
50
  autofix_commit_msg: |
@@ -30,14 +30,22 @@ cd nuvu-scan
30
30
  uv sync --dev # Creates .venv automatically, no activation needed!
31
31
  ```
32
32
 
33
- ### 2. Make Changes
33
+ ### 2. Install Pre-commit Hooks
34
+
35
+ ```bash
36
+ uv run pre-commit install
37
+ ```
38
+
39
+ This ensures tests and linting run automatically on every commit.
40
+
41
+ ### 3. Make Changes
34
42
 
35
43
  - Write clear, readable code
36
- - Follow existing code style (enforced by black and ruff)
37
- - Add tests for new functionality
44
+ - Follow existing code style (enforced by ruff)
45
+ - **⚠️ Add tests for new functionality** (required - pre-commit runs the test suite and blocks the commit if any test fails)
38
46
  - Update documentation
39
47
 
40
- ### 3. Test Your Changes
48
+ ### 4. Test Your Changes
41
49
 
42
50
  ```bash
43
51
  # Run all tests (uv automatically uses .venv)
@@ -47,23 +55,30 @@ uv run pytest
47
55
  uv run pytest --cov=nuvu_scan
48
56
 
49
57
  # Check code quality
50
- uv run black .
58
+ uv run ruff format .
51
59
  uv run ruff check .
52
60
  uv run mypy nuvu_scan
61
+
62
+ # Run all pre-commit checks (recommended)
63
+ uv run pre-commit run --all-files
53
64
  ```
54
65
 
55
66
  **Note**: No need to activate `.venv` - `uv run` handles it automatically!
56
67
 
57
- ### 4. Commit
68
+ ### 5. Commit
69
+
70
+ Pre-commit hooks will automatically run ruff, bandit, and pytest. If any check fails, the commit will be blocked.
58
71
 
59
- Use clear, descriptive commit messages:
72
+ Use conventional commit messages:
60
73
 
61
74
  ```bash
62
- git commit -m "Add GCP BigQuery collector"
63
- git commit -m "Fix S3 bucket size calculation"
75
+ git commit -m "feat: add GCP BigQuery collector"
76
+ git commit -m "fix: correct S3 bucket size calculation"
77
+ git commit -m "test: add tests for Redshift collector"
78
+ git commit -m "docs: update CLI options in README"
64
79
  ```
65
80
 
66
- ### 5. Push and Create PR
81
+ ### 6. Push and Create PR
67
82
 
68
83
  ```bash
69
84
  git push origin feature/your-feature
@@ -93,11 +108,23 @@ See the detailed guide in README.md under "Adding a New Cloud Provider".
93
108
 
94
109
  ## Code Style
95
110
 
96
- - **Formatting**: Use `black` (line length: 100)
97
- - **Linting**: Use `ruff`
111
+ - **Formatting**: Use `ruff format`
112
+ - **Linting**: Use `ruff check`
98
113
  - **Type hints**: Add type hints where helpful
99
114
  - **Docstrings**: Add docstrings for public functions/classes
100
115
 
116
+ ## Testing Requirements
117
+
118
+ **Every new feature MUST include tests.** Pre-commit hooks run `pytest` automatically.
119
+
120
+ | Change Type | Test File |
121
+ |-------------|-----------|
122
+ | CLI options | `tests/test_cli.py` |
123
+ | Formatters (HTML/JSON/CSV) | `tests/test_formatters.py` |
124
+ | Scanners/Collectors | `tests/test_scanners.py` |
125
+ | Push/API changes | `tests/test_push_payload.py` |
126
+ | New collector | `tests/test_<collector>.py` |
127
+
101
128
  ## Pull Request Process
102
129
 
103
130
  1. Ensure all tests pass
@@ -56,7 +56,7 @@
56
56
 
57
57
  #### Redshift Collector (Major Enhancement in v2.0.0)
58
58
  - ✅ **Provisioned Clusters** (Enhanced)
59
- - Lists all clusters with detailed metrics
59
+ - Lists all clusters across ALL regions (auto-discovery)
60
60
  - Node type, count, encryption status
61
61
  - CloudWatch-based activity tracking (DatabaseConnections, CPUUtilization)
62
62
  - Cluster age calculation
@@ -64,11 +64,22 @@
64
64
  - **Reservation coverage analysis** - checks if covered by reserved nodes
65
65
  - **WLM configuration analysis** - queue count, auto WLM, unlimited queues
66
66
  - Potential reservation savings calculation (40% estimate)
67
- - Risk flags: `publicly_accessible`, `unencrypted`, `low_activity`, `potentially_unused`, `no_reservation_long_running`, `default_wlm_only`, `unlimited_wlm_queue`
68
- - **Redshift Serverless**
67
+ - **Performance metrics from CloudWatch** (NEW)
68
+ - CPU utilization max/avg (24h)
69
+ - Queries completed (24h)
70
+ - Disk space usage percentage
71
+ - Query duration and queue time
72
+ - Performance recommendations (right-sizing)
73
+ - Risk flags: `publicly_accessible`, `unencrypted`, `low_activity`, `potentially_unused`, `no_reservation_long_running`, `default_wlm_only`, `unlimited_wlm_queue`, `low_cpu_utilization`
74
+ - ✅ **Redshift Serverless** (Enhanced)
69
75
  - Namespaces with encryption status
70
76
  - Workgroups with base capacity and cost estimation
71
- - Risk flags: `publicly_accessible`
77
+ - **RPU utilization metrics from CloudWatch** (NEW)
78
+ - RPU max/avg (24h and 7d)
79
+ - Queries completed/failed (24h)
80
+ - Query duration metrics
81
+ - Utilization recommendations for capacity right-sizing
82
+ - Risk flags: `publicly_accessible`, `low_rpu_utilization`, `high_query_failure_rate`
72
83
  - ✅ **Redshift Datashares** (NEW)
73
84
  - Lists all datashares (inbound and outbound)
74
85
  - Consumer account identification
@@ -175,6 +186,7 @@
175
186
  - ✅ **Progress Logging** - Real-time status updates during collection
176
187
 
177
188
  ### Enhanced HTML Reports (v2.0.0)
189
+ - ✅ **Scan Scope Section** (NEW) - Shows which collectors and regions were scanned
178
190
  - ✅ **Executive Summary** with key metrics
179
191
  - ✅ **Cost Optimization Section**
180
192
  - Snapshot cost analysis with old snapshot flagging
@@ -184,8 +196,11 @@
184
196
  - Stale/unused crawlers and ETL jobs
185
197
  - Cross-account data sharing alerts
186
198
  - WLM configuration review
199
+ - **Cluster Performance table** (NEW) - CPU, queries, disk, recommendations
200
+ - **Serverless Workgroup Utilization table** (NEW) - RPU metrics, recommendations
187
201
  - ✅ Improved styling with insight boxes (warning, alert, info)
188
202
  - ✅ Potential savings card in summary
203
+ - ✅ **Footer with nuvu-scan attribution** and GitHub repository link
189
204
 
190
205
  ### New Asset Categories (v2.0.0)
191
206
  - ✅ `DATA_PIPELINE` - ETL jobs, crawlers, workflows
@@ -223,6 +238,9 @@
223
238
  | Redshift | `reservation_expiring_soon` | Reserved node expires within 30 days |
224
239
  | Redshift | `default_wlm_only` | Cluster using only default WLM queue |
225
240
  | Redshift | `unlimited_wlm_queue` | WLM queue with no concurrency limit |
241
+ | Redshift | `low_cpu_utilization` | Cluster CPU never exceeds 20% (right-sizing opportunity) |
242
+ | Redshift Serverless | `low_rpu_utilization` | RPU usage below 50% of base capacity |
243
+ | Redshift Serverless | `high_query_failure_rate` | >10% of queries failing |
226
244
 
227
245
  ### Cost Tracking & Reporting
228
246
  - ✅ Asset-level cost estimation for all resources
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nuvu-scan
3
- Version: 2.0.1
3
+ Version: 2.1.2
4
4
  Summary: Multi-Cloud Data Asset Control - Discover, govern, and optimize your cloud data assets across AWS and GCP
5
5
  Project-URL: Homepage, https://nuvu.dev
6
6
  Project-URL: Documentation, https://github.com/nuvudev/nuvu-scan#readme
@@ -79,28 +79,6 @@ pip install nuvu-scan
79
79
 
80
80
  ## Usage
81
81
 
82
- ### Optional: Push results to Nuvu Cloud
83
-
84
- Nuvu Scan is fully open-source and runs standalone — no account required.
85
- If you want dashboards, team workflows, and long‑term history, you can optionally push results to Nuvu Cloud.
86
-
87
- ```bash
88
- # Push results to Nuvu Cloud (optional)
89
- nuvu scan --provider aws --push --api-key your_nuvu_api_key
90
-
91
- # Or use environment variable
92
- export NUVU_API_KEY=your_nuvu_api_key
93
- nuvu scan --provider aws --push
94
-
95
- # Custom cloud URL (defaults to https://nuvu.dev)
96
- nuvu scan --provider aws --push --nuvu-cloud-url https://nuvu.dev
97
- ```
98
-
99
- What this means for open‑source users:
100
- - You can keep everything local and export JSON/CSV/HTML.
101
- - No cloud credentials are ever sent to Nuvu Cloud — only scan results.
102
- - The data collected is identical whether you run locally or push.
103
-
104
82
  ### AWS Scanning
105
83
 
106
84
  **Prerequisites:** Create an IAM user or role with the read-only policy from `aws-iam-policy.json`. See the [AWS Setup](#aws-v1---available-now) section below for detailed instructions.
@@ -120,11 +98,14 @@ nuvu scan --provider aws \
120
98
  --access-key-id your-key \
121
99
  --secret-access-key your-secret
122
100
 
123
- # Output to JSON
124
- nuvu scan --provider aws --output-format json --output-file report.json
101
+ # Output to HTML/JSON/CSV
102
+ nuvu scan --provider aws --output-format html --output-file report.html
125
103
 
126
104
  # Scan specific regions
127
105
  nuvu scan --provider aws --region us-east-1 --region eu-west-1
106
+
107
+ # Scan specific collector
108
+ nuvu scan --provider aws --output-format html --collectors redshift --region us-west-1
128
109
  ```
129
110
 
130
111
  #### 2. Access Key + Secret Key + Session Token (Temporary Credentials)
@@ -203,6 +184,10 @@ You can optionally push scan results to a remote API for centralized tracking:
203
184
  ```bash
204
185
  # Push results to a remote endpoint
205
186
  nuvu scan --provider aws --push --api-key your-api-key --api-url https://your-api.example.com
187
+
188
+ # Push results to Nuvu Cloud for the data governance layer
189
+ nuvu scan --provider aws --push --api-key your-api-key
190
+
206
191
  ```
207
192
 
208
193
  This is useful for integrating with your own data governance platforms or CI/CD pipelines.
@@ -364,6 +349,29 @@ Nuvu requires read-only access to your GCP project via a Service Account. The to
364
349
  ### Azure, Databricks (Coming Soon)
365
350
  Multi-cloud support is built into the architecture. Additional providers will be added in future releases.
366
351
 
352
+ ### Optional: Push results to Nuvu Cloud
353
+
354
+ Nuvu Scan is fully open-source and runs standalone — no account required.
355
+ If you want dashboards, team workflows, data estate time travel and long‑term history, you can optionally push results to Nuvu Cloud.
356
+
357
+ ```bash
358
+ # Push results to Nuvu Cloud (optional)
359
+ nuvu scan --provider aws --push --api-key your_nuvu_api_key
360
+
361
+ # Or use environment variable
362
+ export NUVU_API_KEY=your_nuvu_api_key
363
+ nuvu scan --provider aws --push
364
+
365
+ # Custom API URL (defaults to https://nuvu.dev)
366
+ nuvu scan --provider aws --push --api-url https://your-api.example.com
367
+ ```
368
+
369
+ What this means for open‑source users:
370
+ - You can keep everything local and export JSON/CSV/HTML.
371
+ - No cloud credentials are ever sent to Nuvu Cloud — only scan results.
372
+ - The data collected is identical whether you run locally or push.
373
+
374
+
367
375
  ## License
368
376
 
369
377
  Apache 2.0
@@ -413,14 +421,17 @@ uv run pytest tests/test_s3_collector.py
413
421
  ### Code Quality
414
422
 
415
423
  ```bash
416
- # Format code with black
417
- uv run black .
424
+ # Format code with ruff
425
+ uv run ruff format .
418
426
 
419
427
  # Lint with ruff
420
428
  uv run ruff check .
421
429
 
422
430
  # Type checking with mypy
423
431
  uv run mypy nuvu_scan
432
+
433
+ # Run all pre-commit checks (including tests)
434
+ uv run pre-commit run --all-files
424
435
  ```
425
436
 
426
437
  ### Building the Package
@@ -503,11 +514,11 @@ git checkout -b fix/your-bug-description
503
514
 
504
515
  ### 3. Make Changes
505
516
 
506
- - Follow the existing code style (enforced by black and ruff)
507
- - Add tests for new features
517
+ - Follow the existing code style (enforced by ruff)
518
+ - **Add tests for new features** (required - pre-commit runs tests)
508
519
  - Update documentation as needed
509
520
  - Ensure all tests pass: `uv run pytest`
510
- - Run code quality checks: `uv run black . && uv run ruff check .`
521
+ - Run code quality checks: `uv run ruff format . && uv run ruff check .`
511
522
 
512
523
  ### 4. Commit and Push
513
524
 
@@ -31,28 +31,6 @@ pip install nuvu-scan
31
31
 
32
32
  ## Usage
33
33
 
34
- ### Optional: Push results to Nuvu Cloud
35
-
36
- Nuvu Scan is fully open-source and runs standalone — no account required.
37
- If you want dashboards, team workflows, and long‑term history, you can optionally push results to Nuvu Cloud.
38
-
39
- ```bash
40
- # Push results to Nuvu Cloud (optional)
41
- nuvu scan --provider aws --push --api-key your_nuvu_api_key
42
-
43
- # Or use environment variable
44
- export NUVU_API_KEY=your_nuvu_api_key
45
- nuvu scan --provider aws --push
46
-
47
- # Custom cloud URL (defaults to https://nuvu.dev)
48
- nuvu scan --provider aws --push --nuvu-cloud-url https://nuvu.dev
49
- ```
50
-
51
- What this means for open‑source users:
52
- - You can keep everything local and export JSON/CSV/HTML.
53
- - No cloud credentials are ever sent to Nuvu Cloud — only scan results.
54
- - The data collected is identical whether you run locally or push.
55
-
56
34
  ### AWS Scanning
57
35
 
58
36
  **Prerequisites:** Create an IAM user or role with the read-only policy from `aws-iam-policy.json`. See the [AWS Setup](#aws-v1---available-now) section below for detailed instructions.
@@ -72,11 +50,14 @@ nuvu scan --provider aws \
72
50
  --access-key-id your-key \
73
51
  --secret-access-key your-secret
74
52
 
75
- # Output to JSON
76
- nuvu scan --provider aws --output-format json --output-file report.json
53
+ # Output to HTML/JSON/CSV
54
+ nuvu scan --provider aws --output-format html --output-file report.html
77
55
 
78
56
  # Scan specific regions
79
57
  nuvu scan --provider aws --region us-east-1 --region eu-west-1
58
+
59
+ # Scan specific collector
60
+ nuvu scan --provider aws --output-format html --collectors redshift --region us-west-1
80
61
  ```
81
62
 
82
63
  #### 2. Access Key + Secret Key + Session Token (Temporary Credentials)
@@ -155,6 +136,10 @@ You can optionally push scan results to a remote API for centralized tracking:
155
136
  ```bash
156
137
  # Push results to a remote endpoint
157
138
  nuvu scan --provider aws --push --api-key your-api-key --api-url https://your-api.example.com
139
+
140
+ # Push results to Nuvu Cloud for the data governance layer
141
+ nuvu scan --provider aws --push --api-key your-api-key
142
+
158
143
  ```
159
144
 
160
145
  This is useful for integrating with your own data governance platforms or CI/CD pipelines.
@@ -316,6 +301,29 @@ Nuvu requires read-only access to your GCP project via a Service Account. The to
316
301
  ### Azure, Databricks (Coming Soon)
317
302
  Multi-cloud support is built into the architecture. Additional providers will be added in future releases.
318
303
 
304
+ ### Optional: Push results to Nuvu Cloud
305
+
306
+ Nuvu Scan is fully open-source and runs standalone — no account required.
307
+ If you want dashboards, team workflows, data estate time travel and long‑term history, you can optionally push results to Nuvu Cloud.
308
+
309
+ ```bash
310
+ # Push results to Nuvu Cloud (optional)
311
+ nuvu scan --provider aws --push --api-key your_nuvu_api_key
312
+
313
+ # Or use environment variable
314
+ export NUVU_API_KEY=your_nuvu_api_key
315
+ nuvu scan --provider aws --push
316
+
317
+ # Custom API URL (defaults to https://nuvu.dev)
318
+ nuvu scan --provider aws --push --api-url https://your-api.example.com
319
+ ```
320
+
321
+ What this means for open‑source users:
322
+ - You can keep everything local and export JSON/CSV/HTML.
323
+ - No cloud credentials are ever sent to Nuvu Cloud — only scan results.
324
+ - The data collected is identical whether you run locally or push.
325
+
326
+
319
327
  ## License
320
328
 
321
329
  Apache 2.0
@@ -365,14 +373,17 @@ uv run pytest tests/test_s3_collector.py
365
373
  ### Code Quality
366
374
 
367
375
  ```bash
368
- # Format code with black
369
- uv run black .
376
+ # Format code with ruff
377
+ uv run ruff format .
370
378
 
371
379
  # Lint with ruff
372
380
  uv run ruff check .
373
381
 
374
382
  # Type checking with mypy
375
383
  uv run mypy nuvu_scan
384
+
385
+ # Run all pre-commit checks (including tests)
386
+ uv run pre-commit run --all-files
376
387
  ```
377
388
 
378
389
  ### Building the Package
@@ -455,11 +466,11 @@ git checkout -b fix/your-bug-description
455
466
 
456
467
  ### 3. Make Changes
457
468
 
458
- - Follow the existing code style (enforced by black and ruff)
459
- - Add tests for new features
469
+ - Follow the existing code style (enforced by ruff)
470
+ - **Add tests for new features** (required - pre-commit runs tests)
460
471
  - Update documentation as needed
461
472
  - Ensure all tests pass: `uv run pytest`
462
- - Run code quality checks: `uv run black . && uv run ruff check .`
473
+ - Run code quality checks: `uv run ruff format . && uv run ruff check .`
463
474
 
464
475
  ### 4. Commit and Push
465
476
 
@@ -2,12 +2,9 @@
2
2
  Scan command for Nuvu CLI.
3
3
  """
4
4
 
5
- import json
6
5
  import os
7
6
  import sys
8
7
  from datetime import datetime
9
- from urllib.error import HTTPError, URLError
10
- from urllib.request import Request, urlopen
11
8
 
12
9
  import click
13
10
 
@@ -111,6 +108,11 @@ from ..formatters.json import JSONFormatter
111
108
  default="https://nuvu.dev",
112
109
  help="Nuvu Cloud API URL (default: https://nuvu.dev)",
113
110
  )
111
+ @click.option(
112
+ "--list-collectors",
113
+ is_flag=True,
114
+ help="List available collectors for the specified provider and exit.",
115
+ )
114
116
  def scan_command(
115
117
  provider: str,
116
118
  output_format: str,
@@ -333,8 +335,12 @@ def scan_command(
333
335
  {
334
336
  "provider": asset.provider,
335
337
  "asset_type": asset.asset_type,
336
- "normalized_category": asset.normalized_category.value if asset.normalized_category else "unknown",
337
- "service": asset.service or asset.asset_type.split("_")[0] if asset.asset_type else "unknown",
338
+ "normalized_category": asset.normalized_category.value
339
+ if asset.normalized_category
340
+ else "unknown",
341
+ "service": asset.service or asset.asset_type.split("_")[0]
342
+ if asset.asset_type
343
+ else "unknown",
338
344
  "region": asset.region,
339
345
  "arn": asset.arn,
340
346
  "name": asset.name,
@@ -343,6 +349,7 @@ def scan_command(
343
349
  "size_bytes": asset.size_bytes,
344
350
  "tags": asset.tags,
345
351
  "cost_estimate_usd": asset.cost_estimate_usd,
352
+ "usage_metrics": asset.usage_metrics, # Include all usage metrics
346
353
  "risk_flags": asset.risk_flags,
347
354
  "ownership_confidence": asset.ownership_confidence or "unknown",
348
355
  "suggested_owner": asset.suggested_owner,
@@ -352,7 +359,8 @@ def scan_command(
352
359
  }
353
360
 
354
361
  # Push to API using the /api/scans/import endpoint
355
- with httpx.Client(timeout=60) as client:
362
+ # Use longer timeout for large scans (2000+ assets can take minutes)
363
+ with httpx.Client(timeout=300) as client:
356
364
  response = client.post(
357
365
  f"{api_url.rstrip('/')}/api/scans/import",
358
366
  json=payload,
@@ -368,7 +376,10 @@ def scan_command(
368
376
  err=True,
369
377
  )
370
378
  except httpx.HTTPStatusError as e:
371
- click.echo(f"Error pushing to Nuvu Cloud: {e.response.status_code} - {e.response.text}", err=True)
379
+ click.echo(
380
+ f"Error pushing to Nuvu Cloud: {e.response.status_code} - {e.response.text}",
381
+ err=True,
382
+ )
372
383
  sys.exit(1)
373
384
  except Exception as e:
374
385
  click.echo(f"Error pushing to Nuvu Cloud: {e}", err=True)