featcat 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. featcat-0.1.0/.github/workflows/ci.yml +68 -0
  2. featcat-0.1.0/.github/workflows/publish.yml +94 -0
  3. featcat-0.1.0/.github/workflows/release-draft.yml +57 -0
  4. featcat-0.1.0/.gitignore +47 -0
  5. featcat-0.1.0/.pre-commit-config.yaml +18 -0
  6. featcat-0.1.0/CHANGELOG.md +25 -0
  7. featcat-0.1.0/CONTRIBUTING.md +97 -0
  8. featcat-0.1.0/LICENSE +21 -0
  9. featcat-0.1.0/Makefile +34 -0
  10. featcat-0.1.0/PKG-INFO +159 -0
  11. featcat-0.1.0/README.md +110 -0
  12. featcat-0.1.0/docs/README-vi.md +68 -0
  13. featcat-0.1.0/docs/admin-guide-vi.md +189 -0
  14. featcat-0.1.0/docs/admin-guide.md +189 -0
  15. featcat-0.1.0/docs/setup-vi.md +137 -0
  16. featcat-0.1.0/docs/setup.md +138 -0
  17. featcat-0.1.0/docs/user-guide-vi.md +207 -0
  18. featcat-0.1.0/docs/user-guide.md +207 -0
  19. featcat-0.1.0/featcat/__init__.py +3 -0
  20. featcat-0.1.0/featcat/catalog/__init__.py +1 -0
  21. featcat-0.1.0/featcat/catalog/backend.py +98 -0
  22. featcat-0.1.0/featcat/catalog/db.py +14 -0
  23. featcat-0.1.0/featcat/catalog/factory.py +28 -0
  24. featcat-0.1.0/featcat/catalog/local.py +274 -0
  25. featcat-0.1.0/featcat/catalog/models.py +74 -0
  26. featcat-0.1.0/featcat/catalog/remote.py +75 -0
  27. featcat-0.1.0/featcat/catalog/scanner.py +80 -0
  28. featcat-0.1.0/featcat/catalog/storage.py +117 -0
  29. featcat-0.1.0/featcat/cli.py +903 -0
  30. featcat-0.1.0/featcat/config.py +40 -0
  31. featcat-0.1.0/featcat/llm/__init__.py +17 -0
  32. featcat-0.1.0/featcat/llm/base.py +109 -0
  33. featcat-0.1.0/featcat/llm/cached.py +57 -0
  34. featcat-0.1.0/featcat/llm/llamacpp.py +126 -0
  35. featcat-0.1.0/featcat/llm/ollama.py +132 -0
  36. featcat-0.1.0/featcat/plugins/__init__.py +4 -0
  37. featcat-0.1.0/featcat/plugins/autodoc.py +203 -0
  38. featcat-0.1.0/featcat/plugins/base.py +43 -0
  39. featcat-0.1.0/featcat/plugins/discovery.py +66 -0
  40. featcat-0.1.0/featcat/plugins/monitoring.py +229 -0
  41. featcat-0.1.0/featcat/plugins/nl_query.py +131 -0
  42. featcat-0.1.0/featcat/py.typed +0 -0
  43. featcat-0.1.0/featcat/tui/__init__.py +1 -0
  44. featcat-0.1.0/featcat/tui/app.py +43 -0
  45. featcat-0.1.0/featcat/tui/screens/__init__.py +1 -0
  46. featcat-0.1.0/featcat/tui/screens/chat.py +195 -0
  47. featcat-0.1.0/featcat/tui/screens/dashboard.py +89 -0
  48. featcat-0.1.0/featcat/tui/screens/features.py +91 -0
  49. featcat-0.1.0/featcat/tui/screens/monitoring.py +105 -0
  50. featcat-0.1.0/featcat/tui/styles/app.tcss +132 -0
  51. featcat-0.1.0/featcat/tui/widgets/__init__.py +1 -0
  52. featcat-0.1.0/featcat/tui/widgets/alert_list.py +37 -0
  53. featcat-0.1.0/featcat/tui/widgets/chat_messages.py +47 -0
  54. featcat-0.1.0/featcat/tui/widgets/feature_detail.py +55 -0
  55. featcat-0.1.0/featcat/tui/widgets/feature_table.py +47 -0
  56. featcat-0.1.0/featcat/tui/widgets/stats_bar.py +72 -0
  57. featcat-0.1.0/featcat/utils/__init__.py +1 -0
  58. featcat-0.1.0/featcat/utils/cache.py +103 -0
  59. featcat-0.1.0/featcat/utils/catalog_context.py +117 -0
  60. featcat-0.1.0/featcat/utils/prompts.py +175 -0
  61. featcat-0.1.0/featcat/utils/statistics.py +141 -0
  62. featcat-0.1.0/pyproject.toml +89 -0
  63. featcat-0.1.0/scripts/import_initial.py +119 -0
  64. featcat-0.1.0/tests/__init__.py +0 -0
  65. featcat-0.1.0/tests/conftest.py +39 -0
  66. featcat-0.1.0/tests/fixtures/create_fixtures.py +98 -0
  67. featcat-0.1.0/tests/fixtures/device_performance.parquet +0 -0
  68. featcat-0.1.0/tests/fixtures/user_behavior_30d.parquet +0 -0
  69. featcat-0.1.0/tests/test_autodoc.py +131 -0
  70. featcat-0.1.0/tests/test_cache.py +126 -0
  71. featcat-0.1.0/tests/test_catalog.py +236 -0
  72. featcat-0.1.0/tests/test_discovery.py +94 -0
  73. featcat-0.1.0/tests/test_doctor.py +79 -0
  74. featcat-0.1.0/tests/test_import.py +130 -0
  75. featcat-0.1.0/tests/test_llm.py +134 -0
  76. featcat-0.1.0/tests/test_monitoring.py +223 -0
  77. featcat-0.1.0/tests/test_nl_query.py +117 -0
  78. featcat-0.1.0/tests/test_s3_storage.py +126 -0
  79. featcat-0.1.0/tests/test_storage.py +75 -0
@@ -0,0 +1,68 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, staging]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ concurrency:
10
+ group: ci-${{ github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ lint:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v6
18
+ - uses: astral-sh/setup-uv@v7
19
+ - uses: actions/setup-python@v6
20
+ with:
21
+ python-version: "3.12"
22
+ - run: uv pip install --system ruff
23
+ - run: ruff check .
24
+ - run: ruff format --check .
25
+
26
+ type-check:
27
+ runs-on: ubuntu-latest
28
+ steps:
29
+ - uses: actions/checkout@v6
30
+ - uses: astral-sh/setup-uv@v7
31
+ - uses: actions/setup-python@v6
32
+ with:
33
+ python-version: "3.12"
34
+ - run: uv pip install --system -e ".[dev]"
35
+ - run: mypy featcat/
36
+
37
+ test:
38
+ needs: lint
39
+ runs-on: ubuntu-latest
40
+ strategy:
41
+ matrix:
42
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
43
+ steps:
44
+ - uses: actions/checkout@v6
45
+ - uses: astral-sh/setup-uv@v7
46
+ - uses: actions/setup-python@v6
47
+ with:
48
+ python-version: ${{ matrix.python-version }}
49
+ - run: uv pip install --system -e ".[dev,tui]"
50
+ - run: pytest --cov=featcat --cov-report=xml --cov-report=term-missing
51
+ - if: matrix.python-version == '3.12'
52
+ uses: codecov/codecov-action@v6
53
+ with:
54
+ files: coverage.xml
55
+ fail_ci_if_error: false
56
+
57
+ build-check:
58
+ runs-on: ubuntu-latest
59
+ steps:
60
+ - uses: actions/checkout@v6
61
+ - uses: astral-sh/setup-uv@v7
62
+ - uses: actions/setup-python@v6
63
+ with:
64
+ python-version: "3.12"
65
+ - run: uv pip install --system build twine
66
+ - run: python -m build
67
+ - run: twine check dist/*
68
+ - run: uv pip install --system dist/*.whl && featcat --help
@@ -0,0 +1,94 @@
1
+ # SETUP REQUIRED (one-time):
2
+ # 1. Go to https://pypi.org/manage/account/publishing/
3
+ # Add trusted publisher:
4
+ # - Owner: codepawl
5
+ # - Repository: featcat
6
+ # - Workflow: publish.yml
7
+ # - Environment: pypi
8
+ #
9
+ # 2. Same for TestPyPI: https://test.pypi.org/manage/account/publishing/
10
+ # - Environment: testpypi
11
+ #
12
+ # 3. In GitHub repo Settings > Environments:
13
+ # - Create "pypi" environment with required reviewers (optional but recommended)
14
+ # - Create "testpypi" environment (no protection needed)
15
+
16
+ name: Publish to PyPI
17
+
18
+ on:
19
+ release:
20
+ types: [published]
21
+
22
+ jobs:
23
+ test:
24
+ runs-on: ubuntu-latest
25
+ steps:
26
+ - uses: actions/checkout@v6
27
+ - uses: astral-sh/setup-uv@v7
28
+ - uses: actions/setup-python@v6
29
+ with:
30
+ python-version: "3.12"
31
+ - run: uv pip install --system -e ".[dev,tui]"
32
+ - run: pytest --cov=featcat --cov-report=xml --cov-report=term-missing
33
+
34
+ build:
35
+ needs: test
36
+ runs-on: ubuntu-latest
37
+ steps:
38
+ - uses: actions/checkout@v6
39
+ - uses: astral-sh/setup-uv@v7
40
+ - uses: actions/setup-python@v6
41
+ with:
42
+ python-version: "3.12"
43
+ - run: uv pip install --system build
44
+ - run: python -m build
45
+ - uses: actions/upload-artifact@v7
46
+ with:
47
+ name: dist
48
+ path: dist/
49
+
50
+ publish-testpypi:
51
+ needs: build
52
+ runs-on: ubuntu-latest
53
+ environment: testpypi
54
+ permissions:
55
+ id-token: write
56
+ steps:
57
+ - uses: actions/download-artifact@v8
58
+ with:
59
+ name: dist
60
+ path: dist/
61
+ - uses: pypa/gh-action-pypi-publish@release/v1
62
+ with:
63
+ repository-url: https://test.pypi.org/legacy/
64
+
65
+ verify-testpypi:
66
+ needs: publish-testpypi
67
+ runs-on: ubuntu-latest
68
+ steps:
69
+ - run: sleep 30
70
+ - uses: astral-sh/setup-uv@v7
71
+ - uses: actions/setup-python@v6
72
+ with:
73
+ python-version: "3.12"
74
+ - run: uv pip install --system --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ featcat
75
+ - run: featcat --help
76
+ - name: Verify version matches tag
77
+ run: |
78
+ INSTALLED=$(python -c "import featcat; print(featcat.__version__)")
79
+ TAG="${GITHUB_REF_NAME#v}"
80
+ echo "Installed: $INSTALLED, Tag: $TAG"
81
+ [ "$INSTALLED" = "$TAG" ]
82
+
83
+ publish-pypi:
84
+ needs: verify-testpypi
85
+ runs-on: ubuntu-latest
86
+ environment: pypi
87
+ permissions:
88
+ id-token: write
89
+ steps:
90
+ - uses: actions/download-artifact@v8
91
+ with:
92
+ name: dist
93
+ path: dist/
94
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,57 @@
1
+ name: Draft Release
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - "featcat/__init__.py"
8
+
9
+ jobs:
10
+ draft-release:
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ contents: write
14
+ steps:
15
+ - uses: actions/checkout@v6
16
+ with:
17
+ fetch-depth: 0
18
+
19
+ - name: Extract version
20
+ id: version
21
+ run: |
22
+ VERSION=$(python -c "import re; print(re.search(r'__version__ = \"(.+)\"', open('featcat/__init__.py').read()).group(1))")
23
+ echo "version=$VERSION" >> "$GITHUB_OUTPUT"
24
+ echo "Detected version: $VERSION"
25
+
26
+ - name: Check if tag exists
27
+ id: check_tag
28
+ run: |
29
+ if git rev-parse "v${{ steps.version.outputs.version }}" >/dev/null 2>&1; then
30
+ echo "exists=true" >> "$GITHUB_OUTPUT"
31
+ echo "Tag v${{ steps.version.outputs.version }} already exists, skipping."
32
+ else
33
+ echo "exists=false" >> "$GITHUB_OUTPUT"
34
+ fi
35
+
36
+ - name: Extract changelog entry
37
+ if: steps.check_tag.outputs.exists == 'false'
38
+ id: changelog
39
+ run: |
40
+ VERSION="${{ steps.version.outputs.version }}"
41
+ # Extract the section for this version from CHANGELOG.md
42
+ BODY=$(awk "/^## \[$VERSION\]/{found=1; next} /^## \[/{if(found) exit} found{print}" CHANGELOG.md)
43
+ if [ -z "$BODY" ]; then
44
+ BODY="Release v$VERSION"
45
+ fi
46
+ # Write to file to preserve multiline
47
+ echo "$BODY" > /tmp/release_body.md
48
+ echo "Changelog entry extracted."
49
+
50
+ - name: Create draft release
51
+ if: steps.check_tag.outputs.exists == 'false'
52
+ uses: softprops/action-gh-release@v2
53
+ with:
54
+ tag_name: v${{ steps.version.outputs.version }}
55
+ name: "featcat v${{ steps.version.outputs.version }}"
56
+ body_path: /tmp/release_body.md
57
+ draft: true
@@ -0,0 +1,47 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ dist/
9
+ build/
10
+ wheels/
11
+ *.whl
12
+
13
+ # Virtual environments
14
+ .venv/
15
+ venv/
16
+ env/
17
+
18
+ # Testing
19
+ .pytest_cache/
20
+ htmlcov/
21
+ .coverage
22
+ coverage.xml
23
+
24
+ # IDE
25
+ .vscode/
26
+ .idea/
27
+ *.swp
28
+ *.swo
29
+ *~
30
+ .DS_Store
31
+
32
+ # Project runtime
33
+ catalog.db
34
+ docs/features.md
35
+ docs/monitoring_report.md
36
+ .env
37
+ .env.local
38
+
39
+ # Parquet fixtures (keep test ones)
40
+ *.parquet
41
+ !tests/fixtures/*.parquet
42
+
43
+ # mypy
44
+ .mypy_cache/
45
+
46
+ # ruff
47
+ .ruff_cache/
@@ -0,0 +1,18 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.9.0
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+
9
+ - repo: https://github.com/pre-commit/pre-commit-hooks
10
+ rev: v5.0.0
11
+ hooks:
12
+ - id: trailing-whitespace
13
+ - id: end-of-file-fixer
14
+ - id: check-yaml
15
+ - id: check-toml
16
+ - id: check-added-large-files
17
+ args: [--maxkb=500]
18
+ - id: debug-statements
@@ -0,0 +1,25 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-04-03
9
+
10
+ ### Added
11
+
12
+ - **Catalog**: Register data sources (local + S3/MinIO), scan Parquet files, auto-extract schema and statistics
13
+ - **AI Discovery**: Describe a use case, get ranked feature recommendations and new feature suggestions via LLM
14
+ - **Auto-doc**: LLM-powered documentation generation for features, with batch processing and Markdown export
15
+ - **NL Query**: Natural language search across the feature catalog, with Vietnamese language detection and fuzzy fallback
16
+ - **Quality Monitoring**: PSI drift detection, null spike alerts, range violation checks, zero variance detection
17
+ - **LLM Backends**: Ollama and llama.cpp server support with streaming, retry logic, and JSON extraction
18
+ - **Response Caching**: SQLite-backed LLM response cache with configurable TTL per plugin
19
+ - **TUI**: Terminal UI with dashboard, feature browser, monitoring view, and AI chat (Textual)
20
+ - **S3 Support**: Read Parquet metadata directly from AWS S3 and MinIO via PyArrow S3FileSystem
21
+ - **CLI Commands**: `init`, `source add/list/scan`, `feature list/info/tag/search`, `discover`, `ask`, `doc generate/show/export/stats`, `monitor baseline/check/report`, `doctor`, `stats`, `export`, `cache stats/clear`, `ui`
22
+ - **Configuration**: Pydantic settings loaded from `FEATCAT_*` environment variables
23
+ - **Documentation**: README, setup guide, user guide, and admin guide, each in English and Vietnamese (the Vietnamese versions keep English technical terms)
24
+
25
+ [0.1.0]: https://github.com/codepawl/featcat/releases/tag/v0.1.0
@@ -0,0 +1,97 @@
1
+ # Contributing to featcat
2
+
3
+ ## Development Setup
4
+
5
+ ```bash
6
+ # Clone the repo
7
+ git clone https://github.com/codepawl/featcat.git
8
+ cd featcat
9
+
10
+ # Create virtual environment (--seed installs pip into it, which `make install` relies on)
10
+ uv venv --seed && source .venv/bin/activate
12
+
13
+ # Install with dev dependencies + pre-commit hooks
14
+ make install
15
+ ```
16
+
17
+ ## Running Tests
18
+
19
+ ```bash
20
+ # Run all tests
21
+ make test
22
+
23
+ # Run with coverage report
24
+ make test-cov
25
+
26
+ # Run a specific test file
27
+ pytest tests/test_catalog.py -v
28
+ ```
29
+
30
+ ## Code Quality
31
+
32
+ ```bash
33
+ # Lint (check only)
34
+ make lint
35
+
36
+ # Auto-format + auto-fix
37
+ make format
38
+
39
+ # Type checking
40
+ make type-check
41
+
42
+ # Run all checks (lint + type-check + test)
43
+ make check
44
+ ```
45
+
46
+ ## PR Workflow
47
+
48
+ 1. Fork the repository
49
+ 2. Create a feature branch: `git checkout -b feat/my-feature`
50
+ 3. Make your changes
51
+ 4. Run checks: `make check`
52
+ 5. Commit with a descriptive message (see convention below)
53
+ 6. Push and open a Pull Request
54
+ 7. Wait for CI to pass and for a maintainer to review your changes
55
+
56
+ ## Commit Message Convention
57
+
58
+ Use [Conventional Commits](https://www.conventionalcommits.org/):
59
+
60
+ ```
61
+ feat: add S3 support for MinIO endpoints
62
+ fix: handle null stats in monitoring PSI computation
63
+ docs: update admin guide with S3 troubleshooting
64
+ chore: bump pyarrow to 16.0
65
+ refactor: simplify JSON extraction in LLM base
66
+ test: add integration tests for autodoc batch mode
67
+ ```
68
+
69
+ ## Code Style
70
+
71
+ - **Formatter/Linter**: ruff (enforced via pre-commit and CI)
72
+ - **Type hints**: Required for all public functions
73
+ - **Line length**: 120 characters
74
+ - **Docstrings**: Required for modules and public classes/functions
75
+ - **Imports**: Sorted by ruff (isort-compatible)
76
+
77
+ ## Project Structure
78
+
79
+ ```
80
+ featcat/
81
+ ├── catalog/ # Data models, SQLite DB, Parquet scanner, storage backends
82
+ ├── llm/ # LLM abstraction layer (Ollama, llama.cpp, caching)
83
+ ├── plugins/ # AI plugins (discovery, autodoc, monitoring, NL query)
84
+ ├── utils/ # Prompts, catalog context formatters, statistics, cache
85
+ ├── tui/ # Terminal UI (Textual screens and widgets)
86
+ ├── config.py # Pydantic settings
87
+ └── cli.py # Typer CLI entry point
88
+ ```
89
+
90
+ ## Releasing
91
+
92
+ 1. Update version in `featcat/__init__.py`
93
+ 2. Update `CHANGELOG.md`
94
+ 3. Commit: `git commit -am "release: v0.x.0"`
95
+ 4. Push to main — a draft GitHub Release is auto-created
96
+ 5. Review the draft, click "Publish"
97
+ 6. The publish workflow handles TestPyPI verification and PyPI upload
featcat-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Codepawl
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
featcat-0.1.0/Makefile ADDED
@@ -0,0 +1,34 @@
1
+ .PHONY: install lint format type-check test test-cov build clean check release-check
2
+
3
+ install:
4
+ pip install -e ".[dev,tui,s3]"
5
+ pre-commit install
6
+
7
+ lint:
8
+ ruff check .
9
+ ruff format --check .
10
+
11
+ format:
12
+ ruff check --fix .
13
+ ruff format .
14
+
15
+ type-check:
16
+ mypy featcat/
17
+
18
+ test:
19
+ pytest
20
+
21
+ test-cov:
22
+ pytest --cov=featcat --cov-report=html
23
+ @echo "Open htmlcov/index.html to view coverage report"
24
+
25
+ build:
26
+ python -m build
27
+
28
+ clean:
29
+ rm -rf dist/ build/ *.egg-info .pytest_cache .mypy_cache .ruff_cache htmlcov .coverage coverage.xml
30
+
31
+ check: lint type-check test
32
+
33
+ release-check: clean check build
34
+ twine check dist/*
featcat-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,159 @@
1
+ Metadata-Version: 2.4
2
+ Name: featcat
3
+ Version: 0.1.0
4
+ Summary: AI-Powered Feature Catalog for Data Science teams
5
+ Project-URL: Homepage, https://github.com/codepawl/featcat
6
+ Project-URL: Documentation, https://github.com/codepawl/featcat/tree/main/docs
7
+ Project-URL: Repository, https://github.com/codepawl/featcat
8
+ Project-URL: Issues, https://github.com/codepawl/featcat/issues
9
+ Project-URL: Changelog, https://github.com/codepawl/featcat/blob/main/CHANGELOG.md
10
+ Author-email: Codepawl <hi@codepawl.dev>
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: ai,data-science,feature-catalog,feature-store,mlops
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.25
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: pyarrow>=14.0
25
+ Requires-Dist: pydantic-settings>=2.0
26
+ Requires-Dist: pydantic>=2.0
27
+ Requires-Dist: rapidfuzz>=3.0
28
+ Requires-Dist: rich>=13.0
29
+ Requires-Dist: scipy>=1.11
30
+ Requires-Dist: typer>=0.9
31
+ Provides-Extra: all
32
+ Requires-Dist: s3fs>=2024.0; extra == 'all'
33
+ Requires-Dist: textual>=0.50; extra == 'all'
34
+ Provides-Extra: dev
35
+ Requires-Dist: boto3>=1.28; extra == 'dev'
36
+ Requires-Dist: moto[s3]>=5.0; extra == 'dev'
37
+ Requires-Dist: mypy>=1.8; extra == 'dev'
38
+ Requires-Dist: pre-commit>=3.0; extra == 'dev'
39
+ Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
40
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
41
+ Requires-Dist: pytest-tmp-files>=0.0.2; extra == 'dev'
42
+ Requires-Dist: pytest>=7.0; extra == 'dev'
43
+ Requires-Dist: ruff>=0.4; extra == 'dev'
44
+ Provides-Extra: s3
45
+ Requires-Dist: s3fs>=2024.0; extra == 's3'
46
+ Provides-Extra: tui
47
+ Requires-Dist: textual>=0.50; extra == 'tui'
48
+ Description-Content-Type: text/markdown
49
+
50
+ # featcat
51
+
52
+ ![CI](https://github.com/codepawl/featcat/actions/workflows/ci.yml/badge.svg)
53
+ ![PyPI](https://img.shields.io/pypi/v/featcat)
54
+ ![Python](https://img.shields.io/pypi/pyversions/featcat)
55
+ ![License](https://img.shields.io/pypi/l/featcat)
56
+
57
+ **AI-Powered Feature Catalog for Data Science Teams**
58
+
59
+ [Tiếng Việt](docs/README-vi.md)
60
+
61
+ featcat is a lightweight Feature Catalog designed for Data Science teams. It is **not** a Feature Store (no online serving) — it's a metadata management tool with an AI layer for searching, documenting, and monitoring feature quality.
62
+
63
+ ## The Problem
64
+
65
+ - **Features scattered everywhere**: Parquet files stored across local disks, S3, and MinIO — nobody knows what features exist
66
+ - **Missing documentation**: Dataset columns have no descriptions; new team members don't know what `avg_session_duration` means
67
+ - **Hard to find the right features**: Starting a new project (e.g. churn prediction) with no idea which features are already available
68
+ - **Undetected data drift**: Feature distributions change silently until model performance degrades
69
+
70
+ ## Key Features
71
+
72
+ | Module | Description | Phase |
73
+ |--------|-------------|-------|
74
+ | **Catalog** | Register data sources, scan Parquet to auto-extract schema + stats | 1 |
75
+ | **AI Discovery** | Describe a use case → AI recommends relevant features + suggests new ones | 2 |
76
+ | **Auto-doc** | LLM automatically generates documentation for each feature | 2 |
77
+ | **NL Query** | Ask in natural language (English or Vietnamese), AI finds relevant features | 2 |
78
+ | **Monitoring** | PSI drift detection, null spikes, range violations | 3 |
79
+ | **TUI** | Terminal UI with dashboard, feature browser, AI chat | 3 |
80
+ | **S3 Support** | Read Parquet directly from S3/MinIO — never copies data locally | 1 |
81
+ | **Caching** | Cache LLM responses to speed up doc generation and NL queries | 3 |
82
+
83
+ ## Quick Start
84
+
85
+ ```bash
86
+ # 1. Clone and install
87
+ git clone https://github.com/codepawl/featcat.git && cd featcat
88
+ uv venv && source .venv/bin/activate
89
+ uv pip install -e ".[dev]"
90
+
91
+ # 2. Initialize catalog
92
+ featcat init
93
+
94
+ # 3. Register and scan a data source
95
+ featcat source add device_perf /data/features/device_performance.parquet
96
+ featcat source scan device_perf
97
+
98
+ # 4. Browse features
99
+ featcat feature list
100
+ featcat feature info device_perf.cpu_usage
101
+
102
+ # 5. (Optional) Enable AI features — requires Ollama
103
+ ollama serve &
104
+ ollama pull qwen2.5:7b
105
+ featcat discover "churn prediction for telecom customers"
106
+ featcat ask "features related to user behavior"
107
+ ```
108
+
109
+ ## TUI (Terminal UI)
110
+
111
+ ```bash
112
+ uv pip install -e ".[tui]"
113
+ featcat ui
114
+ ```
115
+
116
+ <!-- Screenshot placeholder -->
117
+ <!-- ![featcat TUI](docs/assets/tui-screenshot.png) -->
118
+
119
+ Keybindings: `D` Dashboard | `F` Features | `M` Monitor | `C` Chat | `Q` Quit | `?` Help
120
+
121
+ ## System Health Check
122
+
123
+ ```bash
124
+ featcat doctor
125
+ ```
126
+
127
+ ```
128
+ [x] Python 3.10+
129
+ [x] SQLite catalog exists (catalog.db)
130
+ [x] Ollama running at localhost:11434
131
+ [x] Model qwen2.5:7b available
132
+ [x] 14 features registered
133
+ [x] 10 features have docs (71.4%)
134
+ [ ] 2 features have drift warnings
135
+ ```
136
+
137
+ ## Tech Stack
138
+
139
+ - **Python 3.10+** | **SQLite** (metadata only, never copies data)
140
+ - **Typer** + **Rich** (CLI) | **Textual** (TUI)
141
+ - **PyArrow** (Parquet schema + stats) | **s3fs** (S3/MinIO)
142
+ - **Ollama** (local LLM) | **Pydantic** (models + config)
143
+
144
+ ## Project Structure
145
+
146
+ ```
147
+ featcat/
148
+ ├── catalog/ # Models, DB, scanner, storage backends
149
+ ├── llm/ # LLM abstraction (Ollama, llama.cpp)
150
+ ├── plugins/ # Discovery, Autodoc, Monitoring, NL Query
151
+ ├── utils/ # Prompts, catalog context, statistics, cache
152
+ ├── tui/ # Textual TUI (screens, widgets)
153
+ ├── config.py # Pydantic settings
154
+ └── cli.py # Typer CLI entry point
155
+ ```
156
+
157
+ ## License
158
+
159
+ MIT