featcat 0.1.0__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featcat-0.1.0/.github/workflows/ci.yml +68 -0
- featcat-0.1.0/.github/workflows/publish.yml +94 -0
- featcat-0.1.0/.github/workflows/release-draft.yml +57 -0
- featcat-0.1.0/.gitignore +47 -0
- featcat-0.1.0/.pre-commit-config.yaml +18 -0
- featcat-0.1.0/CHANGELOG.md +25 -0
- featcat-0.1.0/CONTRIBUTING.md +97 -0
- featcat-0.1.0/LICENSE +21 -0
- featcat-0.1.0/Makefile +34 -0
- featcat-0.1.0/PKG-INFO +159 -0
- featcat-0.1.0/README.md +110 -0
- featcat-0.1.0/docs/README-vi.md +68 -0
- featcat-0.1.0/docs/admin-guide-vi.md +189 -0
- featcat-0.1.0/docs/admin-guide.md +189 -0
- featcat-0.1.0/docs/setup-vi.md +137 -0
- featcat-0.1.0/docs/setup.md +138 -0
- featcat-0.1.0/docs/user-guide-vi.md +207 -0
- featcat-0.1.0/docs/user-guide.md +207 -0
- featcat-0.1.0/featcat/__init__.py +3 -0
- featcat-0.1.0/featcat/catalog/__init__.py +1 -0
- featcat-0.1.0/featcat/catalog/backend.py +98 -0
- featcat-0.1.0/featcat/catalog/db.py +14 -0
- featcat-0.1.0/featcat/catalog/factory.py +28 -0
- featcat-0.1.0/featcat/catalog/local.py +274 -0
- featcat-0.1.0/featcat/catalog/models.py +74 -0
- featcat-0.1.0/featcat/catalog/remote.py +75 -0
- featcat-0.1.0/featcat/catalog/scanner.py +80 -0
- featcat-0.1.0/featcat/catalog/storage.py +117 -0
- featcat-0.1.0/featcat/cli.py +903 -0
- featcat-0.1.0/featcat/config.py +40 -0
- featcat-0.1.0/featcat/llm/__init__.py +17 -0
- featcat-0.1.0/featcat/llm/base.py +109 -0
- featcat-0.1.0/featcat/llm/cached.py +57 -0
- featcat-0.1.0/featcat/llm/llamacpp.py +126 -0
- featcat-0.1.0/featcat/llm/ollama.py +132 -0
- featcat-0.1.0/featcat/plugins/__init__.py +4 -0
- featcat-0.1.0/featcat/plugins/autodoc.py +203 -0
- featcat-0.1.0/featcat/plugins/base.py +43 -0
- featcat-0.1.0/featcat/plugins/discovery.py +66 -0
- featcat-0.1.0/featcat/plugins/monitoring.py +229 -0
- featcat-0.1.0/featcat/plugins/nl_query.py +131 -0
- featcat-0.1.0/featcat/py.typed +0 -0
- featcat-0.1.0/featcat/tui/__init__.py +1 -0
- featcat-0.1.0/featcat/tui/app.py +43 -0
- featcat-0.1.0/featcat/tui/screens/__init__.py +1 -0
- featcat-0.1.0/featcat/tui/screens/chat.py +195 -0
- featcat-0.1.0/featcat/tui/screens/dashboard.py +89 -0
- featcat-0.1.0/featcat/tui/screens/features.py +91 -0
- featcat-0.1.0/featcat/tui/screens/monitoring.py +105 -0
- featcat-0.1.0/featcat/tui/styles/app.tcss +132 -0
- featcat-0.1.0/featcat/tui/widgets/__init__.py +1 -0
- featcat-0.1.0/featcat/tui/widgets/alert_list.py +37 -0
- featcat-0.1.0/featcat/tui/widgets/chat_messages.py +47 -0
- featcat-0.1.0/featcat/tui/widgets/feature_detail.py +55 -0
- featcat-0.1.0/featcat/tui/widgets/feature_table.py +47 -0
- featcat-0.1.0/featcat/tui/widgets/stats_bar.py +72 -0
- featcat-0.1.0/featcat/utils/__init__.py +1 -0
- featcat-0.1.0/featcat/utils/cache.py +103 -0
- featcat-0.1.0/featcat/utils/catalog_context.py +117 -0
- featcat-0.1.0/featcat/utils/prompts.py +175 -0
- featcat-0.1.0/featcat/utils/statistics.py +141 -0
- featcat-0.1.0/pyproject.toml +89 -0
- featcat-0.1.0/scripts/import_initial.py +119 -0
- featcat-0.1.0/tests/__init__.py +0 -0
- featcat-0.1.0/tests/conftest.py +39 -0
- featcat-0.1.0/tests/fixtures/create_fixtures.py +98 -0
- featcat-0.1.0/tests/fixtures/device_performance.parquet +0 -0
- featcat-0.1.0/tests/fixtures/user_behavior_30d.parquet +0 -0
- featcat-0.1.0/tests/test_autodoc.py +131 -0
- featcat-0.1.0/tests/test_cache.py +126 -0
- featcat-0.1.0/tests/test_catalog.py +236 -0
- featcat-0.1.0/tests/test_discovery.py +94 -0
- featcat-0.1.0/tests/test_doctor.py +79 -0
- featcat-0.1.0/tests/test_import.py +130 -0
- featcat-0.1.0/tests/test_llm.py +134 -0
- featcat-0.1.0/tests/test_monitoring.py +223 -0
- featcat-0.1.0/tests/test_nl_query.py +117 -0
- featcat-0.1.0/tests/test_s3_storage.py +126 -0
- featcat-0.1.0/tests/test_storage.py +75 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, staging]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ci-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
lint:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v6
|
|
18
|
+
- uses: astral-sh/setup-uv@v7
|
|
19
|
+
- uses: actions/setup-python@v6
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
- run: uv pip install --system ruff
|
|
23
|
+
- run: ruff check .
|
|
24
|
+
- run: ruff format --check .
|
|
25
|
+
|
|
26
|
+
type-check:
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v6
|
|
30
|
+
- uses: astral-sh/setup-uv@v7
|
|
31
|
+
- uses: actions/setup-python@v6
|
|
32
|
+
with:
|
|
33
|
+
python-version: "3.12"
|
|
34
|
+
- run: uv pip install --system -e ".[dev]"
|
|
35
|
+
- run: mypy featcat/
|
|
36
|
+
|
|
37
|
+
test:
|
|
38
|
+
needs: lint
|
|
39
|
+
runs-on: ubuntu-latest
|
|
40
|
+
strategy:
|
|
41
|
+
matrix:
|
|
42
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
43
|
+
steps:
|
|
44
|
+
- uses: actions/checkout@v6
|
|
45
|
+
- uses: astral-sh/setup-uv@v7
|
|
46
|
+
- uses: actions/setup-python@v6
|
|
47
|
+
with:
|
|
48
|
+
python-version: ${{ matrix.python-version }}
|
|
49
|
+
- run: uv pip install --system -e ".[dev,tui]"
|
|
50
|
+
- run: pytest --cov=featcat --cov-report=xml --cov-report=term-missing
|
|
51
|
+
- if: matrix.python-version == '3.12'
|
|
52
|
+
uses: codecov/codecov-action@v6
|
|
53
|
+
with:
|
|
54
|
+
files: coverage.xml
|
|
55
|
+
fail_ci_if_error: false
|
|
56
|
+
|
|
57
|
+
build-check:
|
|
58
|
+
runs-on: ubuntu-latest
|
|
59
|
+
steps:
|
|
60
|
+
- uses: actions/checkout@v6
|
|
61
|
+
- uses: astral-sh/setup-uv@v7
|
|
62
|
+
- uses: actions/setup-python@v6
|
|
63
|
+
with:
|
|
64
|
+
python-version: "3.12"
|
|
65
|
+
- run: uv pip install --system build twine
|
|
66
|
+
- run: python -m build
|
|
67
|
+
- run: twine check dist/*
|
|
68
|
+
- run: uv pip install --system dist/*.whl && featcat --help
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# SETUP REQUIRED (one-time):
|
|
2
|
+
# 1. Go to https://pypi.org/manage/account/publishing/
|
|
3
|
+
# Add trusted publisher:
|
|
4
|
+
# - Owner: codepawl
|
|
5
|
+
# - Repository: featcat
|
|
6
|
+
# - Workflow: publish.yml
|
|
7
|
+
# - Environment: pypi
|
|
8
|
+
#
|
|
9
|
+
# 2. Add the same trusted publisher on TestPyPI (https://test.pypi.org/manage/account/publishing/), but with:
|
|
10
|
+
# - Environment: testpypi
|
|
11
|
+
#
|
|
12
|
+
# 3. In GitHub repo Settings > Environments:
|
|
13
|
+
# - Create "pypi" environment with required reviewers (optional but recommended)
|
|
14
|
+
# - Create "testpypi" environment (no protection needed)
|
|
15
|
+
|
|
16
|
+
name: Publish to PyPI
|
|
17
|
+
|
|
18
|
+
on:
|
|
19
|
+
release:
|
|
20
|
+
types: [published]
|
|
21
|
+
|
|
22
|
+
jobs:
|
|
23
|
+
test:
|
|
24
|
+
runs-on: ubuntu-latest
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v6
|
|
27
|
+
- uses: astral-sh/setup-uv@v7
|
|
28
|
+
- uses: actions/setup-python@v6
|
|
29
|
+
with:
|
|
30
|
+
python-version: "3.12"
|
|
31
|
+
- run: uv pip install --system -e ".[dev,tui]"
|
|
32
|
+
- run: pytest --cov=featcat --cov-report=xml --cov-report=term-missing
|
|
33
|
+
|
|
34
|
+
build:
|
|
35
|
+
needs: test
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
steps:
|
|
38
|
+
- uses: actions/checkout@v6
|
|
39
|
+
- uses: astral-sh/setup-uv@v7
|
|
40
|
+
- uses: actions/setup-python@v6
|
|
41
|
+
with:
|
|
42
|
+
python-version: "3.12"
|
|
43
|
+
- run: uv pip install --system build
|
|
44
|
+
- run: python -m build
|
|
45
|
+
- uses: actions/upload-artifact@v7
|
|
46
|
+
with:
|
|
47
|
+
name: dist
|
|
48
|
+
path: dist/
|
|
49
|
+
|
|
50
|
+
publish-testpypi:
|
|
51
|
+
needs: build
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
environment: testpypi
|
|
54
|
+
permissions:
|
|
55
|
+
id-token: write
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/download-artifact@v8
|
|
58
|
+
with:
|
|
59
|
+
name: dist
|
|
60
|
+
path: dist/
|
|
61
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
62
|
+
with:
|
|
63
|
+
repository-url: https://test.pypi.org/legacy/
|
|
64
|
+
|
|
65
|
+
verify-testpypi:
|
|
66
|
+
needs: publish-testpypi
|
|
67
|
+
runs-on: ubuntu-latest
|
|
68
|
+
steps:
|
|
69
|
+
- run: sleep 30
|
|
70
|
+
- uses: astral-sh/setup-uv@v7
|
|
71
|
+
- uses: actions/setup-python@v6
|
|
72
|
+
with:
|
|
73
|
+
python-version: "3.12"
|
|
74
|
+
- run: uv pip install --system --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ featcat
|
|
75
|
+
- run: featcat --help
|
|
76
|
+
- name: Verify version matches tag
|
|
77
|
+
run: |
|
|
78
|
+
INSTALLED=$(python -c "import featcat; print(featcat.__version__)")
|
|
79
|
+
TAG="${GITHUB_REF_NAME#v}"
|
|
80
|
+
echo "Installed: $INSTALLED, Tag: $TAG"
|
|
81
|
+
[ "$INSTALLED" = "$TAG" ]
|
|
82
|
+
|
|
83
|
+
publish-pypi:
|
|
84
|
+
needs: verify-testpypi
|
|
85
|
+
runs-on: ubuntu-latest
|
|
86
|
+
environment: pypi
|
|
87
|
+
permissions:
|
|
88
|
+
id-token: write
|
|
89
|
+
steps:
|
|
90
|
+
- uses: actions/download-artifact@v8
|
|
91
|
+
with:
|
|
92
|
+
name: dist
|
|
93
|
+
path: dist/
|
|
94
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
name: Draft Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
paths:
|
|
7
|
+
- "featcat/__init__.py"
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
draft-release:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
permissions:
|
|
13
|
+
contents: write
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v6
|
|
16
|
+
with:
|
|
17
|
+
fetch-depth: 0
|
|
18
|
+
|
|
19
|
+
- name: Extract version
|
|
20
|
+
id: version
|
|
21
|
+
run: |
|
|
22
|
+
VERSION=$(python -c "import re; print(re.search(r'__version__ = \"(.+)\"', open('featcat/__init__.py').read()).group(1))")
|
|
23
|
+
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
|
24
|
+
echo "Detected version: $VERSION"
|
|
25
|
+
|
|
26
|
+
- name: Check if tag exists
|
|
27
|
+
id: check_tag
|
|
28
|
+
run: |
|
|
29
|
+
if git rev-parse "v${{ steps.version.outputs.version }}" >/dev/null 2>&1; then
|
|
30
|
+
echo "exists=true" >> "$GITHUB_OUTPUT"
|
|
31
|
+
echo "Tag v${{ steps.version.outputs.version }} already exists, skipping."
|
|
32
|
+
else
|
|
33
|
+
echo "exists=false" >> "$GITHUB_OUTPUT"
|
|
34
|
+
fi
|
|
35
|
+
|
|
36
|
+
- name: Extract changelog entry
|
|
37
|
+
if: steps.check_tag.outputs.exists == 'false'
|
|
38
|
+
id: changelog
|
|
39
|
+
run: |
|
|
40
|
+
VERSION="${{ steps.version.outputs.version }}"
|
|
41
|
+
# Extract the section for this version from CHANGELOG.md
|
|
42
|
+
BODY=$(awk "/^## \[$VERSION\]/{found=1; next} /^## \[/{if(found) exit} found{print}" CHANGELOG.md)
|
|
43
|
+
if [ -z "$BODY" ]; then
|
|
44
|
+
BODY="Release v$VERSION"
|
|
45
|
+
fi
|
|
46
|
+
# Write the body to a file so that multi-line content is preserved
|
|
47
|
+
echo "$BODY" > /tmp/release_body.md
|
|
48
|
+
echo "Changelog entry extracted."
|
|
49
|
+
|
|
50
|
+
- name: Create draft release
|
|
51
|
+
if: steps.check_tag.outputs.exists == 'false'
|
|
52
|
+
uses: softprops/action-gh-release@v2
|
|
53
|
+
with:
|
|
54
|
+
tag_name: v${{ steps.version.outputs.version }}
|
|
55
|
+
name: "featcat v${{ steps.version.outputs.version }}"
|
|
56
|
+
body_path: /tmp/release_body.md
|
|
57
|
+
draft: true
|
featcat-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg-info/
|
|
7
|
+
*.egg
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
wheels/
|
|
11
|
+
*.whl
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
|
|
18
|
+
# Testing
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
htmlcov/
|
|
21
|
+
.coverage
|
|
22
|
+
coverage.xml
|
|
23
|
+
|
|
24
|
+
# IDE
|
|
25
|
+
.vscode/
|
|
26
|
+
.idea/
|
|
27
|
+
*.swp
|
|
28
|
+
*.swo
|
|
29
|
+
*~
|
|
30
|
+
.DS_Store
|
|
31
|
+
|
|
32
|
+
# Project runtime
|
|
33
|
+
catalog.db
|
|
34
|
+
docs/features.md
|
|
35
|
+
docs/monitoring_report.md
|
|
36
|
+
.env
|
|
37
|
+
.env.local
|
|
38
|
+
|
|
39
|
+
# Parquet files (ignored, except the test fixtures)
|
|
40
|
+
*.parquet
|
|
41
|
+
!tests/fixtures/*.parquet
|
|
42
|
+
|
|
43
|
+
# mypy
|
|
44
|
+
.mypy_cache/
|
|
45
|
+
|
|
46
|
+
# ruff
|
|
47
|
+
.ruff_cache/
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.9.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
|
|
9
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
10
|
+
rev: v5.0.0
|
|
11
|
+
hooks:
|
|
12
|
+
- id: trailing-whitespace
|
|
13
|
+
- id: end-of-file-fixer
|
|
14
|
+
- id: check-yaml
|
|
15
|
+
- id: check-toml
|
|
16
|
+
- id: check-added-large-files
|
|
17
|
+
args: [--maxkb=500]
|
|
18
|
+
- id: debug-statements
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2026-04-03
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **Catalog**: Register data sources (local + S3/MinIO), scan Parquet files, auto-extract schema and statistics
|
|
13
|
+
- **AI Discovery**: Describe a use case, get ranked feature recommendations and new feature suggestions via LLM
|
|
14
|
+
- **Auto-doc**: LLM-powered documentation generation for features, with batch processing and Markdown export
|
|
15
|
+
- **NL Query**: Natural language search across the feature catalog, with Vietnamese language detection and fuzzy fallback
|
|
16
|
+
- **Quality Monitoring**: PSI drift detection, null spike alerts, range violation checks, zero variance detection
|
|
17
|
+
- **LLM Backends**: Ollama and llama.cpp server support with streaming, retry logic, and JSON extraction
|
|
18
|
+
- **Response Caching**: SQLite-backed LLM response cache with configurable TTL per plugin
|
|
19
|
+
- **TUI**: Terminal UI with dashboard, feature browser, monitoring view, and AI chat (Textual)
|
|
20
|
+
- **S3 Support**: Read Parquet metadata directly from AWS S3 and MinIO via PyArrow S3FileSystem
|
|
21
|
+
- **CLI Commands**: `init`, `source add/list/scan`, `feature list/info/tag/search`, `discover`, `ask`, `doc generate/show/export/stats`, `monitor baseline/check/report`, `doctor`, `stats`, `export`, `cache stats/clear`, `ui`
|
|
22
|
+
- **Configuration**: Pydantic settings loaded from `FEATCAT_*` environment variables
|
|
23
|
+
- **Documentation**: README, setup guide, user guide, and admin guide, in English and Vietnamese (the Vietnamese versions keep English technical terms)
|
|
24
|
+
|
|
25
|
+
[0.1.0]: https://github.com/codepawl/featcat/releases/tag/v0.1.0
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Contributing to featcat
|
|
2
|
+
|
|
3
|
+
## Development Setup
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
# Clone the repo
|
|
7
|
+
git clone https://github.com/codepawl/featcat.git
|
|
8
|
+
cd featcat
|
|
9
|
+
|
|
10
|
+
# Create virtual environment
|
|
11
|
+
uv venv && source .venv/bin/activate
|
|
12
|
+
|
|
13
|
+
# Install with dev dependencies + pre-commit hooks
|
|
14
|
+
make install
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Running Tests
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Run all tests
|
|
21
|
+
make test
|
|
22
|
+
|
|
23
|
+
# Run with coverage report
|
|
24
|
+
make test-cov
|
|
25
|
+
|
|
26
|
+
# Run a specific test file
|
|
27
|
+
pytest tests/test_catalog.py -v
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Code Quality
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Lint (check only)
|
|
34
|
+
make lint
|
|
35
|
+
|
|
36
|
+
# Auto-format + auto-fix
|
|
37
|
+
make format
|
|
38
|
+
|
|
39
|
+
# Type checking
|
|
40
|
+
make type-check
|
|
41
|
+
|
|
42
|
+
# Run all checks (lint + type-check + test)
|
|
43
|
+
make check
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## PR Workflow
|
|
47
|
+
|
|
48
|
+
1. Fork the repository
|
|
49
|
+
2. Create a feature branch: `git checkout -b feat/my-feature`
|
|
50
|
+
3. Make your changes
|
|
51
|
+
4. Run checks: `make check`
|
|
52
|
+
5. Commit with a descriptive message (see convention below)
|
|
53
|
+
6. Push and open a Pull Request
|
|
54
|
+
7. Wait for CI to pass and for a review
|
|
55
|
+
|
|
56
|
+
## Commit Message Convention
|
|
57
|
+
|
|
58
|
+
Use [Conventional Commits](https://www.conventionalcommits.org/):
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
feat: add S3 support for MinIO endpoints
|
|
62
|
+
fix: handle null stats in monitoring PSI computation
|
|
63
|
+
docs: update admin guide with S3 troubleshooting
|
|
64
|
+
chore: bump pyarrow to 16.0
|
|
65
|
+
refactor: simplify JSON extraction in LLM base
|
|
66
|
+
test: add integration tests for autodoc batch mode
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Code Style
|
|
70
|
+
|
|
71
|
+
- **Formatter/Linter**: ruff (enforced via pre-commit and CI)
|
|
72
|
+
- **Type hints**: Required for all public functions
|
|
73
|
+
- **Line length**: 120 characters
|
|
74
|
+
- **Docstrings**: Required for modules and public classes/functions
|
|
75
|
+
- **Imports**: Sorted by ruff (isort-compatible)
|
|
76
|
+
|
|
77
|
+
## Project Structure
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
featcat/
|
|
81
|
+
├── catalog/ # Data models, SQLite DB, Parquet scanner, storage backends
|
|
82
|
+
├── llm/ # LLM abstraction layer (Ollama, llama.cpp, caching)
|
|
83
|
+
├── plugins/ # AI plugins (discovery, autodoc, monitoring, NL query)
|
|
84
|
+
├── utils/ # Prompts, catalog context formatters, statistics, cache
|
|
85
|
+
├── tui/ # Terminal UI (Textual screens and widgets)
|
|
86
|
+
├── config.py # Pydantic settings
|
|
87
|
+
└── cli.py # Typer CLI entry point
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Releasing
|
|
91
|
+
|
|
92
|
+
1. Update version in `featcat/__init__.py`
|
|
93
|
+
2. Update `CHANGELOG.md`
|
|
94
|
+
3. Commit: `git commit -am "release: v0.x.0"`
|
|
95
|
+
4. Push to main — because `featcat/__init__.py` changed, a draft GitHub Release is auto-created (skipped if the `v<version>` tag already exists)
|
|
96
|
+
5. Review the draft, click "Publish"
|
|
97
|
+
6. The publish workflow handles TestPyPI verification and PyPI upload
|
featcat-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Codepawl
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
featcat-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
.PHONY: install lint format type-check test test-cov build clean check release-check
|
|
2
|
+
|
|
3
|
+
install:
|
|
4
|
+
pip install -e ".[dev,tui,s3]"
|
|
5
|
+
pre-commit install
|
|
6
|
+
|
|
7
|
+
lint:
|
|
8
|
+
ruff check .
|
|
9
|
+
ruff format --check .
|
|
10
|
+
|
|
11
|
+
format:
|
|
12
|
+
ruff check --fix .
|
|
13
|
+
ruff format .
|
|
14
|
+
|
|
15
|
+
type-check:
|
|
16
|
+
mypy featcat/
|
|
17
|
+
|
|
18
|
+
test:
|
|
19
|
+
pytest
|
|
20
|
+
|
|
21
|
+
test-cov:
|
|
22
|
+
pytest --cov=featcat --cov-report=html
|
|
23
|
+
@echo "Open htmlcov/index.html to view coverage report"
|
|
24
|
+
|
|
25
|
+
build:
|
|
26
|
+
python -m build
|
|
27
|
+
|
|
28
|
+
clean:
|
|
29
|
+
rm -rf dist/ build/ *.egg-info .pytest_cache .mypy_cache .ruff_cache htmlcov .coverage coverage.xml
|
|
30
|
+
|
|
31
|
+
check: lint type-check test
|
|
32
|
+
|
|
33
|
+
release-check: clean check build
|
|
34
|
+
twine check dist/*
|
featcat-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: featcat
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-Powered Feature Catalog for Data Science teams
|
|
5
|
+
Project-URL: Homepage, https://github.com/codepawl/featcat
|
|
6
|
+
Project-URL: Documentation, https://github.com/codepawl/featcat/tree/main/docs
|
|
7
|
+
Project-URL: Repository, https://github.com/codepawl/featcat
|
|
8
|
+
Project-URL: Issues, https://github.com/codepawl/featcat/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/codepawl/featcat/blob/main/CHANGELOG.md
|
|
10
|
+
Author-email: Codepawl <hi@codepawl.dev>
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: ai,data-science,feature-catalog,feature-store,mlops
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: httpx>=0.25
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: pyarrow>=14.0
|
|
25
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0
|
|
27
|
+
Requires-Dist: rapidfuzz>=3.0
|
|
28
|
+
Requires-Dist: rich>=13.0
|
|
29
|
+
Requires-Dist: scipy>=1.11
|
|
30
|
+
Requires-Dist: typer>=0.9
|
|
31
|
+
Provides-Extra: all
|
|
32
|
+
Requires-Dist: s3fs>=2024.0; extra == 'all'
|
|
33
|
+
Requires-Dist: textual>=0.50; extra == 'all'
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: boto3>=1.28; extra == 'dev'
|
|
36
|
+
Requires-Dist: moto[s3]>=5.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
38
|
+
Requires-Dist: pre-commit>=3.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
40
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: pytest-tmp-files>=0.0.2; extra == 'dev'
|
|
42
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
43
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
44
|
+
Provides-Extra: s3
|
|
45
|
+
Requires-Dist: s3fs>=2024.0; extra == 's3'
|
|
46
|
+
Provides-Extra: tui
|
|
47
|
+
Requires-Dist: textual>=0.50; extra == 'tui'
|
|
48
|
+
Description-Content-Type: text/markdown
|
|
49
|
+
|
|
50
|
+
# featcat
|
|
51
|
+
|
|
52
|
+

|
|
53
|
+

|
|
54
|
+

|
|
55
|
+

|
|
56
|
+
|
|
57
|
+
**AI-Powered Feature Catalog for Data Science Teams**
|
|
58
|
+
|
|
59
|
+
[Tiếng Việt](docs/README-vi.md)
|
|
60
|
+
|
|
61
|
+
featcat is a lightweight Feature Catalog designed for Data Science teams. It is **not** a Feature Store (no online serving) — it's a metadata management tool with an AI layer for searching, documenting, and monitoring feature quality.
|
|
62
|
+
|
|
63
|
+
## The Problem
|
|
64
|
+
|
|
65
|
+
- **Features scattered everywhere**: Parquet files stored across local disks, S3, and MinIO — nobody knows what features exist
|
|
66
|
+
- **Missing documentation**: Dataset columns have no descriptions; new team members don't know what `avg_session_duration` means
|
|
67
|
+
- **Hard to find the right features**: Starting a new project (e.g. churn prediction) with no idea which features are already available
|
|
68
|
+
- **Undetected data drift**: Feature distributions change silently until model performance degrades
|
|
69
|
+
|
|
70
|
+
## Key Features
|
|
71
|
+
|
|
72
|
+
| Module | Description | Phase |
|
|
73
|
+
|--------|-------------|-------|
|
|
74
|
+
| **Catalog** | Register data sources, scan Parquet to auto-extract schema + stats | 1 |
|
|
75
|
+
| **AI Discovery** | Describe a use case → AI recommends relevant features + suggests new ones | 2 |
|
|
76
|
+
| **Auto-doc** | LLM automatically generates documentation for each feature | 2 |
|
|
77
|
+
| **NL Query** | Ask in natural language (English or Vietnamese), AI finds relevant features | 2 |
|
|
78
|
+
| **Monitoring** | PSI drift detection, null spikes, range violations | 3 |
|
|
79
|
+
| **TUI** | Terminal UI with dashboard, feature browser, AI chat | 3 |
|
|
80
|
+
| **S3 Support** | Read Parquet directly from S3/MinIO — never copies data locally | 1 |
|
|
81
|
+
| **Caching** | Cache LLM responses to speed up doc generation and NL queries | 3 |
|
|
82
|
+
|
|
83
|
+
## Quick Start
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
# 1. Clone and install
|
|
87
|
+
git clone https://github.com/codepawl/featcat.git && cd featcat
|
|
88
|
+
uv venv && source .venv/bin/activate
|
|
89
|
+
uv pip install -e ".[dev]"
|
|
90
|
+
|
|
91
|
+
# 2. Initialize catalog
|
|
92
|
+
featcat init
|
|
93
|
+
|
|
94
|
+
# 3. Register and scan a data source
|
|
95
|
+
featcat source add device_perf /data/features/device_performance.parquet
|
|
96
|
+
featcat source scan device_perf
|
|
97
|
+
|
|
98
|
+
# 4. Browse features
|
|
99
|
+
featcat feature list
|
|
100
|
+
featcat feature info device_perf.cpu_usage
|
|
101
|
+
|
|
102
|
+
# 5. (Optional) Enable AI features — requires Ollama
|
|
103
|
+
ollama serve &
|
|
104
|
+
ollama pull qwen2.5:7b
|
|
105
|
+
featcat discover "churn prediction for telecom customers"
|
|
106
|
+
featcat ask "features related to user behavior"
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## TUI (Terminal UI)
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
uv pip install -e ".[tui]"
|
|
113
|
+
featcat ui
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
<!-- Screenshot placeholder -->
|
|
117
|
+
<!--  -->
|
|
118
|
+
|
|
119
|
+
Keybindings: `D` Dashboard | `F` Features | `M` Monitor | `C` Chat | `Q` Quit | `?` Help
|
|
120
|
+
|
|
121
|
+
## System Health Check
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
featcat doctor
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
[x] Python 3.10+
|
|
129
|
+
[x] SQLite catalog exists (catalog.db)
|
|
130
|
+
[x] Ollama running at localhost:11434
|
|
131
|
+
[x] Model qwen2.5:7b available
|
|
132
|
+
[x] 14 features registered
|
|
133
|
+
[x] 10 features have docs (71.4%)
|
|
134
|
+
[ ] 2 features have drift warnings
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Tech Stack
|
|
138
|
+
|
|
139
|
+
- **Python 3.10+** | **SQLite** (metadata only, never copies data)
|
|
140
|
+
- **Typer** + **Rich** (CLI) | **Textual** (TUI)
|
|
141
|
+
- **PyArrow** (Parquet schema + stats) | **s3fs** (S3/MinIO)
|
|
142
|
+
- **Ollama** (local LLM) | **Pydantic** (models + config)
|
|
143
|
+
|
|
144
|
+
## Project Structure
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
featcat/
|
|
148
|
+
├── catalog/ # Models, DB, scanner, storage backends
|
|
149
|
+
├── llm/ # LLM abstraction (Ollama, llama.cpp)
|
|
150
|
+
├── plugins/ # Discovery, Autodoc, Monitoring, NL Query
|
|
151
|
+
├── utils/ # Prompts, catalog context, statistics, cache
|
|
152
|
+
├── tui/ # Textual TUI (screens, widgets)
|
|
153
|
+
├── config.py # Pydantic settings
|
|
154
|
+
└── cli.py # Typer CLI entry point
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
MIT
|