kitefs 0.2.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: kitefs
3
+ Version: 0.2.0a1
4
+ Summary: A Python feature store library for offline/online feature storage, registry, validation, and serving
5
+ Author: Fedai Paca
6
+ Author-email: Fedai Paca <fedaipaca@gmail.com>
7
+ License-Expression: Apache-2.0
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Requires-Dist: click>=8.3.2
12
+ Requires-Dist: pandas>=3.0.2
13
+ Requires-Dist: pyarrow>=24.0.0
14
+ Requires-Dist: pyyaml>=6.0
15
+ Requires-Dist: boto3 ; extra == 'aws'
16
+ Requires-Python: >=3.12
17
+ Project-URL: Homepage, https://github.com/fedaipaca/kitefs
18
+ Project-URL: Repository, https://github.com/fedaipaca/kitefs
19
+ Project-URL: Issues, https://github.com/fedaipaca/kitefs/issues
20
+ Provides-Extra: aws
21
+ Description-Content-Type: text/markdown
22
+
23
+ # kitefs
24
+
25
+ A Python feature store library for offline/online feature storage, registry, materialization, validation, and serving. Designed for Machine Learning practitioners who need feature management with minimal operational overhead.
26
+
27
+ - **SDK**: Python API for notebooks, scripts, and applications (`from kitefs import FeatureStore`)
28
+ - **CLI**: Command-line interface (`kitefs init`, `kitefs apply`, `kitefs ingest`, etc.)
29
+ - **Library-first**: No running server, no Docker — just `pip install` and go
30
+
31
+ ## Install from TestPyPI
32
+
33
+ > TestPyPI is a separate package index used for testing. The `--extra-index-url` flag tells pip to fetch dependencies (like `click` and `pyyaml`) from the real PyPI, since TestPyPI may not have them.
34
+
35
+ ```bash
36
+ pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ kitefs==0.1.0
37
+ ```
38
+
39
+ Verify the installation:
40
+
41
+ ```bash
42
+ # Should display the available commands
43
+ kitefs --help
44
+
45
+ # Create a new KiteFS project in the current directory
46
+ kitefs init
47
+ ```
48
+
49
+ ## Prerequisites
50
+
51
+ - **Python 3.12+**
52
+ - **[uv](https://docs.astral.sh/uv/)** — Fast Python package manager
53
+ - **[just](https://github.com/casey/just)** — Command runner (like `make`, but simpler)
54
+
55
+ ### Install uv
56
+
57
+ Follow the instructions at https://github.com/astral-sh/uv?tab=readme-ov-file#installation to install `uv` for your platform.
58
+
59
+ ### Install just
60
+
61
+ Follow the instructions at https://github.com/casey/just#installation to install `just` for your platform.
62
+
63
+ ## Getting Started
64
+
65
+ ```bash
66
+ # To clone the repository and set up the project, run:
67
+ git clone https://github.com/fedaipaca/kitefs.git
68
+ cd kitefs
69
+
70
+ # To install dependencies, run:
71
+ uv sync
72
+
73
+ # To list available commands, run:
74
+ just
75
+ ```
76
+
77
+ ## Available Commands
78
+
79
+ Run `just` with no arguments to see all commands:
80
+
81
+ ```bash
82
+ just
83
+ ```
84
+
85
+ | Command | Description |
86
+ | ----------------- | ---------------------------------------- |
87
+ | `just dev` | Run the project locally |
88
+ | `just test` | Run all tests |
89
+ | `just lint` | Check code for lint issues |
90
+ | `just format` | Auto-format code |
91
+ | `just fix` | Auto-fix lint issues |
92
+ | `just check` | Run lint + tests (quick pre-commit check)|
93
+ | `just build` | Build the package |
94
+ | `just clean` | Remove build artifacts and caches |
95
+ | `just clean-build`| Clean then build from scratch |
96
+
97
+ ## Project Structure
98
+
99
+ ```
100
+ kitefs/
101
+ ├── src/
102
+ │ └── kitefs/ # Directory contains source code files
103
+ ├── tests/ # Directory contains test files
104
+ ├── docs/ # Design documents
105
+ ├── pyproject.toml # Project config, dependencies, tool settings
106
+ ├── justfile # Task runner commands
107
+ └── uv.lock # Locked dependencies
108
+ ```
@@ -0,0 +1,86 @@
1
+ # kitefs
2
+
3
+ A Python feature store library for offline/online feature storage, registry, materialization, validation, and serving. Designed for Machine Learning practitioners who need feature management with minimal operational overhead.
4
+
5
+ - **SDK**: Python API for notebooks, scripts, and applications (`from kitefs import FeatureStore`)
6
+ - **CLI**: Command-line interface (`kitefs init`, `kitefs apply`, `kitefs ingest`, etc.)
7
+ - **Library-first**: No running server, no Docker — just `pip install` and go
8
+
9
+ ## Install from TestPyPI
10
+
11
+ > TestPyPI is a separate package index used for testing. The `--extra-index-url` flag tells pip to fetch dependencies (like `click` and `pyyaml`) from the real PyPI, since TestPyPI may not have them.
12
+
13
+ ```bash
14
+ pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ kitefs==0.1.0
15
+ ```
16
+
17
+ Verify the installation:
18
+
19
+ ```bash
20
+ # Should display the available commands
21
+ kitefs --help
22
+
23
+ # Create a new KiteFS project in the current directory
24
+ kitefs init
25
+ ```
26
+
27
+ ## Prerequisites
28
+
29
+ - **Python 3.12+**
30
+ - **[uv](https://docs.astral.sh/uv/)** — Fast Python package manager
31
+ - **[just](https://github.com/casey/just)** — Command runner (like `make`, but simpler)
32
+
33
+ ### Install uv
34
+
35
+ Follow the instructions at https://github.com/astral-sh/uv?tab=readme-ov-file#installation to install `uv` for your platform.
36
+
37
+ ### Install just
38
+
39
+ Follow the instructions at https://github.com/casey/just#installation to install `just` for your platform.
40
+
41
+ ## Getting Started
42
+
43
+ ```bash
44
+ # To clone the repository and set up the project, run:
45
+ git clone https://github.com/fedaipaca/kitefs.git
46
+ cd kitefs
47
+
48
+ # To install dependencies, run:
49
+ uv sync
50
+
51
+ # To list available commands, run:
52
+ just
53
+ ```
54
+
55
+ ## Available Commands
56
+
57
+ Run `just` with no arguments to see all commands:
58
+
59
+ ```bash
60
+ just
61
+ ```
62
+
63
+ | Command | Description |
64
+ | ----------------- | ---------------------------------------- |
65
+ | `just dev` | Run the project locally |
66
+ | `just test` | Run all tests |
67
+ | `just lint` | Check code for lint issues |
68
+ | `just format` | Auto-format code |
69
+ | `just fix` | Auto-fix lint issues |
70
+ | `just check` | Run lint + tests (quick pre-commit check)|
71
+ | `just build` | Build the package |
72
+ | `just clean` | Remove build artifacts and caches |
73
+ | `just clean-build`| Clean then build from scratch |
74
+
75
+ ## Project Structure
76
+
77
+ ```
78
+ kitefs/
79
+ ├── src/
80
+ │ └── kitefs/ # Directory contains source code files
81
+ ├── tests/ # Directory contains test files
82
+ ├── docs/ # Design documents
83
+ ├── pyproject.toml # Project config, dependencies, tool settings
84
+ ├── justfile # Task runner commands
85
+ └── uv.lock # Locked dependencies
86
+ ```
@@ -0,0 +1,67 @@
1
+ [project]
2
+ name = "kitefs"
3
+ version = "0.2.0a1"
4
+ description = "A Python feature store library for offline/online feature storage, registry, validation, and serving"
5
+ readme = "README.md"
6
+ license = "Apache-2.0"
7
+ authors = [
8
+ { name = "Fedai Paca", email = "fedaipaca@gmail.com" }
9
+ ]
10
+ requires-python = ">=3.12"
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.12",
15
+ ]
16
+ dependencies = [
17
+ "click>=8.3.2",
18
+ "pandas>=3.0.2",
19
+ "pyarrow>=24.0.0",
20
+ "pyyaml>=6.0",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ aws = ["boto3"]
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/fedaipaca/kitefs"
28
+ Repository = "https://github.com/fedaipaca/kitefs"
29
+ Issues = "https://github.com/fedaipaca/kitefs/issues"
30
+
31
+ [project.scripts]
32
+ kitefs = "kitefs.cli:cli"
33
+
34
+ [build-system]
35
+ requires = ["uv_build>=0.11.1,<0.12.0"]
36
+ build-backend = "uv_build"
37
+
38
+ [tool.ruff]
39
+ line-length = 120
40
+ target-version = "py312"
41
+
42
+ [tool.ruff.lint]
43
+ select = ["E", "W", "F", "I", "N", "UP", "B", "SIM", "RUF"]
44
+
45
+ [tool.ruff.format]
46
+ quote-style = "double"
47
+ indent-style = "space"
48
+ docstring-code-format = true
49
+
50
+ [tool.pytest.ini_options]
51
+ testpaths = ["tests"]
52
+ pythonpath = ["src", "tests"]
53
+ addopts = "-v --tb=short"
54
+
55
+ [tool.pyright]
56
+ include = ["src", "tests"]
57
+ exclude = ["build", "dist", "**/node_modules", "**/__pycache__", "**/.*", ".venv"]
58
+ pythonVersion = "3.12"
59
+ typeCheckingMode = "basic"
60
+ extraPaths = ["src"]
61
+
62
+ [dependency-groups]
63
+ dev = [
64
+ "pyright>=1.1.408",
65
+ "pytest>=9.0.3",
66
+ "ruff>=0.15.10",
67
+ ]
@@ -0,0 +1,31 @@
1
+ """KiteFS — a Python feature store for offline/online feature storage and serving."""
2
+
3
+ from kitefs.definitions import (
4
+ EntityKey,
5
+ EventTimestamp,
6
+ Expect,
7
+ Feature,
8
+ FeatureGroup,
9
+ FeatureType,
10
+ JoinKey,
11
+ Metadata,
12
+ StorageTarget,
13
+ ValidationMode,
14
+ )
15
+ from kitefs.feature_store import FeatureStore
16
+ from kitefs.registry import ApplyResult
17
+
18
+ __all__ = [
19
+ "ApplyResult",
20
+ "EntityKey",
21
+ "EventTimestamp",
22
+ "Expect",
23
+ "Feature",
24
+ "FeatureGroup",
25
+ "FeatureStore",
26
+ "FeatureType",
27
+ "JoinKey",
28
+ "Metadata",
29
+ "StorageTarget",
30
+ "ValidationMode",
31
+ ]
@@ -0,0 +1,5 @@
1
+ """Entry point for `python -m kitefs`."""
2
+
3
+ from kitefs.cli import cli
4
+
5
+ cli()
@@ -0,0 +1,253 @@
1
+ """CLI entry point for KiteFS — thin delegation layer over the SDK.
2
+
3
+ ``kitefs init`` is the only self-contained command because the project
4
+ scaffold (including ``kitefs.yaml``) does not exist yet when it runs.
5
+ All other commands delegate to :class:`kitefs.FeatureStore`.
6
+ """
7
+
8
+ import json
9
+ from pathlib import Path
10
+
11
+ import click
12
+
13
+ _GITIGNORE_ENTRY = "feature_store/data/"
14
+
15
+ _DEFAULT_CONFIG = """\
16
+ provider: local
17
+ storage_root: ./feature_store/
18
+ """
19
+
20
+ _EXAMPLE_FEATURES = '''\
21
+ """Example feature group definitions for KiteFS.
22
+
23
+ Uncomment and modify the example below, then run ``kitefs apply``
24
+ to register your feature groups.
25
+ """
26
+
27
+ # from kitefs import (
28
+ # EntityKey,
29
+ # EventTimestamp,
30
+ # Expect,
31
+ # Feature,
32
+ # FeatureGroup,
33
+ # FeatureType,
34
+ # Metadata,
35
+ # StorageTarget,
36
+ # ValidationMode,
37
+ # )
38
+ #
39
+ # example_features = FeatureGroup(
40
+ # name="example_features",
41
+ # storage_target=StorageTarget.OFFLINE,
42
+ # entity_key=EntityKey(name="entity_id", dtype=FeatureType.INTEGER),
43
+ # event_timestamp=EventTimestamp(name="event_timestamp", dtype=FeatureType.DATETIME),
44
+ # features=[
45
+ # Feature(name="feature_one", dtype=FeatureType.FLOAT, expect=Expect().not_null()),
46
+ # Feature(name="feature_two", dtype=FeatureType.STRING),
47
+ # ],
48
+ # ingestion_validation=ValidationMode.ERROR,
49
+ # metadata=Metadata(owner="your-team", description="An example feature group."),
50
+ # )
51
+ '''
52
+
53
+ _SEED_REGISTRY = {"version": "1.0", "feature_groups": {}}
54
+
55
+
56
+ @click.group()
57
+ def cli() -> None:
58
+ """KiteFS — a Python feature store for offline/online feature storage and serving."""
59
+
60
+
61
+ @cli.command()
62
+ @click.argument("path", required=False, default=None, type=click.Path(file_okay=False))
63
+ def init(path: str | None) -> None:
64
+ """Create a new KiteFS project at PATH (default: current directory)."""
65
+ project_root = Path(path).resolve() if path else Path.cwd().resolve()
66
+ config_path = project_root / "kitefs.yaml"
67
+
68
+ if config_path.exists():
69
+ click.echo("Error: KiteFS project already initialized at this location.", err=True)
70
+ raise SystemExit(1)
71
+
72
+ storage_root = project_root / "feature_store"
73
+
74
+ try:
75
+ # Create directory structure
76
+ (storage_root / "definitions").mkdir(parents=True, exist_ok=True)
77
+ (storage_root / "data" / "offline_store").mkdir(parents=True, exist_ok=True)
78
+ (storage_root / "data" / "online_store").mkdir(parents=True, exist_ok=True)
79
+
80
+ # Seed definitions
81
+ (storage_root / "definitions" / "__init__.py").write_text("", encoding="utf-8")
82
+ (storage_root / "definitions" / "example_features.py").write_text(_EXAMPLE_FEATURES, encoding="utf-8")
83
+
84
+ # Seed registry.json — deterministic output for meaningful Git diffs
85
+ registry_path = storage_root / "registry.json"
86
+ registry_path.write_text(json.dumps(_SEED_REGISTRY, sort_keys=True, indent=2) + "\n", encoding="utf-8")
87
+
88
+ # Create or append .gitignore — check by exact line, not substring, to avoid
89
+ # false positives from comments or negated rules containing the entry.
90
+ gitignore_path = project_root / ".gitignore"
91
+ if gitignore_path.exists():
92
+ content = gitignore_path.read_text(encoding="utf-8")
93
+ existing_lines = {line.strip() for line in content.splitlines()}
94
+ if _GITIGNORE_ENTRY not in existing_lines:
95
+ with gitignore_path.open("a", encoding="utf-8") as f:
96
+ if content and not content.endswith("\n"):
97
+ f.write("\n")
98
+ f.write(_GITIGNORE_ENTRY + "\n")
99
+ else:
100
+ gitignore_path.write_text(_GITIGNORE_ENTRY + "\n", encoding="utf-8")
101
+
102
+ # Seed kitefs.yaml last — this is the sentinel file that guards against
103
+ # re-init. Writing it last ensures a crash mid-scaffold leaves no sentinel,
104
+ # so the user can retry `kitefs init` without manual cleanup.
105
+ config_path.write_text(_DEFAULT_CONFIG, encoding="utf-8")
106
+ except OSError as e:
107
+ click.echo(f"Error: {e}", err=True)
108
+ raise SystemExit(1) from None
109
+
110
+ click.echo(f"Project initialized at {project_root}")
111
+ click.echo(" Provider: local")
112
+ click.echo(f" Config: {config_path}")
113
+
114
+
115
+ @cli.command()
116
+ def apply() -> None:
117
+ """Register feature group definitions into the registry."""
118
+ from kitefs.exceptions import KiteFSError
119
+ from kitefs.feature_store import FeatureStore
120
+
121
+ try:
122
+ fs = FeatureStore()
123
+ result = fs.apply()
124
+ except KiteFSError as e:
125
+ click.echo(f"Error: {e}", err=True)
126
+ raise SystemExit(1) from None
127
+
128
+ click.echo(f"Applied {result.group_count} feature group(s) — registered successfully.")
129
+
130
+
131
+ @cli.command(name="list")
132
+ @click.option("--format", "fmt", default=None, type=click.Choice(["json"], case_sensitive=False), help="Output format.")
133
+ @click.option("--target", default=None, type=click.Path(), help="File path to write output to.")
134
+ def list_cmd(fmt: str | None, target: str | None) -> None:
135
+ """List all registered feature groups with summary information."""
136
+ from kitefs.exceptions import KiteFSError
137
+ from kitefs.feature_store import FeatureStore
138
+
139
+ try:
140
+ fs = FeatureStore()
141
+ result = fs.list_feature_groups(format=fmt, target=target)
142
+ except KiteFSError as e:
143
+ click.echo(f"Error: {e}", err=True)
144
+ raise SystemExit(1) from None
145
+
146
+ if target is not None:
147
+ click.echo(f"Output written to {target}")
148
+ return
149
+
150
+ if fmt == "json":
151
+ click.echo(result)
152
+ return
153
+
154
+ # Default: human-readable table.
155
+ assert isinstance(result, list) # target/format branches already returned
156
+ if not result:
157
+ click.echo("No feature groups registered. Run `kitefs apply` first.")
158
+ return
159
+
160
+ _render_list_table(result)
161
+
162
+
163
+ def _render_list_table(summaries: list[dict]) -> None:
164
+ """Render feature group summaries as a human-readable table."""
165
+ headers = ["Name", "Owner", "Entity Key", "Storage Target", "Features"]
166
+ keys = ["name", "owner", "entity_key", "storage_target", "feature_count"]
167
+
168
+ rows: list[list[str]] = []
169
+ for s in summaries:
170
+ rows.append([str(s.get(k) or "") for k in keys])
171
+
172
+ # Compute column widths from headers and data.
173
+ widths = [len(h) for h in headers]
174
+ for row in rows:
175
+ for i, cell in enumerate(row):
176
+ widths[i] = max(widths[i], len(cell))
177
+
178
+ def _fmt_row(cells: list[str]) -> str:
179
+ return " ".join(cell.ljust(widths[i]) for i, cell in enumerate(cells))
180
+
181
+ click.echo(_fmt_row(headers))
182
+ click.echo(" ".join("-" * w for w in widths))
183
+ for row in rows:
184
+ click.echo(_fmt_row(row))
185
+
186
+
187
+ @cli.command()
188
+ @click.argument("feature_group_name")
189
+ @click.option("--format", "fmt", default=None, type=click.Choice(["json"], case_sensitive=False), help="Output format.")
190
+ @click.option("--target", default=None, type=click.Path(), help="File path to write output to.")
191
+ def describe(feature_group_name: str, fmt: str | None, target: str | None) -> None:
192
+ """Display the full definition of a specific feature group."""
193
+ from kitefs.exceptions import KiteFSError
194
+ from kitefs.feature_store import FeatureStore
195
+
196
+ try:
197
+ fs = FeatureStore()
198
+ result = fs.describe_feature_group(feature_group_name, format=fmt, target=target)
199
+ except KiteFSError as e:
200
+ click.echo(f"Error: {e}", err=True)
201
+ raise SystemExit(1) from None
202
+
203
+ if target is not None:
204
+ click.echo(f"Output written to {target}")
205
+ return
206
+
207
+ if fmt == "json":
208
+ click.echo(result)
209
+ return
210
+
211
+ # Default: human-readable key-value layout.
212
+ assert isinstance(result, dict) # target/format branches already returned
213
+ _render_describe(result)
214
+
215
+
216
+ def _render_describe(entry: dict) -> None:
217
+ """Render a full feature group definition as a human-readable layout."""
218
+ click.echo(f"Feature Group: {entry.get('name', '?')}")
219
+ click.echo(f" Storage Target: {entry.get('storage_target', '?')}")
220
+
221
+ ek = entry.get("entity_key", {})
222
+ click.echo(f" Entity Key: {ek.get('name', '?')} ({ek.get('dtype', '?')})")
223
+
224
+ et = entry.get("event_timestamp", {})
225
+ click.echo(f" Event Timestamp: {et.get('name', '?')} ({et.get('dtype', '?')})")
226
+
227
+ click.echo(f" Ingestion Validation: {entry.get('ingestion_validation', '?')}")
228
+ click.echo(f" Offline Retrieval Validation: {entry.get('offline_retrieval_validation', '?')}")
229
+
230
+ meta = entry.get("metadata") or {}
231
+ if meta.get("owner"):
232
+ click.echo(f" Owner: {meta['owner']}")
233
+ if meta.get("description"):
234
+ click.echo(f" Description: {meta['description']}")
235
+ if meta.get("tags"):
236
+ click.echo(f" Tags: {meta['tags']}")
237
+
238
+ click.echo(f" Applied At: {entry.get('applied_at', '?')}")
239
+ click.echo(f" Last Materialized At: {entry.get('last_materialized_at', 'None')}")
240
+
241
+ features = entry.get("features", [])
242
+ click.echo(f" Features ({len(features)}):")
243
+ for f in features:
244
+ expect_str = ""
245
+ if f.get("expect"):
246
+ expect_str = f" expect={f['expect']}"
247
+ click.echo(f" - {f['name']} ({f.get('dtype', '?')}){expect_str}")
248
+
249
+ join_keys = entry.get("join_keys", [])
250
+ if join_keys:
251
+ click.echo(f" Join Keys ({len(join_keys)}):")
252
+ for jk in join_keys:
253
+ click.echo(f" - {jk['field_name']} -> {jk['referenced_group']}")