omgkit 2.19.3 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/README.md +537 -338
  2. package/package.json +2 -2
  3. package/plugin/agents/ai-architect-agent.md +282 -0
  4. package/plugin/agents/data-scientist-agent.md +221 -0
  5. package/plugin/agents/experiment-analyst-agent.md +318 -0
  6. package/plugin/agents/ml-engineer-agent.md +165 -0
  7. package/plugin/agents/mlops-engineer-agent.md +324 -0
  8. package/plugin/agents/model-optimizer-agent.md +287 -0
  9. package/plugin/agents/production-engineer-agent.md +360 -0
  10. package/plugin/agents/research-scientist-agent.md +274 -0
  11. package/plugin/commands/omgdata/augment.md +86 -0
  12. package/plugin/commands/omgdata/collect.md +81 -0
  13. package/plugin/commands/omgdata/label.md +83 -0
  14. package/plugin/commands/omgdata/split.md +83 -0
  15. package/plugin/commands/omgdata/validate.md +76 -0
  16. package/plugin/commands/omgdata/version.md +85 -0
  17. package/plugin/commands/omgdeploy/ab.md +94 -0
  18. package/plugin/commands/omgdeploy/cloud.md +89 -0
  19. package/plugin/commands/omgdeploy/edge.md +93 -0
  20. package/plugin/commands/omgdeploy/package.md +91 -0
  21. package/plugin/commands/omgdeploy/serve.md +92 -0
  22. package/plugin/commands/omgfeature/embed.md +93 -0
  23. package/plugin/commands/omgfeature/extract.md +93 -0
  24. package/plugin/commands/omgfeature/select.md +85 -0
  25. package/plugin/commands/omgfeature/store.md +97 -0
  26. package/plugin/commands/omgml/init.md +60 -0
  27. package/plugin/commands/omgml/status.md +82 -0
  28. package/plugin/commands/omgops/drift.md +87 -0
  29. package/plugin/commands/omgops/monitor.md +99 -0
  30. package/plugin/commands/omgops/pipeline.md +102 -0
  31. package/plugin/commands/omgops/registry.md +109 -0
  32. package/plugin/commands/omgops/retrain.md +91 -0
  33. package/plugin/commands/omgoptim/distill.md +90 -0
  34. package/plugin/commands/omgoptim/profile.md +92 -0
  35. package/plugin/commands/omgoptim/prune.md +81 -0
  36. package/plugin/commands/omgoptim/quantize.md +83 -0
  37. package/plugin/commands/omgtrain/baseline.md +78 -0
  38. package/plugin/commands/omgtrain/compare.md +99 -0
  39. package/plugin/commands/omgtrain/evaluate.md +85 -0
  40. package/plugin/commands/omgtrain/train.md +81 -0
  41. package/plugin/commands/omgtrain/tune.md +89 -0
  42. package/plugin/registry.yaml +252 -2
  43. package/plugin/skills/ml-systems/SKILL.md +65 -0
  44. package/plugin/skills/ml-systems/ai-accelerators/SKILL.md +342 -0
  45. package/plugin/skills/ml-systems/data-eng/SKILL.md +126 -0
  46. package/plugin/skills/ml-systems/deep-learning-primer/SKILL.md +143 -0
  47. package/plugin/skills/ml-systems/deployment-paradigms/SKILL.md +148 -0
  48. package/plugin/skills/ml-systems/dnn-architectures/SKILL.md +128 -0
  49. package/plugin/skills/ml-systems/edge-deployment/SKILL.md +366 -0
  50. package/plugin/skills/ml-systems/efficient-ai/SKILL.md +316 -0
  51. package/plugin/skills/ml-systems/feature-engineering/SKILL.md +151 -0
  52. package/plugin/skills/ml-systems/ml-frameworks/SKILL.md +187 -0
  53. package/plugin/skills/ml-systems/ml-serving-optimization/SKILL.md +371 -0
  54. package/plugin/skills/ml-systems/ml-systems-fundamentals/SKILL.md +103 -0
  55. package/plugin/skills/ml-systems/ml-workflow/SKILL.md +162 -0
  56. package/plugin/skills/ml-systems/mlops/SKILL.md +386 -0
  57. package/plugin/skills/ml-systems/model-deployment/SKILL.md +350 -0
  58. package/plugin/skills/ml-systems/model-dev/SKILL.md +160 -0
  59. package/plugin/skills/ml-systems/model-optimization/SKILL.md +339 -0
  60. package/plugin/skills/ml-systems/robust-ai/SKILL.md +395 -0
  61. package/plugin/skills/ml-systems/training-data/SKILL.md +152 -0
  62. package/plugin/workflows/ml-systems/data-preparation-workflow.md +276 -0
  63. package/plugin/workflows/ml-systems/edge-deployment-workflow.md +413 -0
  64. package/plugin/workflows/ml-systems/full-ml-lifecycle-workflow.md +405 -0
  65. package/plugin/workflows/ml-systems/hyperparameter-tuning-workflow.md +352 -0
  66. package/plugin/workflows/ml-systems/mlops-pipeline-workflow.md +384 -0
  67. package/plugin/workflows/ml-systems/model-deployment-workflow.md +392 -0
  68. package/plugin/workflows/ml-systems/model-development-workflow.md +218 -0
  69. package/plugin/workflows/ml-systems/model-evaluation-workflow.md +416 -0
  70. package/plugin/workflows/ml-systems/model-optimization-workflow.md +390 -0
  71. package/plugin/workflows/ml-systems/monitoring-drift-workflow.md +446 -0
  72. package/plugin/workflows/ml-systems/retraining-workflow.md +401 -0
  73. package/plugin/workflows/ml-systems/training-pipeline-workflow.md +382 -0
@@ -0,0 +1,93 @@
1
+ ---
2
+ description: Deploy model to edge devices including TFLite Micro, Jetson, Raspberry Pi, and mobile platforms
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <target> [--model <path>] [--optimize]
5
+ ---
6
+
7
+ # Edge Deployment: $ARGUMENTS
8
+
9
+ Deploy to edge: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **deployment-agent** for edge deployment.
13
+
14
+ ## Parameters
15
+ - **target**: tflite_micro | jetson | raspberry_pi | mobile_ios | mobile_android
16
+ - **model**: Path to model
17
+ - **optimize**: Apply target-specific optimization (default: true)
18
+
19
+ ## Edge Targets
20
+
21
+ ### TFLite Micro
22
+ - Microcontroller deployment
23
+ - <1MB Flash, <256KB RAM
24
+ - INT8 quantization required
25
+ - C++ runtime
26
+
27
+ ### Jetson (Nano/Xavier/Orin)
28
+ - NVIDIA GPU accelerated
29
+ - TensorRT optimization
30
+ - FP16/INT8 support
31
+ - CUDA runtime
32
+
33
+ ### Raspberry Pi
34
+ - ARM CPU deployment
35
+ - ONNX Runtime
36
+ - INT8 quantization
37
+ - Python/C++ runtime
38
+
39
+ ### Mobile iOS
40
+ - CoreML format
41
+ - Neural Engine
42
+ - On-device inference
43
+ - Swift/Obj-C integration
44
+
45
+ ### Mobile Android
46
+ - TFLite format
47
+ - NNAPI delegate
48
+ - GPU delegate
49
+ - Kotlin/Java integration
50
+
51
+ ## Code Template
52
+ ```python
53
+ from omgkit.deployment import EdgeDeployer
54
+
55
+ deployer = EdgeDeployer()
56
+
57
+ # Deploy to TFLite Micro (TinyML)
58
+ tflite_model = deployer.deploy(
59
+ model_path="models/best_model.pt",
60
+ target="tflite_micro",
61
+ optimize=True,
62
+ constraints={
63
+ "max_flash_kb": 512,
64
+ "max_ram_kb": 128,
65
+ "target_latency_ms": 10
66
+ },
67
+ output_dir="edge/tflite_micro/"
68
+ )
69
+
70
+ # Deploy to Jetson
71
+ jetson_model = deployer.deploy(
72
+ model_path="models/best_model.pt",
73
+ target="jetson",
74
+ optimize=True,
75
+ tensorrt_precision="fp16",
76
+ output_dir="edge/jetson/"
77
+ )
78
+ ```
79
+
80
+ ## Optimization Applied
81
+ - Quantization (INT8/FP16)
82
+ - Pruning
83
+ - Operator fusion
84
+ - Memory optimization
85
+
86
+ ## Progress
87
+ - [ ] Target validated
88
+ - [ ] Model converted
89
+ - [ ] Optimization applied
90
+ - [ ] Size verified
91
+ - [ ] Package created
92
+
93
+ Deploy ML models to resource-constrained devices.
@@ -0,0 +1,91 @@
1
+ ---
2
+ description: Package model for deployment in TorchServe, TF Serving, ONNX, TFLite, or Docker formats
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <format> [--model <path>]
5
+ ---
6
+
7
+ # Model Packaging: $ARGUMENTS
8
+
9
+ Package model: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **deployment-agent** for model packaging.
13
+
14
+ ## Parameters
15
+ - **format**: torchserve | tensorflow_serving | onnx | tflite | docker
16
+ - **model**: Path to trained model
17
+
18
+ ## Package Formats
19
+
20
+ ### TorchServe
21
+ - .mar archive file
22
+ - Custom handler support
23
+ - Multi-model serving
24
+ - Batch inference
25
+
26
+ ### TensorFlow Serving
27
+ - SavedModel format
28
+ - gRPC/REST endpoints
29
+ - Model versioning
30
+ - Warm-up support
31
+
32
+ ### ONNX
33
+ - Cross-framework format
34
+ - Hardware agnostic
35
+ - Wide runtime support
36
+ - Optimization passes
37
+
38
+ ### TFLite
39
+ - Mobile/embedded ready
40
+ - Quantization built-in
41
+ - Delegates support
42
+ - Minimal footprint
43
+
44
+ ### Docker
45
+ - Containerized deployment
46
+ - All dependencies included
47
+ - Kubernetes ready
48
+ - Easy scaling
49
+
50
+ ## Code Template
51
+ ```python
52
+ from omgkit.deployment import ModelPackager
53
+
54
+ packager = ModelPackager()
55
+
56
+ # Package for TorchServe
57
+ package = packager.package(
58
+ model_path="models/best_model.pt",
59
+ format="torchserve",
60
+ model_name="churn_predictor",
61
+ version="1.0",
62
+ handler="src/serving/handler.py",
63
+ requirements=["torch", "numpy", "pandas"],
64
+ output_dir="artifacts/"
65
+ )
66
+
67
+ # Package as Docker
68
+ docker_image = packager.package(
69
+ model_path="models/best_model.pt",
70
+ format="docker",
71
+ base_image="pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime",
72
+ expose_port=8080,
73
+ output_dir="docker/"
74
+ )
75
+ ```
76
+
77
+ ## Output Artifacts
78
+ - Packaged model file
79
+ - Dockerfile (if docker)
80
+ - Requirements file
81
+ - Handler script
82
+ - Config files
83
+
84
+ ## Progress
85
+ - [ ] Model loaded
86
+ - [ ] Format validated
87
+ - [ ] Dependencies resolved
88
+ - [ ] Package created
89
+ - [ ] Artifacts saved
90
+
91
+ Create deployment-ready model packages.
@@ -0,0 +1,92 @@
1
+ ---
2
+ description: Deploy model serving endpoint on local, Kubernetes, AWS SageMaker, GCP Vertex, or Azure ML
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <platform> [--config <config>]
5
+ ---
6
+
7
+ # Model Serving: $ARGUMENTS
8
+
9
+ Deploy serving: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **deployment-agent** for model serving deployment.
13
+
14
+ ## Parameters
15
+ - **platform**: local | kubernetes | aws_sagemaker | gcp_vertex | azure_ml
16
+ - **config**: Path to deployment configuration
17
+
18
+ ## Deployment Platforms
19
+
20
+ ### Local
21
+ - FastAPI server
22
+ - Development/testing
23
+ - Quick iteration
24
+ - localhost access
25
+
26
+ ### Kubernetes
27
+ - Seldon Core / KServe
28
+ - Auto-scaling
29
+ - Canary deployments
30
+ - Production grade
31
+
32
+ ### AWS SageMaker
33
+ - Managed endpoints
34
+ - Auto-scaling
35
+ - Multi-model serving
36
+ - A/B testing
37
+
38
+ ### GCP Vertex AI
39
+ - Managed prediction
40
+ - Explanation support
41
+ - Batch prediction
42
+ - AutoML integration
43
+
44
+ ### Azure ML
45
+ - Managed endpoints
46
+ - Blue-green deployment
47
+ - Container support
48
+ - Enterprise features
49
+
50
+ ## Code Template
51
+ ```python
52
+ from omgkit.deployment import ModelServer
53
+
54
+ server = ModelServer()
55
+
56
+ # Deploy to Kubernetes
57
+ endpoint = server.deploy(
58
+ model_path="artifacts/churn_predictor.mar",
59
+ platform="kubernetes",
60
+ config={
61
+ "replicas": 3,
62
+ "resources": {
63
+ "requests": {"cpu": "500m", "memory": "1Gi"},
64
+ "limits": {"cpu": "2", "memory": "4Gi"}
65
+ },
66
+ "autoscaling": {
67
+ "min_replicas": 2,
68
+ "max_replicas": 10,
69
+ "target_cpu_utilization": 70
70
+ }
71
+ },
72
+ namespace="ml-serving"
73
+ )
74
+
75
+ print(f"Endpoint deployed: {endpoint.url}")
76
+ ```
77
+
78
+ ## Features
79
+ - Health checks
80
+ - Metrics collection
81
+ - Request logging
82
+ - Error handling
83
+ - Graceful shutdown
84
+
85
+ ## Progress
86
+ - [ ] Config validated
87
+ - [ ] Platform connected
88
+ - [ ] Model deployed
89
+ - [ ] Health verified
90
+ - [ ] Endpoint active
91
+
92
+ Deploy scalable model serving infrastructure.
@@ -0,0 +1,93 @@
1
+ ---
2
+ description: Create embeddings for text, images, and categorical features using pretrained or custom models
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <data_type> [--model <model>]
5
+ ---
6
+
7
+ # Feature Embeddings: $ARGUMENTS
8
+
9
+ Create embeddings: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **feature-engineer-agent** for embedding generation.
13
+
14
+ ## Parameters
15
+ - **data_type**: text | image | categorical
16
+ - **model**: Embedding model name (e.g., sentence-transformers, CLIP)
17
+
18
+ ## Embedding Types
19
+
20
+ ### Text Embeddings
21
+ - Sentence Transformers
22
+ - OpenAI embeddings
23
+ - Cohere embeddings
24
+ - Custom fine-tuned models
25
+ - Word2Vec/FastText
26
+
27
+ ### Image Embeddings
28
+ - CLIP
29
+ - ResNet features
30
+ - EfficientNet
31
+ - ViT (Vision Transformer)
32
+
33
+ ### Categorical Embeddings
34
+ - Entity embeddings
35
+ - Learned by a neural network
36
+ - Co-occurrence based
37
+
38
+ ## Code Template
39
+ ```python
40
+ from omgkit.features import EmbeddingGenerator
41
+
42
+ generator = EmbeddingGenerator()
43
+
44
+ # Text embeddings
45
+ text_embeddings = generator.embed(
46
+ data_path="data/processed/texts.parquet",
47
+ data_type="text",
48
+ model="sentence-transformers/all-MiniLM-L6-v2",
49
+ column="description",
50
+ batch_size=32,
51
+ output_path="data/features/text_embeddings.parquet"
52
+ )
53
+
54
+ # Image embeddings
55
+ image_embeddings = generator.embed(
56
+ data_path="data/processed/images/",
57
+ data_type="image",
58
+ model="openai/clip-vit-base-patch32",
59
+ batch_size=16,
60
+ output_path="data/features/image_embeddings.parquet"
61
+ )
62
+
63
+ # Categorical embeddings
64
+ cat_embeddings = generator.embed(
65
+ data_path="data/processed/categories.parquet",
66
+ data_type="categorical",
67
+ model="entity_embedding",
68
+ columns=["category", "brand", "seller"],
69
+ embedding_dim=32,
70
+ output_path="data/features/cat_embeddings.parquet"
71
+ )
72
+ ```
73
+
74
+ ## Model Options
75
+ - OpenAI: text-embedding-ada-002
76
+ - Sentence Transformers: all-MiniLM-L6-v2
77
+ - CLIP: openai/clip-vit-base-patch32
78
+ - Custom: fine-tuned models
79
+
80
+ ## Output
81
+ - Embedding vectors
82
+ - Metadata mapping
83
+ - Dimensionality info
84
+ - Quality metrics
85
+
86
+ ## Progress
87
+ - [ ] Model loaded
88
+ - [ ] Data processed
89
+ - [ ] Embeddings generated
90
+ - [ ] Quality validated
91
+ - [ ] Output saved
92
+
93
+ Generate dense vector representations for ML tasks.
@@ -0,0 +1,93 @@
1
+ ---
2
+ description: Extract features from numerical, categorical, text, image, and temporal data with configurable pipelines
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <feature_type> [--config <config>]
5
+ ---
6
+
7
+ # Feature Extraction: $ARGUMENTS
8
+
9
+ Extract features: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **feature-engineer-agent** for feature extraction.
13
+
14
+ ## Parameters
15
+ - **feature_type**: numerical | categorical | text | image | temporal | custom
16
+ - **config**: Path to feature extraction config
17
+
18
+ ## Feature Operations
19
+
20
+ ### Numerical
21
+ - Scaling (standard, minmax, robust)
22
+ - Log transform
23
+ - Polynomial features
24
+ - Binning/discretization
25
+ - Outlier handling
26
+
27
+ ### Categorical
28
+ - One-hot encoding
29
+ - Label encoding
30
+ - Target encoding
31
+ - Frequency encoding
32
+ - Hash encoding
33
+
34
+ ### Text
35
+ - TF-IDF vectors
36
+ - Word2Vec/FastText
37
+ - BERT embeddings
38
+ - Text statistics
39
+ - N-gram features
40
+
41
+ ### Image
42
+ - CNN feature extraction
43
+ - Histogram features
44
+ - Edge detection
45
+ - Color features
46
+
47
+ ### Temporal
48
+ - Datetime components
49
+ - Cyclical encoding (sin/cos)
50
+ - Lag features
51
+ - Rolling statistics
52
+ - Trend extraction
53
+
54
+ ## Code Template
55
+ ```python
56
+ from omgkit.features import FeatureExtractor
57
+
58
+ extractor = FeatureExtractor()
59
+
60
+ feature_pipeline = extractor.create_pipeline({
61
+ "numerical": {
62
+ "columns": ["age", "income", "tenure"],
63
+ "operations": ["scaling", "log_transform"]
64
+ },
65
+ "categorical": {
66
+ "columns": ["city", "occupation"],
67
+ "operations": ["target_encoding"]
68
+ },
69
+ "text": {
70
+ "columns": ["description"],
71
+ "operations": ["tfidf", "text_statistics"]
72
+ },
73
+ "temporal": {
74
+ "columns": ["signup_date", "last_login"],
75
+ "operations": ["datetime_components", "cyclical_encoding"]
76
+ }
77
+ })
78
+
79
+ features = extractor.extract(
80
+ data_path="data/processed/users.parquet",
81
+ pipeline=feature_pipeline,
82
+ output_path="data/features/user_features.parquet"
83
+ )
84
+ ```
85
+
86
+ ## Progress
87
+ - [ ] Config loaded
88
+ - [ ] Pipeline created
89
+ - [ ] Features extracted
90
+ - [ ] Quality validated
91
+ - [ ] Output saved
92
+
93
+ Create reproducible feature engineering pipelines.
@@ -0,0 +1,85 @@
1
+ ---
2
+ description: Select most important features using filter, wrapper, embedded, or hybrid methods
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <method> [--n_features <n>]
5
+ ---
6
+
7
+ # Feature Selection: $ARGUMENTS
8
+
9
+ Select features: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **feature-engineer-agent** for feature selection.
13
+
14
+ ## Parameters
15
+ - **method**: filter | wrapper | embedded | hybrid (default: hybrid)
16
+ - **n_features**: Number of features to select
17
+
18
+ ## Selection Methods
19
+
20
+ ### Filter Methods
21
+ - Mutual information
22
+ - Chi-square test
23
+ - ANOVA F-test
24
+ - Correlation analysis
25
+ - Variance threshold
26
+
27
+ ### Wrapper Methods
28
+ - Recursive feature elimination (RFE)
29
+ - Forward selection
30
+ - Backward elimination
31
+ - Exhaustive search
32
+
33
+ ### Embedded Methods
34
+ - LASSO importance (L1)
35
+ - Tree-based importance
36
+ - Permutation importance
37
+ - SHAP values
38
+
39
+ ### Hybrid
40
+ - Combines multiple methods
41
+ - Voting/consensus approach
42
+ - Balances the speed of filter methods with the accuracy of wrapper and embedded methods
43
+
44
+ ## Code Template
45
+ ```python
46
+ from omgkit.features import FeatureSelector
47
+
48
+ selector = FeatureSelector()
49
+
50
+ selected_features = selector.select(
51
+ data_path="data/features/all_features.parquet",
52
+ target_column="label",
53
+ methods={
54
+ "filter": ["mutual_information", "correlation"],
55
+ "embedded": ["tree_importance"]
56
+ },
57
+ n_features=50,
58
+ output_path="data/features/selected_features.parquet"
59
+ )
60
+
61
+ # Generate importance report
62
+ selector.report(output="reports/feature_importance.html")
63
+ ```
64
+
65
+ ## Output
66
+ - Selected feature list
67
+ - Feature importance scores
68
+ - Selection rationale
69
+ - Visualization plots
70
+ - HTML report
71
+
72
+ ## Best Practices
73
+ - Start with filter methods (fast)
74
+ - Use embedded for final selection
75
+ - Validate on holdout set
76
+ - Consider domain knowledge
77
+
78
+ ## Progress
79
+ - [ ] Features loaded
80
+ - [ ] Methods applied
81
+ - [ ] Selection complete
82
+ - [ ] Report generated
83
+ - [ ] Output saved
84
+
85
+ Reduce dimensionality while preserving predictive power.
@@ -0,0 +1,97 @@
1
+ ---
2
+ description: Feature store operations - register, materialize, and serve features for training and inference
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <action> [--feature_view <view>]
5
+ ---
6
+
7
+ # Feature Store: $ARGUMENTS
8
+
9
+ Feature store operation: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **feature-engineer-agent** for feature store management.
13
+
14
+ ## Parameters
15
+ - **action**: register | materialize | serve | list
16
+ - **feature_view**: Feature view name
17
+
18
+ ## Actions
19
+
20
+ ### Register
21
+ - Define feature definitions
22
+ - Specify entities and joins
23
+ - Set TTL and freshness
24
+ - Configure data sources
25
+
26
+ ### Materialize
27
+ - Compute features for training
28
+ - Populate online store
29
+ - Batch feature computation
30
+ - Incremental updates
31
+
32
+ ### Serve
33
+ - Get features for inference
34
+ - Point-in-time correct retrieval
35
+ - Low-latency serving
36
+ - Batch retrieval
37
+
38
+ ### List
39
+ - Show all feature views
40
+ - Display feature definitions
41
+ - Check freshness status
42
+
43
+ ## Code Template
44
+ ```python
45
+ from omgkit.features import FeatureStore
46
+ from feast import Entity, FeatureView, Field
47
+ from feast.types import Float32, Int64
48
+
49
+ store = FeatureStore(repo_path="feature_repo/")
50
+
51
+ # Define entity
52
+ user = Entity(name="user", join_keys=["user_id"])
53
+
54
+ # Define feature view
55
+ user_features = FeatureView(
56
+ name="user_features",
57
+ entities=[user],
58
+ schema=[
59
+ Field(name="total_purchases", dtype=Float32),
60
+ Field(name="avg_order_value", dtype=Float32),
61
+ Field(name="days_since_last_order", dtype=Int64),
62
+ ],
63
+ source=user_source,
64
+ online=True,
65
+ ttl=timedelta(days=1)
66
+ )
67
+
68
+ # Register
69
+ store.register([user, user_features])
70
+
71
+ # Materialize
72
+ store.materialize(
73
+ start_date=datetime(2024, 1, 1),
74
+ end_date=datetime.now()
75
+ )
76
+
77
+ # Serve features
78
+ features = store.get_online_features(
79
+ features=["user_features:total_purchases", "user_features:avg_order_value"],
80
+ entity_rows=[{"user_id": 123}]
81
+ ).to_dict()
82
+ ```
83
+
84
+ ## Benefits
85
+ - Training-serving consistency
86
+ - Point-in-time correctness
87
+ - Feature reuse across models
88
+ - Feature discovery
89
+
90
+ ## Progress
91
+ - [ ] Action validated
92
+ - [ ] Feature store connected
93
+ - [ ] Operation executed
94
+ - [ ] Cache updated
95
+ - [ ] Status verified
96
+
97
+ Ensure consistent features across training and serving.
@@ -0,0 +1,60 @@
1
+ ---
2
+ description: Initialize ML project with standard structure, configs, experiment tracking, and best practices
3
+ allowed-tools: Task, Read, Write, Bash, Grep, Glob
4
+ argument-hint: <project_type> [--template <template>]
5
+ ---
6
+
7
+ # ML Project Init: $ARGUMENTS
8
+
9
+ Initialize ML project: **$ARGUMENTS**
10
+
11
+ ## Agent
12
+ Uses **ml-engineer** agent with data engineering skills.
13
+
14
+ ## Parameters
15
+ - **project_type**: classification | regression | detection | segmentation | nlp | timeseries | recommendation
16
+ - **template**: minimal | standard | production | research (default: standard)
17
+
18
+ ## Project Structure Created
19
+ ```
20
+ project/
21
+ ├── config/
22
+ │ ├── config.yaml # Main config
23
+ │ ├── model_config.yaml # Model hyperparameters
24
+ │ └── data_config.yaml # Data processing config
25
+ ├── data/
26
+ │ ├── raw/ # Raw data
27
+ │ ├── processed/ # Processed data
28
+ │ ├── features/ # Feature store
29
+ │ └── .dvc/ # DVC tracking
30
+ ├── src/
31
+ │ ├── data/ # Data processing
32
+ │ ├── features/ # Feature engineering
33
+ │ ├── models/ # Model definitions
34
+ │ ├── training/ # Training scripts
35
+ │ └── serving/ # Inference code
36
+ ├── notebooks/ # Jupyter notebooks
37
+ ├── tests/ # Unit tests
38
+ ├── pipelines/ # CI/CD pipelines
39
+ ├── mlruns/ # MLflow tracking
40
+ └── artifacts/ # Model artifacts
41
+ ```
42
+
43
+ ## Actions
44
+ 1. Create standard directory structure
45
+ 2. Generate configuration files
46
+ 3. Set up experiment tracking (MLflow/W&B)
47
+ 4. Initialize DVC for data versioning
48
+ 5. Create README with project guidelines
49
+ 6. Set up virtual environment
50
+ 7. Install base dependencies
51
+
52
+ ## Progress
53
+ - [ ] Directory structure created
54
+ - [ ] Config files generated
55
+ - [ ] Experiment tracking set up
56
+ - [ ] DVC initialized
57
+ - [ ] README created
58
+ - [ ] Environment configured
59
+
60
+ Initialize with best practices for production ML.