scikit-rec-agent 0.0.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
+ {
+   "permissions": {
+     "allow": [
+       "Read(//Users/ssankararam/Shankar/Personal/RecSys/**)",
+       "Bash(git ls-remote *)",
+       "Bash(git add *)"
+     ],
+     "additionalDirectories": [
+       "/Users/ssankararam/Shankar/Personal/RecSys/scikit-rec"
+     ]
+   }
+ }
@@ -0,0 +1,40 @@
+ name: CI
+
+ on:
+   push:
+     branches: [main]
+   pull_request:
+     branches: [main]
+
+ jobs:
+   lint:
+     name: Lint
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v5
+       - uses: actions/setup-python@v6
+         with:
+           python-version: "3.12"
+       - name: Install ruff
+         run: pip install ruff
+       - name: Check formatting
+         run: ruff format --check .
+       - name: Check lint
+         run: ruff check .
+
+   test:
+     name: Test (Python ${{ matrix.python-version }})
+     runs-on: ubuntu-latest
+     strategy:
+       fail-fast: false
+       matrix:
+         python-version: ["3.10", "3.11", "3.12", "3.13"]
+     steps:
+       - uses: actions/checkout@v5
+       - uses: actions/setup-python@v6
+         with:
+           python-version: ${{ matrix.python-version }}
+       - name: Install package and test deps
+         run: pip install -e ".[dev]"
+       - name: Run tests
+         run: pytest
@@ -0,0 +1,83 @@
+ name: Publish
+
+ on:
+   workflow_dispatch:
+     inputs:
+       version:
+         description: "Version to release (e.g. 1.2.3)"
+         required: true
+
+ jobs:
+   tag:
+     name: Create tag
+     runs-on: ubuntu-latest
+     permissions:
+       contents: write
+     steps:
+       - uses: actions/checkout@v5
+         with:
+           fetch-depth: 0
+       - name: Create and push tag
+         run: |
+           git config user.name "github-actions"
+           git config user.email "github-actions@github.com"
+           git tag v${{ github.event.inputs.version }}
+           git push origin v${{ github.event.inputs.version }}
+
+   build:
+     name: Build distribution
+     needs: tag
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v5
+         with:
+           fetch-depth: 0
+           ref: v${{ github.event.inputs.version }}
+       - uses: actions/setup-python@v6
+         with:
+           python-version: "3.12"
+       - name: Install build
+         run: pip install build
+       - name: Build wheel and sdist
+         run: python -m build
+       - name: Upload dist artifacts
+         uses: actions/upload-artifact@v5
+         with:
+           name: dist
+           path: dist/
+
+   publish-pypi:
+     name: Publish to PyPI
+     needs: build
+     runs-on: ubuntu-latest
+     environment: pypi
+     permissions:
+       id-token: write
+     steps:
+       - name: Download dist artifacts
+         uses: actions/download-artifact@v5
+         with:
+           name: dist
+           path: dist/
+       - name: Publish to PyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
+
+   github-release:
+     name: Create GitHub Release
+     needs: publish-pypi
+     runs-on: ubuntu-latest
+     permissions:
+       contents: write
+     steps:
+       - uses: actions/checkout@v5
+       - name: Download dist artifacts
+         uses: actions/download-artifact@v5
+         with:
+           name: dist
+           path: dist/
+       - name: Create GitHub Release
+         uses: softprops/action-gh-release@v2
+         with:
+           tag_name: v${{ github.event.inputs.version }}
+           files: dist/*
+           generate_release_notes: true
@@ -0,0 +1,29 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *.egg-info/
+ scikit_rec_agent.egg-info/
+ *.egg
+ dist/
+ build/
+ .eggs/
+
+ # Virtual environments
+ .venv/
+ venv/
+ env/
+
+ # Testing
+ .pytest_cache/
+ .coverage
+ htmlcov/
+ coverage.xml
+
+ # Mypy
+ .mypy_cache/
+
+ # Ruff
+ .ruff_cache/
+
+ # OS
+ .DS_Store
@@ -0,0 +1,190 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to the Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by the Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding any notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ Copyright 2025 Intuit Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
@@ -0,0 +1,33 @@
+ Metadata-Version: 2.4
+ Name: scikit-rec-agent
+ Version: 0.0.1
+ Summary: Conversational AI agent that drives scikit-rec via tool use
+ License-Expression: Apache-2.0
+ Project-URL: Homepage, https://github.com/intuit/scikit-rec-agent
+ Project-URL: Repository, https://github.com/intuit/scikit-rec-agent
+ Project-URL: Issues, https://github.com/intuit/scikit-rec-agent/issues
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0; extra == "dev"
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
+ Requires-Dist: ruff>=0.4; extra == "dev"
+ Requires-Dist: mypy>=1.0; extra == "dev"
+ Dynamic: license-file
+
+ # scikit-rec-agent
+
+ Conversational AI agent that uses [scikit-rec](https://github.com/intuit/scikit-rec) as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
+
+ > **Status:** placeholder release. Real implementation in progress — see [`agentic_design.md`](./agentic_design.md) for the spec.
+
+ ## Install
+
+ ```bash
+ pip install scikit-rec-agent
+ ```
+
+ ## License
+
+ Apache-2.0
@@ -0,0 +1,15 @@
+ # scikit-rec-agent
+
+ Conversational AI agent that uses [scikit-rec](https://github.com/intuit/scikit-rec) as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
+
+ > **Status:** placeholder release. Real implementation in progress — see [`agentic_design.md`](./agentic_design.md) for the spec.
+
+ ## Install
+
+ ```bash
+ pip install scikit-rec-agent
+ ```
+
+ ## License
+
+ Apache-2.0
@@ -0,0 +1,794 @@
+ # scikit-rec-agent: Design Document
+
+ Conversational AI agent that uses scikit-rec as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
+
+ This document is the authoritative spec for the implementation. It reflects the decisions locked in during design review and the factory contract provided by the scikit-rec PR that landed on 2026-04-17 (commits `74a773c`, `137d278`, and `5bdc7d0`).
+
+ ---
+
+ ## Design Decisions
+
+ | Decision | Choice | Rationale |
+ |---|---|---|
+ | Distribution | **Single pip package** (`scikit-rec-agent`) | Installable library with CLI entry point. Examples live in `examples/`, no separate cookbook repo. |
+ | LLM provider | **Bring-your-own** via `BaseLLM` protocol | Users pass any LLM that implements `chat()` + `chat_stream()`. Ship Anthropic + OpenAI adapters at launch. |
+ | System prompt | **Swappable at `Agent()` construction** | Default prompt exported; users pass `system_prompt=...` to override or extend. |
+ | Tool registry | **Pluggable at `Agent()` construction** | 10 default tools ship with the library; users extend or replace via `tools=...`. |
+ | Interface | **CLI** for v1 | `scikit-rec-agent chat`. Jupyter/web layered on top of `Agent` later. |
+ | Model registry | **Local filesystem** | `~/.scikit-rec/registry/` — JSON metadata + pickle. |
+ | Tool scope | **10 tools, everything in v1** | No v1.1 tier. If it's worth shipping, it ships now. |
+ | Recommender scope | **Full scikit-rec capability matrix** | All 6 recommenders × 6 scorers × 3 estimator planes. Driven end-to-end via `create_recommender_pipeline`. |
+ | `suggest_pipelines` | **In-prompt reasoning, not a tool** | The LLM emits candidate `RecommenderConfig` dicts as text; `train_model` validates via the factory. |
+ | Config validation | **Delegated to scikit-rec factory** | Agent does not re-implement enum checks. Bad configs fail at `train_model` with the factory's error message surfaced to the LLM. |
+ | Streaming | **Yes** | Stream LLM text deltas to terminal for responsive UX during long tool executions. |
+
+ ---
+
+ ## Architecture
+
+ ```
+ User (CLI)
+   |
+   v
+ Agent Loop (BaseLLM protocol + tool dispatch + streaming)
+   |
+   |--- BaseLLM protocol ----+---- AnthropicAdapter (Claude)
+   |                         +---- OpenAIAdapter (GPT-4)
+   |                         +---- UserCustomAdapter (anything)
+   |
+   v
+ Tools Layer (10 structured tool-use functions)
+   |
+   v
+ scikit-rec
+   |
+   |-- skrec.orchestrator.create_recommender_pipeline(config)
+   |     Recommender -> Scorer -> Estimator
+   |-- skrec.orchestrator.HyperparameterOptimizer (used by run_hpo)
+   |-- skrec.dataset.{Interactions,Users,Items}Dataset
+   |
+   v
+ Model Registry (~/.scikit-rec/registry/)
+ ```
+
+ The agent is stateful across turns. `Session` holds loaded datasets, trained pipeline handles, and evaluation results. Tools mutate this session; model objects themselves never enter the LLM context — only `model_id` handles and metadata.
+
+ ---
+
+ ## Prerequisite: scikit-rec Factory Contract
+
+ The agent depends on a single entry point:
+
+ ```python
+ from skrec.orchestrator import create_recommender_pipeline, RecommenderConfig
+ ```
+
+ `create_recommender_pipeline(config: RecommenderConfig) -> BaseRecommender` builds the full Estimator → Scorer → Recommender chain from a dict. It covers the entire scikit-rec capability matrix (post PR `74a773c`):
+
+ ### Recommender types
+ `ranking`, `bandits`, `sequential`, `hierarchical_sequential`, `uplift`, `gcsl`
+
+ ### Scorer types
+ `universal`, `independent`, `multiclass`, `multioutput`, `sequential`, `hierarchical`
+
+ ### Estimator planes (`estimator_type` discriminator)
+ - `tabular` — XGBoost classifier/regressor, MultiOutputClassifier (LightGBM and sklearn wrappers are available directly, but not via the factory enum today — acceptable for v1)
+ - `embedding` (`model_type` ∈ {`matrix_factorization`, `ncf`, `two_tower`, `deep_cross_network`, `neural_factorization`})
+ - `sequential` (`model_type` ∈ {`sasrec_classifier`, `sasrec_regressor`, `hrnn_classifier`, `hrnn_regressor`})
+
+ ### Required fields
+ - `recommender_type` — **required**, raises `ValueError` if missing or `None`.
+ - `scorer_type` — **required**, raises `ValueError` if missing.
+ - `estimator_config` — required; `estimator_type` defaults to `"tabular"`.
+ - `recommender_params` — required only for recommenders that need them (e.g. `uplift` requires `control_item_id`). Keys irrelevant to the chosen recommender are silently ignored.
+
+ ### Cross-cutting validators the factory already enforces
+ The agent relies on these and does **not** re-implement them:
+
+ - `sequential` / `hierarchical_sequential` recommenders require `estimator_type="sequential"`
+ - `sequential` recommender requires `scorer_type="sequential"`
+ - `hierarchical_sequential` requires `scorer_type="hierarchical"`
+ - `sequential` / `hierarchical` scorers require `estimator_type="sequential"`
+ - `embedding` estimators are rejected by `multioutput` / `multiclass` / `independent` scorers
+ - `uplift` recommender requires `scorer_type ∈ {"independent", "universal"}`
+
+ When a bad config reaches `train_model`, the factory raises `ValueError` / `TypeError` / `NotImplementedError`. The tool captures the message verbatim and returns it as a tool error — the LLM reads the error and corrects the config without the agent needing a parallel validator.
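+
+ For concreteness, a minimal sketch of the factory round-trip that `train_model` relies on (using shape 1 from the canonical configs below; the `try/except` mirrors the error handling just described):
+
+ ```python
+ from skrec.orchestrator import create_recommender_pipeline
+
+ config = {
+     "recommender_type": "ranking",
+     "scorer_type": "universal",
+     "estimator_config": {"ml_task": "classification", "xgboost": {"n_estimators": 100}},
+ }
+
+ try:
+     # Builds the full Estimator -> Scorer -> Recommender chain from the dict.
+     recommender = create_recommender_pipeline(config)
+ except (ValueError, TypeError, NotImplementedError) as exc:
+     # train_model forwards str(exc) verbatim as the tool error; the LLM fixes the config and retries.
+     print(f"{type(exc).__name__}: {exc}")
+ ```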
+
+ ### Canonical config shapes (copy these into the system prompt)
+
+ ```python
+ # 1. Tabular ranking
+ {
+     "recommender_type": "ranking",
+     "scorer_type": "universal",
+     "estimator_config": {
+         "ml_task": "classification",
+         "xgboost": {"n_estimators": 100, "max_depth": 5, "learning_rate": 0.1},
+     },
+ }
+
+ # 2. Embedding ranking (Two-Tower / NCF / MF / DCN / NFM)
+ {
+     "recommender_type": "ranking",
+     "scorer_type": "universal",
+     "estimator_config": {
+         "estimator_type": "embedding",
+         "embedding": {"model_type": "two_tower", "params": {"embedding_dim": 32}},
+     },
+ }
+
+ # 3. Sequential (SASRec / HRNN)
+ {
+     "recommender_type": "sequential",
+     "scorer_type": "sequential",
+     "estimator_config": {
+         "estimator_type": "sequential",
+         "sequential": {"model_type": "sasrec_classifier", "params": {"hidden_units": 64, "max_len": 50}},
+     },
+     "recommender_params": {"max_len": 50},
+ }
+
+ # 4. Uplift (T-Learner / S-Learner / X-Learner)
+ {
+     "recommender_type": "uplift",
+     "scorer_type": "independent",
+     "estimator_config": {"ml_task": "classification", "xgboost": {"n_estimators": 100}},
+     "recommender_params": {"control_item_id": "control", "mode": "t_learner"},
+ }
+
+ # 5. GCSL (multi-objective)
+ {
+     "recommender_type": "gcsl",
+     "scorer_type": "universal",
+     "estimator_config": {"ml_task": "classification", "xgboost": {"n_estimators": 100}},
+     "recommender_params": {
+         "inference_method": {
+             "type": "predefined_value",
+             "params": {"goal_values": {"OUTCOME_revenue": 1.0}},
+         },
+     },
+ }
+
+ # 6. Contextual bandits
+ {
+     "recommender_type": "bandits",
+     "scorer_type": "universal",
+     "estimator_config": {"ml_task": "classification", "xgboost": {"n_estimators": 100}},
+ }
+ ```
+
+ ### XGBoost hyperparameter hints
+ `XGBConfig` is a `TypedDict(total=False)` with these typed keys (full passthrough — any other XGBoost param is accepted too):
+ `n_estimators`, `max_depth`, `learning_rate`, `subsample`, `colsample_bytree`, `colsample_bynode`, `objective`, `eval_metric`, `n_jobs`, `random_state`.
+
+ ---
+
+ ## LLM Provider Abstraction
+
+ Users bring their own LLM. The agent depends on a `BaseLLM` protocol, not a specific SDK.
+
+ ### Protocol
+
+ ```python
+ from typing import Protocol, Iterator, Any
+
+ class BaseLLM(Protocol):
+     def chat(
+         self,
+         messages: list[dict[str, Any]],
+         tools: list[dict[str, Any]],
+         system: str,
+     ) -> LLMResponse:
+         """Non-streaming chat completion with tool definitions.
+
+         Args:
+             messages: Conversation history in OpenAI-style format.
+             tools: List of tool schemas (JSON Schema format, provider-agnostic).
+             system: System prompt string.
+
+         Returns:
+             LLMResponse with text content and/or tool calls.
+         """
+         ...
+
+     def chat_stream(
+         self,
+         messages: list[dict[str, Any]],
+         tools: list[dict[str, Any]],
+         system: str,
+     ) -> Iterator[LLMStreamEvent]:
+         """Streaming variant. Yields text deltas and tool calls.
+
+         Tool calls are atomic events — they're only emitted once fully received.
+         Only text deltas stream token-by-token.
+         """
+         ...
+ ```
+
+ ### Response types
+
+ ```python
+ from dataclasses import dataclass
+
+ @dataclass
+ class ToolCall:
+     id: str          # Unique ID for this tool call
+     name: str        # Tool name (e.g. "profile_data")
+     arguments: dict  # Parsed JSON arguments
+
+ @dataclass
+ class LLMResponse:
+     content: str | None         # Text response (may be None if only tool calls)
+     tool_calls: list[ToolCall]  # Zero or more tool calls
+     stop_reason: str            # "end_turn", "tool_use", "max_tokens", etc.
+
+ @dataclass
+ class LLMStreamEvent:
+     type: str  # "text_delta" | "tool_call" | "done"
+     text: str | None = None
+     tool_call: ToolCall | None = None
+     stop_reason: str | None = None
+ ```
231
+ ### Built-in adapters
232
+
233
+ Both ship in v1:
234
+
235
+ ```python
236
+ class AnthropicAdapter(BaseLLM):
237
+ """Wraps anthropic.Anthropic client to BaseLLM protocol."""
238
+ def __init__(self, client: "anthropic.Anthropic", model: str = "claude-sonnet-4-6"):
239
+ ...
240
+
241
+ class OpenAIAdapter(BaseLLM):
242
+ """Wraps openai.OpenAI client to BaseLLM protocol."""
243
+ def __init__(self, client: "openai.OpenAI", model: str = "gpt-4o"):
244
+ ...
245
+ ```
246
+
247
+ ### Usage
248
+
249
+ ```python
250
+ from scikit_rec_agent import Agent
251
+ from scikit_rec_agent.llm import AnthropicAdapter
252
+ import anthropic
253
+
254
+ llm = AnthropicAdapter(anthropic.Anthropic(), model="claude-sonnet-4-6")
255
+ agent = Agent(llm=llm)
256
+ agent.chat() # interactive CLI session
257
+ ```
258
+
259
+ ```python
260
+ # Or bring your own
261
+ from scikit_rec_agent import Agent, BaseLLM
262
+
263
+ class MyLLM(BaseLLM):
264
+ def chat(self, messages, tools, system): ...
265
+ def chat_stream(self, messages, tools, system): ...
266
+
267
+ agent = Agent(llm=MyLLM())
268
+ ```
269
+
270
+ ---
271
+
272
+ ## Session State
273
+
274
+ The agent is stateful across turns. The `Session` dataclass holds:
275
+
276
+ ```python
277
+ @dataclass
278
+ class Session:
279
+ loaded_datasets: dict[str, dict] # path -> {profile, dataset_objects}
280
+ trained_models: dict[str, ModelHandle] # model_id -> handle
281
+ messages: list[dict] # conversation history
282
+
283
+ @dataclass
284
+ class ModelHandle:
285
+ model_id: str # e.g. "twotower_1712345678"
286
+ name: str # human-readable name
287
+ config: RecommenderConfig # the config passed to train_model
288
+ recommender: BaseRecommender # the actual trained pipeline
289
+ training_time_seconds: float
290
+ datasets_used: dict # paths/schema info
291
+ metrics: dict[str, float] # metric_name@k -> value (accumulates across evaluate_model calls)
292
+ tags: list[str]
293
+ created_at: str # ISO timestamp
294
+ ```
295
+
296
+ ### What enters the LLM context
297
+ Only `model_id`, `name`, config, metrics, training time, and status messages. The `recommender` object stays in Python memory and is referenced by `model_id` from tool calls.
298
+
299
+ ### `model_id` generation
300
+ `{model_type}_{unix_timestamp}` — e.g. `twotower_1712345678`. Deterministic enough to be collision-free in a session, readable enough for the LLM to reference unambiguously.
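+
+ A sketch of the generator this implies (nothing more than a name plus `int(time.time())`):
+
+ ```python
+ import time
+
+ def make_model_id(model_type: str) -> str:
+     """e.g. make_model_id("twotower") -> "twotower_1712345678"."""
+     return f"{model_type}_{int(time.time())}"
+ ```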
+
+ ---
+
+ ## Extension Points
+
+ ### 1. System prompt
+
+ ```python
+ from scikit_rec_agent import Agent
+ from scikit_rec_agent.prompts import DEFAULT_SYSTEM_PROMPT
+
+ custom_prompt = DEFAULT_SYSTEM_PROMPT + "\n\nOur team uses NDCG@10 as the primary metric."
+ agent = Agent(llm=llm, system_prompt=custom_prompt)
+ ```
+
+ ### 2. Tool registry
+
+ ```python
+ from scikit_rec_agent import Agent
+ from scikit_rec_agent.tools import DEFAULT_TOOLS, Tool
+
+ def fetch_from_snowflake(query: str, session: Session) -> dict:
+     ...
+
+ custom_tool = Tool(
+     name="fetch_from_snowflake",
+     schema={...},  # JSON schema
+     fn=fetch_from_snowflake,
+ )
+
+ agent = Agent(llm=llm, tools=[*DEFAULT_TOOLS, custom_tool])
+ ```
+
+ Tool functions receive the `Session` as a keyword arg so user-defined tools can read and mutate the same state.
+
+ ### 3. CLI / frontend
+
+ `scikit-rec-agent chat` is thin glue: constructs an `Agent`, reads stdin, prints streamed output. Users who want Jupyter, Slack, or a web UI instantiate `Agent` directly and drive it with their own I/O loop. `Agent.chat_turn(user_message)` returns an event iterator — the CLI has no privileged access.
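+
+ For example, a bare-bones custom frontend might look like this sketch (the event shape yielded by `chat_turn` is assumed to mirror `LLMStreamEvent`; that part of the contract is illustrative, not final):
+
+ ```python
+ from scikit_rec_agent import Agent
+
+ def drive(agent: Agent) -> None:
+     """Minimal REPL over Agent.chat_turn, roughly what the bundled CLI does."""
+     while True:
+         user_message = input("you> ")
+         if user_message in {"exit", "quit"}:
+             break
+         for event in agent.chat_turn(user_message):
+             if getattr(event, "text", None):  # assumes text-delta events expose .text
+                 print(event.text, end="", flush=True)
+         print()
+ ```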
339
+
340
+ ---
341
+
342
+ ## Agent Tools (v1 — 10 tools)
343
+
344
+ | Tool | Purpose | Wraps |
345
+ |---|---|---|
346
+ | `profile_data` | Load CSV/parquet; report shape, dtypes, cardinality, sparsity, temporal range, target type | pandas + heuristics |
347
+ | `validate_data` | Schema-compliance check against scikit-rec required schemas. Returns violations + auto-fix suggestions. | Compare against `InteractionsDataset.REQUIRED_SCHEMA_PATH_TRAINING` etc. |
348
+ | `create_datasets` | Build `InteractionsDataset` / `UsersDataset` / `ItemsDataset` handles. Auto-generate YAML schema to tmp dir if not provided. Supports `column_mapping` to rename user columns → scikit-rec names. | `DatasetSchema.create` + dataset constructors |
349
+ | `train_model` | Train a recommender pipeline from a `RecommenderConfig`. Creates datasets internally if called with paths; uses pre-built datasets if called with dataset handles from `create_datasets`. | `create_recommender_pipeline` + `.train()` |
350
+ | `evaluate_model` | Run evaluation: evaluator type + metrics + multiple k values. Supports all 7 evaluator types. | `BaseRecommender.evaluate()` |
351
+ | `compare_models` | Tabulate metrics across trained models. Markdown table sorted by primary metric. | Session state lookup |
352
+ | `run_hpo` | Optuna-based hyperparameter optimization. Returns best config + trial results. | `HyperparameterOptimizer.run_optimization()` |
353
+ | `save_model` | Persist model + config + metrics to local registry | pickle + JSON metadata |
354
+ | `list_models` | List models in the local registry (not just session) with metadata. | Filesystem scan of `~/.scikit-rec/registry/` |
355
+ | `load_model` | Load a registered model into the current session. | pickle + session state mutation |
356
+
357
+ **`suggest_pipelines` is deliberately NOT a tool.** It's the agent's job: after `profile_data` + `validate_data`, the LLM emits 2–5 candidate `RecommenderConfig` dicts as text in its reply with rationale for each. The user picks one (or more), then the LLM calls `train_model`. The factory validates the config on entry — there's no need for a Python-side validator.
358
+
359
+ ### Tool error contract
360
+
361
+ Every tool's return value is a JSON-serializable dict with a consistent envelope:
362
+
363
+ ```python
364
+ # Success
365
+ {"status": "ok", "data": {...}}
366
+
367
+ # Error (factory raised, file missing, evaluation failed, etc.)
368
+ {"status": "error", "error_type": "ValueError", "message": "...", "hint": "optional suggestion"}
369
+ ```
370
+
371
+ Both shapes are passed back as the tool result. The LLM reads the `message` field and self-corrects on error. `hint` is used for high-confidence fixes we can synthesize locally (e.g. `"Your column 'user' was detected as the user ID — pass column_mapping={'user': 'USER_ID'}"`).
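+
+ A sketch of the dispatch wrapper that guarantees this envelope (assuming the `Tool` dataclass and `Session` from the extension-points section; tools build the envelope themselves, the wrapper only catches what escapes):
+
+ ```python
+ import json
+
+ from scikit_rec_agent.session import Session
+ from scikit_rec_agent.tools import Tool
+
+ def dispatch(tool: Tool, arguments: dict, session: Session) -> str:
+     """Run one tool call; even uncaught exceptions come back as the error envelope."""
+     try:
+         result = tool.fn(**arguments, session=session)
+     except Exception as exc:
+         result = {"status": "error", "error_type": type(exc).__name__, "message": str(exc)}
+     return json.dumps(result)
+ ```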
+
+ ---
+
+ ## Tool Schemas (v1)
+
+ ### profile_data
+
+ ```json
+ {
+   "name": "profile_data",
+   "description": "Load and profile a data file. Reports shape, dtypes, cardinality of ID columns, sparsity, value distributions, temporal range, and whether the target looks implicit (binary) or explicit (ratings).",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "file_path": {"type": "string", "description": "Path to CSV or parquet file"},
+       "file_type": {"type": "string", "enum": ["interactions", "users", "items"]}
+     },
+     "required": ["file_path", "file_type"]
+   }
+ }
+ ```
+ **Returns:** `shape`, `columns` (name, dtype, null_count, n_unique, sample_values), `id_columns_detected`, `target_column_detected`, `target_type` (binary/rating/continuous), `temporal_range` (if timestamp found), `duplicate_pairs_count`, `sparsity`.
+
+ ### validate_data
+
+ ```json
+ {
+   "name": "validate_data",
+   "description": "Validate a data file against scikit-rec required schemas. Reports missing required columns, wrong dtypes, and suggests column renames if near-matches are detected.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "file_path": {"type": "string"},
+       "file_type": {"type": "string", "enum": ["interactions", "users", "items"]},
+       "is_training": {"type": "boolean", "default": true}
+     },
+     "required": ["file_path", "file_type"]
+   }
+ }
+ ```
+ **Returns:** `valid` (bool), `missing_columns`, `wrong_dtypes`, `suggested_column_mapping` (fuzzy-matched renames), `extra_columns` (passed through as features).
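+
+ The fuzzy matching can be as simple as stdlib `difflib`; one plausible implementation (the required-column list is an illustrative subset of the schema, and the 0.6 cutoff is an assumption, not a locked-in value):
+
+ ```python
+ import difflib
+
+ REQUIRED_COLUMNS = ["USER_ID", "ITEM_ID", "OUTCOME"]  # illustrative subset of the required schema
+
+ def suggest_column_mapping(columns: list[str]) -> dict[str, str]:
+     """Propose renames from near-miss user columns to required scikit-rec names."""
+     mapping: dict[str, str] = {}
+     lowered = {c.lower(): c for c in columns}
+     for required in REQUIRED_COLUMNS:
+         hits = difflib.get_close_matches(required.lower(), list(lowered), n=1, cutoff=0.6)
+         if hits:
+             mapping[lowered[hits[0]]] = required  # e.g. {"userid": "USER_ID"}
+     return mapping
+ ```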
+
+ ### create_datasets
+
+ ```json
+ {
+   "name": "create_datasets",
+   "description": "Build scikit-rec Dataset handles. Auto-generates YAML schemas from the data types if client_schema_path is not provided. Applies column_mapping to rename columns to USER_ID/ITEM_ID/OUTCOME as needed. Registers the handles in the session under a dataset_bundle_id.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "bundle_name": {"type": "string"},
+       "interactions_path": {"type": "string"},
+       "users_path": {"type": "string"},
+       "items_path": {"type": "string"},
+       "column_mapping": {
+         "type": "object",
+         "description": "Map user's column names to scikit-rec names, e.g. {\"userid\": \"USER_ID\", \"clicked\": \"OUTCOME\"}"
+       },
+       "schemas": {
+         "type": "object",
+         "description": "Optional pre-written YAML schema paths keyed by file_type"
+       }
+     },
+     "required": ["bundle_name", "interactions_path"]
+   }
+ }
+ ```
+ **Returns:** `bundle_id`, paths to generated schema files (so user can inspect / version them), summary of the three datasets.
+
+ ### train_model
+
+ ```json
+ {
+   "name": "train_model",
+   "description": "Train a recommender pipeline from a RecommenderConfig. Supply either a dataset bundle_id from create_datasets, OR raw file paths with optional column_mapping (train_model will call create_datasets internally). Config is validated by scikit-rec's factory — bad configs raise with a specific error that you can use to correct and retry.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "model_name": {"type": "string"},
+       "config": {
+         "type": "object",
+         "description": "RecommenderConfig dict: recommender_type, scorer_type, estimator_config, optional recommender_params. See system prompt for canonical shapes."
+       },
+       "bundle_id": {"type": "string", "description": "From create_datasets. If provided, paths/column_mapping are ignored."},
+       "interactions_path": {"type": "string"},
+       "users_path": {"type": "string"},
+       "items_path": {"type": "string"},
+       "column_mapping": {"type": "object"},
+       "validation_split": {"type": "number", "description": "0-1 fraction held out. Default 0.2. Ignored if explicit validation datasets are in the bundle."}
+     },
+     "required": ["model_name", "config"]
+   }
+ }
+ ```
+ **Returns:** `model_id`, `model_name`, `status`, `training_time_seconds`, `estimator_type`, `scorer_type`, `recommender_type`.
+
+ ### evaluate_model
+
+ ```json
+ {
+   "name": "evaluate_model",
+   "description": "Evaluate a trained model using offline evaluation. Supports all 7 evaluator types and all 9 metrics at multiple k values. Results cached on the recommender's evaluation_session — subsequent calls with the same eval_kwargs are free.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "model_id": {"type": "string"},
+       "evaluator_type": {
+         "type": "string",
+         "enum": ["simple", "replay_match", "IPS", "DR", "direct_method", "SNIPS", "policy_weighted"]
+       },
+       "metrics": {
+         "type": "array",
+         "items": {
+           "type": "string",
+           "enum": ["NDCG_at_k", "MAP_at_k", "MRR_at_k", "precision_at_k", "recall_at_k", "average_reward_at_k", "roc_auc", "pr_auc", "expected_reward"]
+         }
+       },
+       "k_values": {"type": "array", "items": {"type": "integer"}},
+       "eval_kwargs": {"type": "object", "description": "logged_items, logged_rewards, logging_proba, expected_rewards — as required by the evaluator type"}
+     },
+     "required": ["model_id", "evaluator_type", "metrics", "k_values"]
+   }
+ }
+ ```
+ **Returns:** `model_id`, `results` (list of `{metric, k, value}`).
+
+ ### compare_models
+
+ ```json
+ {
+   "name": "compare_models",
+   "description": "Compare trained models in the current session. Returns a markdown leaderboard sorted by primary metric.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "model_ids": {"type": "array", "items": {"type": "string"}, "description": "If empty, compares all trained models in the session."},
+       "primary_metric": {"type": "string"},
+       "k": {"type": "integer"}
+     },
+     "required": ["primary_metric", "k"]
+   }
+ }
+ ```
+ **Returns:** markdown table (models × metrics) + JSON version.
+
+ ### run_hpo
+
+ ```json
+ {
+   "name": "run_hpo",
+   "description": "Run Optuna hyperparameter optimization on a base RecommenderConfig. Supports TPE, GP, CMA-ES, random, grid, QMC samplers. Results persisted to a parquet file keyed by study_name.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "study_name": {"type": "string"},
+       "base_config": {"type": "object", "description": "RecommenderConfig with fixed values"},
+       "search_space": {
+         "type": "object",
+         "description": "Dot-notation param paths → dimension specs. Each spec is {type: int|float|categorical, low, high, step?, log?, choices?}. Example: {'estimator_config.xgboost.n_estimators': {type: 'int', low: 50, high: 500, step: 50}}"
+       },
+       "metric_definitions": {
+         "type": "array",
+         "items": {"type": "string"},
+         "description": "Metric names like 'NDCG@10' or 'MAP@5'."
+       },
+       "objective_metric": {"type": "string"},
+       "bundle_id": {"type": "string", "description": "Dataset bundle from create_datasets — must include validation datasets."},
+       "n_trials": {"type": "integer"},
+       "sampler": {"type": "string", "enum": ["tpe", "gp", "cmaes", "random", "grid", "qmc"], "default": "tpe"},
+       "direction": {"type": "string", "enum": ["maximize", "minimize"], "default": "maximize"}
+     },
+     "required": ["study_name", "base_config", "search_space", "metric_definitions", "objective_metric", "bundle_id", "n_trials"]
+   }
+ }
+ ```
+ **Returns:** `best_params`, `best_value`, `n_complete_trials`, `results_parquet_path`, and a `model_id` if the best config is automatically re-trained at the end (configurable via `retrain_best`, default `true`).
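+
+ For orientation, one plausible `run_hpo` invocation as the LLM would emit it (names and values are illustrative):
+
+ ```python
+ run_hpo_args = {
+     "study_name": "twotower_ndcg",
+     "base_config": {  # shape 2 from the canonical configs, with fixed values
+         "recommender_type": "ranking",
+         "scorer_type": "universal",
+         "estimator_config": {
+             "estimator_type": "embedding",
+             "embedding": {"model_type": "two_tower", "params": {"embedding_dim": 32}},
+         },
+     },
+     "search_space": {
+         "estimator_config.embedding.params.embedding_dim": {"type": "categorical", "choices": [32, 64, 128]},
+     },
+     "metric_definitions": ["NDCG@10"],
+     "objective_metric": "NDCG@10",
+     "bundle_id": "movielens_bundle",  # from create_datasets; must include validation data
+     "n_trials": 50,
+     "sampler": "tpe",
+     "direction": "maximize",
+ }
+ ```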
+
+ ### save_model
+
+ ```json
+ {
+   "name": "save_model",
+   "description": "Persist a trained model, its config, and evaluation metrics to the local registry (~/.scikit-rec/registry/<model_name>/).",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "model_id": {"type": "string"},
+       "tags": {"type": "array", "items": {"type": "string"}}
+     },
+     "required": ["model_id"]
+   }
+ }
+ ```
+ **Returns:** `registry_path`, `model_name`, `saved_at`.
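+
+ A sketch of the save path this implies (pickle for the model object, JSON for metadata; the exact file names are illustrative, not a fixed layout):
+
+ ```python
+ import json
+ import pickle
+ from pathlib import Path
+
+ from scikit_rec_agent.session import ModelHandle
+
+ def save_to_registry(handle: ModelHandle) -> Path:
+     """Persist one ModelHandle under ~/.scikit-rec/registry/<name>/."""
+     root = Path.home() / ".scikit-rec" / "registry" / handle.name
+     root.mkdir(parents=True, exist_ok=True)
+     (root / "model.pkl").write_bytes(pickle.dumps(handle.recommender))
+     metadata = {
+         "model_id": handle.model_id,
+         "config": handle.config,
+         "metrics": handle.metrics,
+         "tags": handle.tags,
+         "created_at": handle.created_at,
+     }
+     (root / "metadata.json").write_text(json.dumps(metadata, indent=2))
+     return root
+ ```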
+
+ ### list_models
+
+ ```json
+ {
+   "name": "list_models",
+   "description": "List all models in the local registry (persistent — not just current session). Returns metadata and metrics so the user can choose one to load.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "tag_filter": {"type": "array", "items": {"type": "string"}},
+       "recommender_type_filter": {"type": "string"}
+     }
+   }
+ }
+ ```
+ **Returns:** list of `{model_name, model_id, recommender_type, tags, saved_at, metrics}`.
+
+ ### load_model
+
+ ```json
+ {
+   "name": "load_model",
+   "description": "Load a registered model into the current session. Subsequent evaluate_model / compare_models / save_model calls can reference it by model_id.",
+   "input_schema": {
+     "type": "object",
+     "properties": {
+       "model_name": {"type": "string"}
+     },
+     "required": ["model_name"]
+   }
+ }
+ ```
+ **Returns:** `model_id`, `name`, `config`, `metrics` from registry.
+
+ ---
+
+ ## Conversation Flow
+
+ ### Typical session
+
+ ```
+ Turn 1 — User describes data
+ User: "I have click-stream data with user features and an item catalog"
+ Agent: Asks for file paths, clarifies goal (ranking? bandit? sequential?)
+
+ Turn 2 — Profile + validate
+ Agent calls: profile_data (x3) + validate_data (x3)
+ Agent: "1.2M interactions across 50K users / 10K items. Target is binary (clicks).
+         Timestamps span 6 months. User features: age, gender, segment.
+         Item features: category, price. Sparsity 99.76%. 342 duplicate pairs.
+         Your columns 'userid' / 'itemid' / 'clicked' need to be mapped to
+         USER_ID / ITEM_ID / OUTCOME — I'll pass column_mapping automatically."
+
+ Turn 3 — Agent suggests pipelines IN-PROMPT
+ Agent (text, no tool call): "Given binary implicit feedback with timestamps
+ and side features, three candidates:
+
+ 1. XGBoost + UniversalScorer + Ranking — fast baseline, handles features well.
+    {recommender_type: ranking, scorer_type: universal,
+     estimator_config: {ml_task: classification,
+                        xgboost: {n_estimators: 200, max_depth: 6}}}
+
+ 2. Two-Tower + UniversalScorer + Ranking — learns user/item embeddings,
+    robust to cold-start.
+    {recommender_type: ranking, scorer_type: universal,
+     estimator_config: {estimator_type: embedding,
+                        embedding: {model_type: two_tower,
+                                    params: {embedding_dim: 64}}}}
+
+ 3. SASRec + SequentialScorer + Sequential — exploits timestamps.
+    {recommender_type: sequential, scorer_type: sequential,
+     estimator_config: {estimator_type: sequential,
+                        sequential: {model_type: sasrec_classifier,
+                                     params: {hidden_units: 64, max_len: 50}}},
+     recommender_params: {max_len: 50}}
+
+ Train all three?"
+
+ Turn 4 — User confirms
+ User: "Yes, train all three."
+ Agent calls: create_datasets (auto-schema + column_mapping)
+ Agent calls: train_model x3
+ Agent: "XGBoost: 45s. Two-Tower: 3m 12s. SASRec: 8m 41s."
+
+ Turn 5 — Evaluate
+ Agent calls: evaluate_model x3 (Simple, NDCG/Precision/MAP @ 10)
+ Agent calls: compare_models
+ Agent: "Two-Tower wins. NDCG@10: XGB 0.312, TT 0.347, SASRec 0.338."
+
+ Turn 6 — HPO
+ User: "Optimize Two-Tower."
+ Agent calls: run_hpo (TPE, 50 trials, embedding_dim / lr search space)
+ Agent: "Best NDCG@10: 0.381 (embedding_dim=128, lr=0.003).
+         Re-trained the best config as twotower_hpo_1712400000."
+ Agent calls: save_model
+ ```
+
+ ### Edge cases the agent handles (via the in-prompt heuristics)
+
+ - **Missing columns**: `validate_data` detects near-matches, returns `suggested_column_mapping`; agent passes it to `create_datasets`.
+ - **Rating scale (1–5) vs binary**: `profile_data` reports `target_type`; agent picks `regression` vs `classification` accordingly.
+ - **Too sparse for embeddings**: agent warns when < 100K interactions and recommends XGBoost over Two-Tower/NCF.
+ - **No timestamps**: agent skips sequential candidates.
+ - **Causal evaluation**: agent asks for `logging_proba` / `expected_rewards` and sets `evaluator_type` to `IPS` / `DR` / `direct_method`.
+ - **Multi-outcome rewards (revenue + clicks)**: agent suggests GCSL with `predefined_value` or `mean_scalarization` inference methods.
+
+ ---
+
+ ## System Prompt
+
+ The default system prompt (lives in `src/scikit_rec_agent/prompts/system.py`) encodes:
+
+ 1. **Role and tone** — domain expert, concise, never trains what the data can't support.
+ 2. **scikit-rec architecture recap** — the 3-layer model, when to use each recommender type.
+ 3. **Capability matrix** — authoritative enums for `recommender_type`, `scorer_type`, `estimator_type`, `model_type` (embedding), `model_type` (sequential), `inference_method.type`, `retriever.type`, `sampler`. **These enums should be imported from `skrec.orchestrator.factory` at prompt build time** so the prompt can't drift from the factory — e.g. read `_EMBEDDING_ESTIMATOR_MAP.keys()` directly (see the sketch after this list).
+ 4. **Canonical config shapes** — the 6 shapes from the Prereq section above, copied verbatim.
+ 5. **Decision heuristics**:
+    - Data size thresholds (when embeddings outperform XGBoost)
+    - Feature availability (dense features → `deep_cross_network`, sparse → `matrix_factorization`)
+    - Sparsity bounds (embedding models need ≥ ~100K interactions)
+    - Target type → `ml_task` mapping
+    - Timestamps present → sequential is an option
+ 6. **Evaluator selection**:
+    - Held-out split + randomized logging → `simple`
+    - Logged from production with known propensities → `IPS` / `SNIPS` / `DR`
+    - Reward model available → `direct_method`
+    - Exploration deployment → `replay_match` / `policy_weighted`
+ 7. **Metric selection by use case** — implicit feedback → NDCG/MAP/Precision; revenue → expected_reward; CTR → roc_auc / pr_auc.
+ 8. **Guardrails**:
+    - Always call `validate_data` before `train_model`.
+    - Don't suggest embedding models on < 100K interactions.
+    - Warn about overfitting with small validation sets.
+    - Flag premature HPO (run baselines first).
+ 9. **Tool-calling discipline**:
+    - `suggest_pipelines` is IN-PROMPT — emit configs in text, don't invent a tool call for it.
+    - Always set both `recommender_type` AND `scorer_type` explicitly.
+    - On factory errors, read the error message and self-correct — don't re-raise to the user.
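+
+ A sketch of the prompt-build derivation referenced in item 3 (`_EMBEDDING_ESTIMATOR_MAP` is the internal named above; any other factory internals used the same way would be assumptions):
+
+ ```python
+ from skrec.orchestrator import factory
+
+ def capability_matrix() -> str:
+     """Derive enum lists from the factory at import time so the prompt can't drift."""
+     embedding_models = sorted(factory._EMBEDDING_ESTIMATOR_MAP.keys())
+     lines = [
+         "Embedding model_type values: " + ", ".join(embedding_models),
+         # ... repeat the pattern for recommender_type, scorer_type, sequential model_type, etc.
+     ]
+     return "\n".join(lines)
+ ```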
+
+ ---
+
+ ## Repo Structure
+
+ ```
+ scikit-rec-agent/
+ ├── pyproject.toml
+ ├── README.md
+ ├── src/
+ │   └── scikit_rec_agent/
+ │       ├── __init__.py        # Exports: Agent, BaseLLM, Tool, Session, DEFAULT_TOOLS, DEFAULT_SYSTEM_PROMPT
+ │       ├── agent.py           # Agent loop: BaseLLM + tool dispatch + streaming
+ │       ├── session.py         # Session + ModelHandle dataclasses
+ │       ├── llm/
+ │       │   ├── __init__.py    # Exports BaseLLM, LLMResponse, LLMStreamEvent, ToolCall
+ │       │   ├── base.py        # Protocol + dataclasses
+ │       │   ├── anthropic.py   # AnthropicAdapter
+ │       │   └── openai.py      # OpenAIAdapter
+ │       ├── tools/
+ │       │   ├── __init__.py    # DEFAULT_TOOLS list; Tool dataclass
+ │       │   ├── profiling.py   # profile_data, validate_data
+ │       │   ├── datasets.py    # create_datasets (incl. auto-schema generation)
+ │       │   ├── training.py    # train_model
+ │       │   ├── evaluation.py  # evaluate_model, compare_models
+ │       │   ├── hpo.py         # run_hpo
+ │       │   └── registry.py    # save_model, list_models, load_model
+ │       ├── prompts/
+ │       │   ├── __init__.py
+ │       │   ├── system.py      # DEFAULT_SYSTEM_PROMPT (built at import from factory enums)
+ │       │   └── _capability.py # Runtime-derived capability matrix → string
+ │       └── cli.py             # Entry point: scikit-rec-agent chat
+ ├── tests/
+ │   ├── fixtures/              # Tiny CSVs + mocked LLM transcripts
+ │   ├── test_profiling.py
+ │   ├── test_datasets.py
+ │   ├── test_training.py
+ │   ├── test_evaluation.py
+ │   ├── test_hpo.py
+ │   ├── test_registry.py
+ │   ├── test_llm_adapters.py   # Anthropic + OpenAI with mocked API
+ │   └── test_agent_integration.py  # End-to-end with scripted LLM
+ └── examples/
+     ├── movielens_session.md   # Example conversation transcript
+     ├── custom_tool.py         # Adding a user-defined tool
+     ├── custom_prompt.py       # Overriding the default system prompt
+     └── custom_frontend.py     # Driving Agent from Jupyter / Slack / web
+ ```
+
+ ---
+
+ ## Dependencies
+
+ ```toml
+ [project]
+ name = "scikit-rec-agent"
+ requires-python = ">=3.10"
+ dependencies = [
+     "scikit-rec>=1.0.0",
+ ]
+
+ [project.optional-dependencies]
+ anthropic = ["anthropic>=0.40.0"]
+ openai = ["openai>=1.0.0"]
+ torch = ["scikit-rec[torch]"]  # passthrough for sequential / embedding models
+ aws = ["scikit-rec[aws]"]      # passthrough for S3 dataset loading
+ dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4", "mypy>=1.0"]
+ ```
+
+ Core has zero LLM SDK dependencies. Users install the adapter they need:
+
+ ```bash
+ pip install scikit-rec-agent[anthropic]        # Claude
+ pip install scikit-rec-agent[openai]           # GPT-4
+ pip install scikit-rec-agent[anthropic,torch]  # Claude + deep-learning models
+ pip install scikit-rec-agent                   # bring your own LLM
+ ```
+
+ All ML dependencies come transitively through `scikit-rec`.
+
+ ---
+
+ ## Build Plan
+
+ 1. **Day 1 — Skeleton**: `pyproject.toml`, `llm/{base,anthropic,openai}.py`, `session.py`, `agent.py` loop, mocked-LLM smoke test (one scripted `train_model` call end-to-end).
+ 2. **Days 2–4 — Tools**: all 10 tools against `create_recommender_pipeline` and `HyperparameterOptimizer`. Use `skrec.examples.datasets.sample_*` for fixtures.
+ 3. **Days 5–6 — System prompt + CLI**: build the capability matrix from factory enums at import time (derive, don't hardcode); CLI entry with streaming; single end-to-end transcript example.
+ 4. **Day 7 — Tests + polish**: per-tool tests, adapter tests with mocked API, end-to-end scripted-LLM integration test, README.
+
+ Out of scope for v1: Jupyter widget, web UI, MLflow registry backend, and non-XGBoost tabular estimators (LightGBM / logreg / sklearn wrappers) via the factory. Those estimators work today if constructed manually but aren't in the factory's enum yet; adding them is a scikit-rec follow-up, not an agent concern.
@@ -0,0 +1,56 @@
+ [build-system]
+ requires = ["setuptools>=68", "wheel", "setuptools-scm"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "scikit-rec-agent"
+ dynamic = ["version"]
+ description = "Conversational AI agent that drives scikit-rec via tool use"
+ readme = "README.md"
+ license = "Apache-2.0"
+ requires-python = ">=3.10"
+ dependencies = []
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=7.0",
+     "pytest-cov>=4.0",
+     "ruff>=0.4",
+     "mypy>=1.0",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/intuit/scikit-rec-agent"
+ Repository = "https://github.com/intuit/scikit-rec-agent"
+ Issues = "https://github.com/intuit/scikit-rec-agent/issues"
+
+ [tool.setuptools_scm]
+
+ [tool.setuptools.packages.find]
+ include = ["scikit_rec_agent*"]
+
+ # --- ruff (lint + format) ---
+ [tool.ruff]
+ line-length = 120
+ target-version = "py310"
+
+ [tool.ruff.lint]
+ select = ["E", "F", "I"]  # pycodestyle errors, pyflakes, isort
+
+ [tool.ruff.format]
+ quote-style = "double"
+
+ # --- mypy ---
+ [tool.mypy]
+ python_version = "3.10"
+ strict = false
+ ignore_missing_imports = true
+
+ # --- pytest ---
+ [tool.pytest.ini_options]
+ testpaths = ["tests"]
+ addopts = "-v"
+
+ # --- coverage ---
+ [tool.coverage.run]
+ source = ["scikit_rec_agent"]
@@ -0,0 +1,3 @@
+ """scikit-rec-agent: conversational AI agent for scikit-rec."""
+
+ __all__: list[str] = []
@@ -0,0 +1,33 @@
+ Metadata-Version: 2.4
+ Name: scikit-rec-agent
+ Version: 0.0.1
+ Summary: Conversational AI agent that drives scikit-rec via tool use
+ License-Expression: Apache-2.0
+ Project-URL: Homepage, https://github.com/intuit/scikit-rec-agent
+ Project-URL: Repository, https://github.com/intuit/scikit-rec-agent
+ Project-URL: Issues, https://github.com/intuit/scikit-rec-agent/issues
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0; extra == "dev"
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
+ Requires-Dist: ruff>=0.4; extra == "dev"
+ Requires-Dist: mypy>=1.0; extra == "dev"
+ Dynamic: license-file
+
+ # scikit-rec-agent
+
+ Conversational AI agent that uses [scikit-rec](https://github.com/intuit/scikit-rec) as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
+
+ > **Status:** placeholder release. Real implementation in progress — see [`agentic_design.md`](./agentic_design.md) for the spec.
+
+ ## Install
+
+ ```bash
+ pip install scikit-rec-agent
+ ```
+
+ ## License
+
+ Apache-2.0
@@ -0,0 +1,15 @@
+ .gitignore
+ LICENSE
+ README.md
+ agentic_design.md
+ pyproject.toml
+ .claude/settings.json
+ .github/workflows/ci.yml
+ .github/workflows/publish.yml
+ scikit_rec_agent/__init__.py
+ scikit_rec_agent.egg-info/PKG-INFO
+ scikit_rec_agent.egg-info/SOURCES.txt
+ scikit_rec_agent.egg-info/dependency_links.txt
+ scikit_rec_agent.egg-info/requires.txt
+ scikit_rec_agent.egg-info/top_level.txt
+ tests/test_import.py
@@ -0,0 +1,6 @@
+
+ [dev]
+ pytest>=7.0
+ pytest-cov>=4.0
+ ruff>=0.4
+ mypy>=1.0
@@ -0,0 +1 @@
+ scikit_rec_agent
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,4 @@
+ def test_package_imports():
+     import scikit_rec_agent
+
+     assert scikit_rec_agent.__all__ == []