scikit-rec-agent 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_rec_agent-0.0.1/.claude/settings.json +12 -0
- scikit_rec_agent-0.0.1/.github/workflows/ci.yml +40 -0
- scikit_rec_agent-0.0.1/.github/workflows/publish.yml +83 -0
- scikit_rec_agent-0.0.1/.gitignore +29 -0
- scikit_rec_agent-0.0.1/LICENSE +190 -0
- scikit_rec_agent-0.0.1/PKG-INFO +33 -0
- scikit_rec_agent-0.0.1/README.md +15 -0
- scikit_rec_agent-0.0.1/agentic_design.md +794 -0
- scikit_rec_agent-0.0.1/pyproject.toml +56 -0
- scikit_rec_agent-0.0.1/scikit_rec_agent/__init__.py +3 -0
- scikit_rec_agent-0.0.1/scikit_rec_agent.egg-info/PKG-INFO +33 -0
- scikit_rec_agent-0.0.1/scikit_rec_agent.egg-info/SOURCES.txt +15 -0
- scikit_rec_agent-0.0.1/scikit_rec_agent.egg-info/dependency_links.txt +1 -0
- scikit_rec_agent-0.0.1/scikit_rec_agent.egg-info/requires.txt +6 -0
- scikit_rec_agent-0.0.1/scikit_rec_agent.egg-info/top_level.txt +1 -0
- scikit_rec_agent-0.0.1/setup.cfg +4 -0
- scikit_rec_agent-0.0.1/tests/test_import.py +4 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
name: Lint
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v5
|
|
15
|
+
- uses: actions/setup-python@v6
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.12"
|
|
18
|
+
- name: Install ruff
|
|
19
|
+
run: pip install ruff
|
|
20
|
+
- name: Check formatting
|
|
21
|
+
run: ruff format --check .
|
|
22
|
+
- name: Check lint
|
|
23
|
+
run: ruff check .
|
|
24
|
+
|
|
25
|
+
test:
|
|
26
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
strategy:
|
|
29
|
+
fail-fast: false
|
|
30
|
+
matrix:
|
|
31
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
32
|
+
steps:
|
|
33
|
+
- uses: actions/checkout@v5
|
|
34
|
+
- uses: actions/setup-python@v6
|
|
35
|
+
with:
|
|
36
|
+
python-version: ${{ matrix.python-version }}
|
|
37
|
+
- name: Install package and test deps
|
|
38
|
+
run: pip install -e ".[dev]"
|
|
39
|
+
- name: Run tests
|
|
40
|
+
run: pytest
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
inputs:
|
|
6
|
+
version:
|
|
7
|
+
description: "Version to release (e.g. 1.2.3)"
|
|
8
|
+
required: true
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
tag:
|
|
12
|
+
name: Create tag
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
permissions:
|
|
15
|
+
contents: write
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v5
|
|
18
|
+
with:
|
|
19
|
+
fetch-depth: 0
|
|
20
|
+
- name: Create and push tag
|
|
21
|
+
run: |
|
|
22
|
+
git config user.name "github-actions"
|
|
23
|
+
git config user.email "github-actions@github.com"
|
|
24
|
+
git tag v${{ github.event.inputs.version }}
|
|
25
|
+
git push origin v${{ github.event.inputs.version }}
|
|
26
|
+
|
|
27
|
+
build:
|
|
28
|
+
name: Build distribution
|
|
29
|
+
needs: tag
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
steps:
|
|
32
|
+
- uses: actions/checkout@v5
|
|
33
|
+
with:
|
|
34
|
+
fetch-depth: 0
|
|
35
|
+
ref: v${{ github.event.inputs.version }}
|
|
36
|
+
- uses: actions/setup-python@v6
|
|
37
|
+
with:
|
|
38
|
+
python-version: "3.12"
|
|
39
|
+
- name: Install build
|
|
40
|
+
run: pip install build
|
|
41
|
+
- name: Build wheel and sdist
|
|
42
|
+
run: python -m build
|
|
43
|
+
- name: Upload dist artifacts
|
|
44
|
+
uses: actions/upload-artifact@v5
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
48
|
+
|
|
49
|
+
publish-pypi:
|
|
50
|
+
name: Publish to PyPI
|
|
51
|
+
needs: build
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
environment: pypi
|
|
54
|
+
permissions:
|
|
55
|
+
id-token: write
|
|
56
|
+
steps:
|
|
57
|
+
- name: Download dist artifacts
|
|
58
|
+
uses: actions/download-artifact@v5
|
|
59
|
+
with:
|
|
60
|
+
name: dist
|
|
61
|
+
path: dist/
|
|
62
|
+
- name: Publish to PyPI
|
|
63
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
64
|
+
|
|
65
|
+
github-release:
|
|
66
|
+
name: Create GitHub Release
|
|
67
|
+
needs: publish-pypi
|
|
68
|
+
runs-on: ubuntu-latest
|
|
69
|
+
permissions:
|
|
70
|
+
contents: write
|
|
71
|
+
steps:
|
|
72
|
+
- uses: actions/checkout@v5
|
|
73
|
+
- name: Download dist artifacts
|
|
74
|
+
uses: actions/download-artifact@v5
|
|
75
|
+
with:
|
|
76
|
+
name: dist
|
|
77
|
+
path: dist/
|
|
78
|
+
- name: Create GitHub Release
|
|
79
|
+
uses: softprops/action-gh-release@v2
|
|
80
|
+
with:
|
|
81
|
+
tag_name: v${{ github.event.inputs.version }}
|
|
82
|
+
files: dist/*
|
|
83
|
+
generate_release_notes: true
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
scikit_rec_agent.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
.eggs/
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
# Testing
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.coverage
|
|
19
|
+
htmlcov/
|
|
20
|
+
coverage.xml
|
|
21
|
+
|
|
22
|
+
# Mypy
|
|
23
|
+
.mypy_cache/
|
|
24
|
+
|
|
25
|
+
# Ruff
|
|
26
|
+
.ruff_cache/
|
|
27
|
+
|
|
28
|
+
# OS
|
|
29
|
+
.DS_Store
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to the Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by the Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding any notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for reasonable and customary use in describing the
|
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
168
|
+
or other liability obligations and/or rights consistent with this
|
|
169
|
+
License. However, in accepting such obligations, You may act only
|
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
174
|
+
of your accepting any such warranty or additional liability.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
Copyright 2025 Intuit Inc.
|
|
179
|
+
|
|
180
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
181
|
+
you may not use this file except in compliance with the License.
|
|
182
|
+
You may obtain a copy of the License at
|
|
183
|
+
|
|
184
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
185
|
+
|
|
186
|
+
Unless required by applicable law or agreed to in writing, software
|
|
187
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
188
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
189
|
+
See the License for the specific language governing permissions and
|
|
190
|
+
limitations under the License.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scikit-rec-agent
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Conversational AI agent that drives scikit-rec via tool use
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Project-URL: Homepage, https://github.com/intuit/scikit-rec-agent
|
|
7
|
+
Project-URL: Repository, https://github.com/intuit/scikit-rec-agent
|
|
8
|
+
Project-URL: Issues, https://github.com/intuit/scikit-rec-agent/issues
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
15
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
16
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# scikit-rec-agent
|
|
20
|
+
|
|
21
|
+
Conversational AI agent that uses [scikit-rec](https://github.com/intuit/scikit-rec) as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
|
|
22
|
+
|
|
23
|
+
> **Status:** placeholder release. Real implementation in progress — see [`agentic_design.md`](./agentic_design.md) for the spec.
|
|
24
|
+
|
|
25
|
+
## Install
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install scikit-rec-agent
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## License
|
|
32
|
+
|
|
33
|
+
Apache-2.0
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# scikit-rec-agent
|
|
2
|
+
|
|
3
|
+
Conversational AI agent that uses [scikit-rec](https://github.com/intuit/scikit-rec) as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
|
|
4
|
+
|
|
5
|
+
> **Status:** placeholder release. Real implementation in progress — see [`agentic_design.md`](./agentic_design.md) for the spec.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install scikit-rec-agent
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## License
|
|
14
|
+
|
|
15
|
+
Apache-2.0
|
|
@@ -0,0 +1,794 @@
|
|
|
1
|
+
# scikit-rec-agent: Design Document
|
|
2
|
+
|
|
3
|
+
Conversational AI agent that uses scikit-rec as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
|
|
4
|
+
|
|
5
|
+
This document is the authoritative spec for the implementation. It reflects the decisions locked in during design review and the factory contract provided by scikit-rec PR landed on 2026-04-17 (commits `74a773c` + `137d278` + `5bdc7d0`).
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Design Decisions
|
|
10
|
+
|
|
11
|
+
| Decision | Choice | Rationale |
|
|
12
|
+
|---|---|---|
|
|
13
|
+
| Distribution | **Single pip package** (`scikit-rec-agent`) | Installable library with CLI entry point. Examples live in `examples/`, no separate cookbook repo. |
|
|
14
|
+
| LLM provider | **Bring-your-own** via `BaseLLM` protocol | Users pass any LLM that implements `chat()` + `chat_stream()`. Ship Anthropic + OpenAI adapters at launch. |
|
|
15
|
+
| System prompt | **Swappable at `Agent()` construction** | Default prompt exported; users pass `system_prompt=...` to override or extend. |
|
|
16
|
+
| Tool registry | **Pluggable at `Agent()` construction** | 10 default tools ship with the library; users extend or replace via `tools=...`. |
|
|
17
|
+
| Interface | **CLI** for v1 | `scikit-rec-agent chat`. Jupyter/web layered on top of `Agent` later. |
|
|
18
|
+
| Model registry | **Local filesystem** | `~/.scikit-rec/registry/` — JSON metadata + pickle. |
|
|
19
|
+
| Tool scope | **10 tools, everything in v1** | No v1.1 tier. If it's worth shipping, it ships now. |
|
|
20
|
+
| Recommender scope | **Full scikit-rec capability matrix** | All 6 recommenders × 6 scorers × 3 estimator planes. Driven end-to-end via `create_recommender_pipeline`. |
|
|
21
|
+
| `suggest_pipelines` | **In-prompt reasoning, not a tool** | The LLM emits candidate `RecommenderConfig` dicts as text; `train_model` validates via the factory. |
|
|
22
|
+
| Config validation | **Delegated to scikit-rec factory** | Agent does not re-implement enum checks. Bad configs fail at `train_model` with the factory's error message surfaced to the LLM. |
|
|
23
|
+
| Streaming | **Yes** | Stream LLM text deltas to terminal for responsive UX during long tool executions. |
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Architecture
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
User (CLI)
|
|
31
|
+
|
|
|
32
|
+
v
|
|
33
|
+
Agent Loop (BaseLLM protocol + tool dispatch + streaming)
|
|
34
|
+
|
|
|
35
|
+
|--- BaseLLM protocol ----+---- AnthropicAdapter (Claude)
|
|
36
|
+
| +---- OpenAIAdapter (GPT-4)
|
|
37
|
+
| +---- UserCustomAdapter (anything)
|
|
38
|
+
|
|
|
39
|
+
v
|
|
40
|
+
Tools Layer (10 structured tool-use functions)
|
|
41
|
+
|
|
|
42
|
+
v
|
|
43
|
+
scikit-rec
|
|
44
|
+
|
|
|
45
|
+
|-- skrec.orchestrator.create_recommender_pipeline(config)
|
|
46
|
+
| Recommender -> Scorer -> Estimator
|
|
47
|
+
|-- skrec.orchestrator.HyperparameterOptimizer (used by run_hpo)
|
|
48
|
+
|-- skrec.dataset.{Interactions,Users,Items}Dataset
|
|
49
|
+
|
|
|
50
|
+
v
|
|
51
|
+
Model Registry (~/.scikit-rec/registry/)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The agent is stateful across turns. `Session` holds loaded datasets, trained pipeline handles, and evaluation results. Tools mutate this session; model objects themselves never enter the LLM context — only `model_id` handles and metadata.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Prerequisite: scikit-rec Factory Contract
|
|
59
|
+
|
|
60
|
+
The agent depends on a single entry point:
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from skrec.orchestrator import create_recommender_pipeline, RecommenderConfig
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
`create_recommender_pipeline(config: RecommenderConfig) -> BaseRecommender` builds the full Estimator → Scorer → Recommender chain from a dict. It covers the entire scikit-rec capability matrix (post PR `74a773c`):
|
|
67
|
+
|
|
68
|
+
### Recommender types
|
|
69
|
+
`ranking`, `bandits`, `sequential`, `hierarchical_sequential`, `uplift`, `gcsl`
|
|
70
|
+
|
|
71
|
+
### Scorer types
|
|
72
|
+
`universal`, `independent`, `multiclass`, `multioutput`, `sequential`, `hierarchical`
|
|
73
|
+
|
|
74
|
+
### Estimator planes (`estimator_type` discriminator)
|
|
75
|
+
- `tabular` — XGBoost classifier/regressor, MultiOutputClassifier (LightGBM, sklearn wrappers available directly but not via factory enum today — acceptable for v1)
|
|
76
|
+
- `embedding` (`model_type` ∈ {`matrix_factorization`, `ncf`, `two_tower`, `deep_cross_network`, `neural_factorization`})
|
|
77
|
+
- `sequential` (`model_type` ∈ {`sasrec_classifier`, `sasrec_regressor`, `hrnn_classifier`, `hrnn_regressor`})
|
|
78
|
+
|
|
79
|
+
### Required fields
|
|
80
|
+
- `recommender_type` — **required**, raises `ValueError` if missing or `None`.
|
|
81
|
+
- `scorer_type` — **required**, raises `ValueError` if missing.
|
|
82
|
+
- `estimator_config` — required; `estimator_type` defaults to `"tabular"`.
|
|
83
|
+
- `recommender_params` — required only for recommenders that need them (e.g. `uplift` requires `control_item_id`). Keys irrelevant to the chosen recommender are silently ignored.
|
|
84
|
+
|
|
85
|
+
### Cross-cutting validators the factory already enforces
|
|
86
|
+
The agent relies on these and does **not** re-implement them:
|
|
87
|
+
|
|
88
|
+
- `sequential` / `hierarchical_sequential` recommenders require `estimator_type="sequential"`
|
|
89
|
+
- `sequential` recommender requires `scorer_type="sequential"`
|
|
90
|
+
- `hierarchical_sequential` requires `scorer_type="hierarchical"`
|
|
91
|
+
- `sequential` / `hierarchical` scorers require `estimator_type="sequential"`
|
|
92
|
+
- `embedding` estimators are rejected by `multioutput` / `multiclass` / `independent` scorers
|
|
93
|
+
- `uplift` recommender requires `scorer_type ∈ {"independent", "universal"}`
|
|
94
|
+
|
|
95
|
+
When a bad config reaches `train_model`, the factory raises `ValueError` / `TypeError` / `NotImplementedError`. The tool captures the message verbatim and returns it as a tool error — the LLM reads the error and corrects the config without the agent needing a parallel validator.
|
|
96
|
+
|
|
97
|
+
### Canonical config shapes (copy these into the system prompt)
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
# 1. Tabular ranking
|
|
101
|
+
{
|
|
102
|
+
"recommender_type": "ranking",
|
|
103
|
+
"scorer_type": "universal",
|
|
104
|
+
"estimator_config": {
|
|
105
|
+
"ml_task": "classification",
|
|
106
|
+
"xgboost": {"n_estimators": 100, "max_depth": 5, "learning_rate": 0.1},
|
|
107
|
+
},
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
# 2. Embedding ranking (Two-Tower / NCF / MF / DCN / NFM)
|
|
111
|
+
{
|
|
112
|
+
"recommender_type": "ranking",
|
|
113
|
+
"scorer_type": "universal",
|
|
114
|
+
"estimator_config": {
|
|
115
|
+
"estimator_type": "embedding",
|
|
116
|
+
"embedding": {"model_type": "two_tower", "params": {"embedding_dim": 32}},
|
|
117
|
+
},
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
# 3. Sequential (SASRec / HRNN)
|
|
121
|
+
{
|
|
122
|
+
"recommender_type": "sequential",
|
|
123
|
+
"scorer_type": "sequential",
|
|
124
|
+
"estimator_config": {
|
|
125
|
+
"estimator_type": "sequential",
|
|
126
|
+
"sequential": {"model_type": "sasrec_classifier", "params": {"hidden_units": 64, "max_len": 50}},
|
|
127
|
+
},
|
|
128
|
+
"recommender_params": {"max_len": 50},
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
# 4. Uplift (T-Learner / S-Learner / X-Learner)
|
|
132
|
+
{
|
|
133
|
+
"recommender_type": "uplift",
|
|
134
|
+
"scorer_type": "independent",
|
|
135
|
+
"estimator_config": {"ml_task": "classification", "xgboost": {"n_estimators": 100}},
|
|
136
|
+
"recommender_params": {"control_item_id": "control", "mode": "t_learner"},
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
# 5. GCSL (multi-objective)
|
|
140
|
+
{
|
|
141
|
+
"recommender_type": "gcsl",
|
|
142
|
+
"scorer_type": "universal",
|
|
143
|
+
"estimator_config": {"ml_task": "classification", "xgboost": {"n_estimators": 100}},
|
|
144
|
+
"recommender_params": {
|
|
145
|
+
"inference_method": {
|
|
146
|
+
"type": "predefined_value",
|
|
147
|
+
"params": {"goal_values": {"OUTCOME_revenue": 1.0}},
|
|
148
|
+
},
|
|
149
|
+
},
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
# 6. Contextual bandits
|
|
153
|
+
{
|
|
154
|
+
"recommender_type": "bandits",
|
|
155
|
+
"scorer_type": "universal",
|
|
156
|
+
"estimator_config": {"ml_task": "classification", "xgboost": {"n_estimators": 100}},
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### XGBoost hyperparameter hints
|
|
161
|
+
`XGBConfig` is a `TypedDict(total=False)` with these typed keys (full passthrough — any other XGBoost param is accepted too):
|
|
162
|
+
`n_estimators`, `max_depth`, `learning_rate`, `subsample`, `colsample_bytree`, `colsample_bynode`, `objective`, `eval_metric`, `n_jobs`, `random_state`.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## LLM Provider Abstraction
|
|
167
|
+
|
|
168
|
+
Users bring their own LLM. The agent depends on a `BaseLLM` protocol, not a specific SDK.
|
|
169
|
+
|
|
170
|
+
### Protocol
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from typing import Protocol, Iterator, Any
|
|
174
|
+
|
|
175
|
+
class BaseLLM(Protocol):
|
|
176
|
+
def chat(
|
|
177
|
+
self,
|
|
178
|
+
messages: list[dict[str, Any]],
|
|
179
|
+
tools: list[dict[str, Any]],
|
|
180
|
+
system: str,
|
|
181
|
+
) -> LLMResponse:
|
|
182
|
+
"""Non-streaming chat completion with tool definitions.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
messages: Conversation history in OpenAI-style format.
|
|
186
|
+
tools: List of tool schemas (JSON Schema format, provider-agnostic).
|
|
187
|
+
system: System prompt string.
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
LLMResponse with text content and/or tool calls.
|
|
191
|
+
"""
|
|
192
|
+
...
|
|
193
|
+
|
|
194
|
+
def chat_stream(
|
|
195
|
+
self,
|
|
196
|
+
messages: list[dict[str, Any]],
|
|
197
|
+
tools: list[dict[str, Any]],
|
|
198
|
+
system: str,
|
|
199
|
+
) -> Iterator[LLMStreamEvent]:
|
|
200
|
+
"""Streaming variant. Yields text deltas and tool calls.
|
|
201
|
+
|
|
202
|
+
Tool calls are atomic events — they're only emitted once fully received.
|
|
203
|
+
Only text deltas stream token-by-token.
|
|
204
|
+
"""
|
|
205
|
+
...
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Response types
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
@dataclass
|
|
212
|
+
class ToolCall:
|
|
213
|
+
id: str # Unique ID for this tool call
|
|
214
|
+
name: str # Tool name (e.g. "profile_data")
|
|
215
|
+
arguments: dict # Parsed JSON arguments
|
|
216
|
+
|
|
217
|
+
@dataclass
|
|
218
|
+
class LLMResponse:
|
|
219
|
+
content: str | None # Text response (may be None if only tool calls)
|
|
220
|
+
tool_calls: list[ToolCall] # Zero or more tool calls
|
|
221
|
+
stop_reason: str # "end_turn", "tool_use", "max_tokens", etc.
|
|
222
|
+
|
|
223
|
+
@dataclass
|
|
224
|
+
class LLMStreamEvent:
|
|
225
|
+
type: str # "text_delta" | "tool_call" | "done"
|
|
226
|
+
text: str | None = None
|
|
227
|
+
tool_call: ToolCall | None = None
|
|
228
|
+
stop_reason: str | None = None
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Built-in adapters
|
|
232
|
+
|
|
233
|
+
Both ship in v1:
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
class AnthropicAdapter(BaseLLM):
|
|
237
|
+
"""Wraps anthropic.Anthropic client to BaseLLM protocol."""
|
|
238
|
+
def __init__(self, client: "anthropic.Anthropic", model: str = "claude-sonnet-4-6"):
|
|
239
|
+
...
|
|
240
|
+
|
|
241
|
+
class OpenAIAdapter(BaseLLM):
|
|
242
|
+
"""Wraps openai.OpenAI client to BaseLLM protocol."""
|
|
243
|
+
def __init__(self, client: "openai.OpenAI", model: str = "gpt-4o"):
|
|
244
|
+
...
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Usage
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from scikit_rec_agent import Agent
|
|
251
|
+
from scikit_rec_agent.llm import AnthropicAdapter
|
|
252
|
+
import anthropic
|
|
253
|
+
|
|
254
|
+
llm = AnthropicAdapter(anthropic.Anthropic(), model="claude-sonnet-4-6")
|
|
255
|
+
agent = Agent(llm=llm)
|
|
256
|
+
agent.chat() # interactive CLI session
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
# Or bring your own
|
|
261
|
+
from scikit_rec_agent import Agent, BaseLLM
|
|
262
|
+
|
|
263
|
+
class MyLLM(BaseLLM):
|
|
264
|
+
def chat(self, messages, tools, system): ...
|
|
265
|
+
def chat_stream(self, messages, tools, system): ...
|
|
266
|
+
|
|
267
|
+
agent = Agent(llm=MyLLM())
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
---
|
|
271
|
+
|
|
272
|
+
## Session State
|
|
273
|
+
|
|
274
|
+
The agent is stateful across turns. The `Session` dataclass holds:
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
@dataclass
|
|
278
|
+
class Session:
|
|
279
|
+
loaded_datasets: dict[str, dict] # path -> {profile, dataset_objects}
|
|
280
|
+
trained_models: dict[str, ModelHandle] # model_id -> handle
|
|
281
|
+
messages: list[dict] # conversation history
|
|
282
|
+
|
|
283
|
+
@dataclass
|
|
284
|
+
class ModelHandle:
|
|
285
|
+
model_id: str # e.g. "twotower_1712345678"
|
|
286
|
+
name: str # human-readable name
|
|
287
|
+
config: RecommenderConfig # the config passed to train_model
|
|
288
|
+
recommender: BaseRecommender # the actual trained pipeline
|
|
289
|
+
training_time_seconds: float
|
|
290
|
+
datasets_used: dict # paths/schema info
|
|
291
|
+
metrics: dict[str, float] # metric_name@k -> value (accumulates across evaluate_model calls)
|
|
292
|
+
tags: list[str]
|
|
293
|
+
created_at: str # ISO timestamp
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### What enters the LLM context
|
|
297
|
+
Only `model_id`, `name`, config, metrics, training time, and status messages. The `recommender` object stays in Python memory and is referenced by `model_id` from tool calls.
|
|
298
|
+
|
|
299
|
+
### `model_id` generation
|
|
300
|
+
`{model_type}_{unix_timestamp}` — e.g. `twotower_1712345678`. Second-resolution timestamps make collisions within a session unlikely (two models of the same type trained in the same second would collide), and the IDs are readable enough for the LLM to reference unambiguously.
|
|
301
|
+
|
|
302
|
+
---
|
|
303
|
+
|
|
304
|
+
## Extension Points
|
|
305
|
+
|
|
306
|
+
### 1. System prompt
|
|
307
|
+
|
|
308
|
+
```python
|
|
309
|
+
from scikit_rec_agent import Agent
|
|
310
|
+
from scikit_rec_agent.prompts import DEFAULT_SYSTEM_PROMPT
|
|
311
|
+
|
|
312
|
+
custom_prompt = DEFAULT_SYSTEM_PROMPT + "\n\nOur team uses NDCG@10 as the primary metric."
|
|
313
|
+
agent = Agent(llm=llm, system_prompt=custom_prompt)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### 2. Tool registry
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
from scikit_rec_agent import Agent
|
|
320
|
+
from scikit_rec_agent.tools import DEFAULT_TOOLS, Tool
|
|
321
|
+
|
|
322
|
+
def fetch_from_snowflake(query: str, session: Session) -> dict:
|
|
323
|
+
...
|
|
324
|
+
|
|
325
|
+
custom_tool = Tool(
|
|
326
|
+
name="fetch_from_snowflake",
|
|
327
|
+
schema={...}, # JSON schema
|
|
328
|
+
fn=fetch_from_snowflake,
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
agent = Agent(llm=llm, tools=[*DEFAULT_TOOLS, custom_tool])
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
Tool functions receive the `Session` as a keyword arg so user-defined tools can read and mutate the same state.
|
|
335
|
+
|
|
336
|
+
### 3. CLI / frontend
|
|
337
|
+
|
|
338
|
+
`scikit-rec-agent chat` is thin glue: constructs an `Agent`, reads stdin, prints streamed output. Users who want Jupyter, Slack, or a web UI instantiate `Agent` directly and drive it with their own I/O loop. `Agent.chat_turn(user_message)` returns an event iterator — the CLI has no privileged access.
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## Agent Tools (v1 — 10 tools)
|
|
343
|
+
|
|
344
|
+
| Tool | Purpose | Wraps |
|
|
345
|
+
|---|---|---|
|
|
346
|
+
| `profile_data` | Load CSV/parquet; report shape, dtypes, cardinality, sparsity, temporal range, target type | pandas + heuristics |
|
|
347
|
+
| `validate_data` | Schema-compliance check against scikit-rec required schemas. Returns violations + auto-fix suggestions. | Compare against `InteractionsDataset.REQUIRED_SCHEMA_PATH_TRAINING` etc. |
|
|
348
|
+
| `create_datasets` | Build `InteractionsDataset` / `UsersDataset` / `ItemsDataset` handles. Auto-generate YAML schema to tmp dir if not provided. Supports `column_mapping` to rename user columns → scikit-rec names. | `DatasetSchema.create` + dataset constructors |
|
|
349
|
+
| `train_model` | Train a recommender pipeline from a `RecommenderConfig`. Creates datasets internally if called with paths; uses pre-built datasets if called with dataset handles from `create_datasets`. | `create_recommender_pipeline` + `.train()` |
|
|
350
|
+
| `evaluate_model` | Run evaluation: evaluator type + metrics + multiple k values. Supports all 7 evaluator types. | `BaseRecommender.evaluate()` |
|
|
351
|
+
| `compare_models` | Tabulate metrics across trained models. Markdown table sorted by primary metric. | Session state lookup |
|
|
352
|
+
| `run_hpo` | Optuna-based hyperparameter optimization. Returns best config + trial results. | `HyperparameterOptimizer.run_optimization()` |
|
|
353
|
+
| `save_model` | Persist model + config + metrics to local registry | pickle + JSON metadata |
|
|
354
|
+
| `list_models` | List models in the local registry (not just session) with metadata. | Filesystem scan of `~/.scikit-rec/registry/` |
|
|
355
|
+
| `load_model` | Load a registered model into the current session. | pickle + session state mutation |
|
|
356
|
+
|
|
357
|
+
**`suggest_pipelines` is deliberately NOT a tool.** It's the agent's job: after `profile_data` + `validate_data`, the LLM emits 2–5 candidate `RecommenderConfig` dicts as text in its reply with rationale for each. The user picks one (or more), then the LLM calls `train_model`. The factory validates the config on entry — there's no need for a Python-side validator.
|
|
358
|
+
|
|
359
|
+
### Tool error contract
|
|
360
|
+
|
|
361
|
+
Every tool's return value is a JSON-serializable dict with a consistent envelope:
|
|
362
|
+
|
|
363
|
+
```python
|
|
364
|
+
# Success
|
|
365
|
+
{"status": "ok", "data": {...}}
|
|
366
|
+
|
|
367
|
+
# Error (factory raised, file missing, evaluation failed, etc.)
|
|
368
|
+
{"status": "error", "error_type": "ValueError", "message": "...", "hint": "optional suggestion"}
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
Both shapes are passed back as the tool result. The LLM reads the `message` field and self-corrects on error. `hint` is used for high-confidence fixes we can synthesize locally (e.g. `"Your column 'user' was detected as the user ID — pass column_mapping={'user': 'USER_ID'}"`).
|
|
372
|
+
|
|
373
|
+
---
|
|
374
|
+
|
|
375
|
+
## Tool Schemas (v1)
|
|
376
|
+
|
|
377
|
+
### profile_data
|
|
378
|
+
|
|
379
|
+
```json
|
|
380
|
+
{
|
|
381
|
+
"name": "profile_data",
|
|
382
|
+
"description": "Load and profile a data file. Reports shape, dtypes, cardinality of ID columns, sparsity, value distributions, temporal range, and whether the target looks implicit (binary) or explicit (ratings).",
|
|
383
|
+
"input_schema": {
|
|
384
|
+
"type": "object",
|
|
385
|
+
"properties": {
|
|
386
|
+
"file_path": {"type": "string", "description": "Path to CSV or parquet file"},
|
|
387
|
+
"file_type": {"type": "string", "enum": ["interactions", "users", "items"]}
|
|
388
|
+
},
|
|
389
|
+
"required": ["file_path", "file_type"]
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
```
|
|
393
|
+
**Returns:** `shape`, `columns` (name, dtype, null_count, n_unique, sample_values), `id_columns_detected`, `target_column_detected`, `target_type` (binary/rating/continuous), `temporal_range` (if timestamp found), `duplicate_pairs_count`, `sparsity`.
|
|
394
|
+
|
|
395
|
+
### validate_data
|
|
396
|
+
|
|
397
|
+
```json
|
|
398
|
+
{
|
|
399
|
+
"name": "validate_data",
|
|
400
|
+
"description": "Validate a data file against scikit-rec required schemas. Reports missing required columns, wrong dtypes, and suggests column renames if near-matches are detected.",
|
|
401
|
+
"input_schema": {
|
|
402
|
+
"type": "object",
|
|
403
|
+
"properties": {
|
|
404
|
+
"file_path": {"type": "string"},
|
|
405
|
+
"file_type": {"type": "string", "enum": ["interactions", "users", "items"]},
|
|
406
|
+
"is_training": {"type": "boolean", "default": true}
|
|
407
|
+
},
|
|
408
|
+
"required": ["file_path", "file_type"]
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
```
|
|
412
|
+
**Returns:** `valid` (bool), `missing_columns`, `wrong_dtypes`, `suggested_column_mapping` (fuzzy-matched renames), `extra_columns` (passed through as features).
|
|
413
|
+
|
|
414
|
+
### create_datasets
|
|
415
|
+
|
|
416
|
+
```json
|
|
417
|
+
{
|
|
418
|
+
"name": "create_datasets",
|
|
419
|
+
"description": "Build scikit-rec Dataset handles. Auto-generates YAML schemas from the data types if client_schema_path is not provided. Applies column_mapping to rename columns to USER_ID/ITEM_ID/OUTCOME as needed. Registers the handles in the session under a dataset_bundle_id.",
|
|
420
|
+
"input_schema": {
|
|
421
|
+
"type": "object",
|
|
422
|
+
"properties": {
|
|
423
|
+
"bundle_name": {"type": "string"},
|
|
424
|
+
"interactions_path": {"type": "string"},
|
|
425
|
+
"users_path": {"type": "string"},
|
|
426
|
+
"items_path": {"type": "string"},
|
|
427
|
+
"column_mapping": {
|
|
428
|
+
"type": "object",
|
|
429
|
+
"description": "Map user's column names to scikit-rec names, e.g. {\"userid\": \"USER_ID\", \"clicked\": \"OUTCOME\"}"
|
|
430
|
+
},
|
|
431
|
+
"schemas": {
|
|
432
|
+
"type": "object",
|
|
433
|
+
"description": "Optional pre-written YAML schema paths keyed by file_type"
|
|
434
|
+
}
|
|
435
|
+
},
|
|
436
|
+
"required": ["bundle_name", "interactions_path"]
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
```
|
|
440
|
+
**Returns:** `bundle_id`, paths to generated schema files (so user can inspect / version them), summary of the three datasets.
|
|
441
|
+
|
|
442
|
+
### train_model
|
|
443
|
+
|
|
444
|
+
```json
|
|
445
|
+
{
|
|
446
|
+
"name": "train_model",
|
|
447
|
+
"description": "Train a recommender pipeline from a RecommenderConfig. Supply either a dataset bundle_id from create_datasets, OR raw file paths with optional column_mapping (train_model will call create_datasets internally). Config is validated by scikit-rec's factory — bad configs raise with a specific error that you can use to correct and retry.",
|
|
448
|
+
"input_schema": {
|
|
449
|
+
"type": "object",
|
|
450
|
+
"properties": {
|
|
451
|
+
"model_name": {"type": "string"},
|
|
452
|
+
"config": {
|
|
453
|
+
"type": "object",
|
|
454
|
+
"description": "RecommenderConfig dict: recommender_type, scorer_type, estimator_config, optional recommender_params. See system prompt for canonical shapes."
|
|
455
|
+
},
|
|
456
|
+
"bundle_id": {"type": "string", "description": "From create_datasets. If provided, paths/column_mapping are ignored."},
|
|
457
|
+
"interactions_path": {"type": "string"},
|
|
458
|
+
"users_path": {"type": "string"},
|
|
459
|
+
"items_path": {"type": "string"},
|
|
460
|
+
"column_mapping": {"type": "object"},
|
|
461
|
+
"validation_split": {"type": "number", "description": "0-1 fraction held out. Default 0.2. Ignored if explicit validation datasets are in the bundle."}
|
|
462
|
+
},
|
|
463
|
+
"required": ["model_name", "config"]
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
```
|
|
467
|
+
**Returns:** `model_id`, `model_name`, `status`, `training_time_seconds`, `estimator_type`, `scorer_type`, `recommender_type`.
|
|
468
|
+
|
|
469
|
+
### evaluate_model
|
|
470
|
+
|
|
471
|
+
```json
|
|
472
|
+
{
|
|
473
|
+
"name": "evaluate_model",
|
|
474
|
+
"description": "Evaluate a trained model using offline evaluation. Supports all 7 evaluator types and all 9 metrics at multiple k values. Results cached on the recommender's evaluation_session — subsequent calls with the same eval_kwargs are free.",
|
|
475
|
+
"input_schema": {
|
|
476
|
+
"type": "object",
|
|
477
|
+
"properties": {
|
|
478
|
+
"model_id": {"type": "string"},
|
|
479
|
+
"evaluator_type": {
|
|
480
|
+
"type": "string",
|
|
481
|
+
"enum": ["simple", "replay_match", "IPS", "DR", "direct_method", "SNIPS", "policy_weighted"]
|
|
482
|
+
},
|
|
483
|
+
"metrics": {
|
|
484
|
+
"type": "array",
|
|
485
|
+
"items": {
|
|
486
|
+
"type": "string",
|
|
487
|
+
"enum": ["NDCG_at_k", "MAP_at_k", "MRR_at_k", "precision_at_k", "recall_at_k", "average_reward_at_k", "roc_auc", "pr_auc", "expected_reward"]
|
|
488
|
+
}
|
|
489
|
+
},
|
|
490
|
+
"k_values": {"type": "array", "items": {"type": "integer"}},
|
|
491
|
+
"eval_kwargs": {"type": "object", "description": "logged_items, logged_rewards, logging_proba, expected_rewards — as required by the evaluator type"}
|
|
492
|
+
},
|
|
493
|
+
"required": ["model_id", "evaluator_type", "metrics", "k_values"]
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
```
|
|
497
|
+
**Returns:** `model_id`, `results` (list of `{metric, k, value}`).
|
|
498
|
+
|
|
499
|
+
### compare_models
|
|
500
|
+
|
|
501
|
+
```json
|
|
502
|
+
{
|
|
503
|
+
"name": "compare_models",
|
|
504
|
+
"description": "Compare trained models in the current session. Returns a markdown leaderboard sorted by primary metric.",
|
|
505
|
+
"input_schema": {
|
|
506
|
+
"type": "object",
|
|
507
|
+
"properties": {
|
|
508
|
+
"model_ids": {"type": "array", "items": {"type": "string"}, "description": "If empty, compares all trained models in the session."},
|
|
509
|
+
"primary_metric": {"type": "string"},
|
|
510
|
+
"k": {"type": "integer"}
|
|
511
|
+
},
|
|
512
|
+
"required": ["primary_metric", "k"]
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
```
|
|
516
|
+
**Returns:** markdown table (models × metrics) + JSON version.
|
|
517
|
+
|
|
518
|
+
### run_hpo
|
|
519
|
+
|
|
520
|
+
```json
|
|
521
|
+
{
|
|
522
|
+
"name": "run_hpo",
|
|
523
|
+
"description": "Run Optuna hyperparameter optimization on a base RecommenderConfig. Supports TPE, GP, CMA-ES, random, grid, QMC samplers. Results persisted to a parquet file keyed by study_name.",
|
|
524
|
+
"input_schema": {
|
|
525
|
+
"type": "object",
|
|
526
|
+
"properties": {
|
|
527
|
+
"study_name": {"type": "string"},
|
|
528
|
+
"base_config": {"type": "object", "description": "RecommenderConfig with fixed values"},
|
|
529
|
+
"search_space": {
|
|
530
|
+
"type": "object",
|
|
531
|
+
"description": "Dot-notation param paths → dimension specs. Each spec is {type: int|float|categorical, low, high, step?, log?, choices?}. Example: {'estimator_config.xgboost.n_estimators': {type: 'int', low: 50, high: 500, step: 50}}"
|
|
532
|
+
},
|
|
533
|
+
"metric_definitions": {
|
|
534
|
+
"type": "array",
|
|
535
|
+
"items": {"type": "string"},
|
|
536
|
+
"description": "Metric names like 'NDCG@10' or 'MAP@5'."
|
|
537
|
+
},
|
|
538
|
+
"objective_metric": {"type": "string"},
|
|
539
|
+
"bundle_id": {"type": "string", "description": "Dataset bundle from create_datasets — must include validation datasets."},
|
|
540
|
+
"n_trials": {"type": "integer"},
|
|
541
|
+
"sampler": {"type": "string", "enum": ["tpe", "gp", "cmaes", "random", "grid", "qmc"], "default": "tpe"},
|
|
542
|
+
"direction": {"type": "string", "enum": ["maximize", "minimize"], "default": "maximize"}
|
|
543
|
+
},
|
|
544
|
+
"required": ["study_name", "base_config", "search_space", "metric_definitions", "objective_metric", "bundle_id", "n_trials"]
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
```
|
|
548
|
+
**Returns:** `best_params`, `best_value`, `n_complete_trials`, `results_parquet_path`, and a `model_id` for the best config if it is automatically re-trained at the end (controlled by `retrain_best`, default `true` — note this flag is not yet listed in the input schema above and should be added there).
|
|
549
|
+
|
|
550
|
+
### save_model
|
|
551
|
+
|
|
552
|
+
```json
|
|
553
|
+
{
|
|
554
|
+
"name": "save_model",
|
|
555
|
+
"description": "Persist a trained model, its config, and evaluation metrics to the local registry (~/.scikit-rec/registry/<model_name>/).",
|
|
556
|
+
"input_schema": {
|
|
557
|
+
"type": "object",
|
|
558
|
+
"properties": {
|
|
559
|
+
"model_id": {"type": "string"},
|
|
560
|
+
"tags": {"type": "array", "items": {"type": "string"}}
|
|
561
|
+
},
|
|
562
|
+
"required": ["model_id"]
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
```
|
|
566
|
+
**Returns:** `registry_path`, `model_name`, `saved_at`.
|
|
567
|
+
|
|
568
|
+
### list_models
|
|
569
|
+
|
|
570
|
+
```json
|
|
571
|
+
{
|
|
572
|
+
"name": "list_models",
|
|
573
|
+
"description": "List all models in the local registry (persistent — not just current session). Returns metadata and metrics so the user can choose one to load.",
|
|
574
|
+
"input_schema": {
|
|
575
|
+
"type": "object",
|
|
576
|
+
"properties": {
|
|
577
|
+
"tag_filter": {"type": "array", "items": {"type": "string"}},
|
|
578
|
+
"recommender_type_filter": {"type": "string"}
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
```
|
|
583
|
+
**Returns:** list of `{model_name, model_id, recommender_type, tags, saved_at, metrics}`.
|
|
584
|
+
|
|
585
|
+
### load_model
|
|
586
|
+
|
|
587
|
+
```json
|
|
588
|
+
{
|
|
589
|
+
"name": "load_model",
|
|
590
|
+
"description": "Load a registered model into the current session. Subsequent evaluate_model / compare_models / save_model calls can reference it by model_id.",
|
|
591
|
+
"input_schema": {
|
|
592
|
+
"type": "object",
|
|
593
|
+
"properties": {
|
|
594
|
+
"model_name": {"type": "string"}
|
|
595
|
+
},
|
|
596
|
+
"required": ["model_name"]
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
```
|
|
600
|
+
**Returns:** `model_id`, `name`, `config`, `metrics` from registry.
|
|
601
|
+
|
|
602
|
+
---
|
|
603
|
+
|
|
604
|
+
## Conversation Flow
|
|
605
|
+
|
|
606
|
+
### Typical session
|
|
607
|
+
|
|
608
|
+
```
|
|
609
|
+
Turn 1 — User describes data
|
|
610
|
+
User: "I have click-stream data with user features and an item catalog"
|
|
611
|
+
Agent: Asks for file paths, clarifies goal (ranking? bandit? sequential?)
|
|
612
|
+
|
|
613
|
+
Turn 2 — Profile + validate
|
|
614
|
+
Agent calls: profile_data (x3) + validate_data (x3)
|
|
615
|
+
Agent: "1.2M interactions across 50K users / 10K items. Target is binary (clicks).
|
|
616
|
+
Timestamps span 6 months. User features: age, gender, segment.
|
|
617
|
+
Item features: category, price. Sparsity 99.76%. 342 duplicate pairs.
|
|
618
|
+
Your columns 'userid' / 'itemid' / 'clicked' need to be mapped to
|
|
619
|
+
USER_ID / ITEM_ID / OUTCOME — I'll pass column_mapping automatically."
|
|
620
|
+
|
|
621
|
+
Turn 3 — Agent suggests pipelines IN-PROMPT
|
|
622
|
+
Agent (text, no tool call): "Given binary implicit feedback with timestamps
|
|
623
|
+
and side features, three candidates:
|
|
624
|
+
|
|
625
|
+
1. XGBoost + UniversalScorer + Ranking — fast baseline, handles features well.
|
|
626
|
+
{recommender_type: ranking, scorer_type: universal,
|
|
627
|
+
estimator_config: {ml_task: classification,
|
|
628
|
+
xgboost: {n_estimators: 200, max_depth: 6}}}
|
|
629
|
+
|
|
630
|
+
2. Two-Tower + UniversalScorer + Ranking — learns user/item embeddings,
|
|
631
|
+
robust to cold-start.
|
|
632
|
+
{recommender_type: ranking, scorer_type: universal,
|
|
633
|
+
estimator_config: {estimator_type: embedding,
|
|
634
|
+
embedding: {model_type: two_tower,
|
|
635
|
+
params: {embedding_dim: 64}}}}
|
|
636
|
+
|
|
637
|
+
3. SASRec + SequentialScorer + Sequential — exploits timestamps.
|
|
638
|
+
{recommender_type: sequential, scorer_type: sequential,
|
|
639
|
+
estimator_config: {estimator_type: sequential,
|
|
640
|
+
sequential: {model_type: sasrec_classifier,
|
|
641
|
+
params: {hidden_units: 64, max_len: 50}}},
|
|
642
|
+
recommender_params: {max_len: 50}}
|
|
643
|
+
|
|
644
|
+
Train all three?"
|
|
645
|
+
|
|
646
|
+
Turn 4 — User confirms
|
|
647
|
+
User: "Yes, train all three."
|
|
648
|
+
Agent calls: create_datasets (auto-schema + column_mapping)
|
|
649
|
+
Agent calls: train_model x3
|
|
650
|
+
Agent: "XGBoost: 45s. Two-Tower: 3m 12s. SASRec: 8m 41s."
|
|
651
|
+
|
|
652
|
+
Turn 5 — Evaluate
|
|
653
|
+
Agent calls: evaluate_model x3 (Simple, NDCG/Precision/MAP @ 10)
|
|
654
|
+
Agent calls: compare_models
|
|
655
|
+
Agent: "Two-Tower wins. NDCG@10: XGB 0.312, TT 0.347, SASRec 0.338."
|
|
656
|
+
|
|
657
|
+
Turn 6 — HPO
|
|
658
|
+
User: "Optimize Two-Tower."
|
|
659
|
+
Agent calls: run_hpo (TPE, 50 trials, embedding_dim / lr search space)
|
|
660
|
+
Agent: "Best NDCG@10: 0.381 (embedding_dim=128, lr=0.003).
|
|
661
|
+
Re-trained the best config as twotower_hpo_1712400000."
|
|
662
|
+
Agent calls: save_model
|
|
663
|
+
```
|
|
664
|
+
|
|
665
|
+
### Edge cases the agent handles (via the in-prompt heuristics)
|
|
666
|
+
|
|
667
|
+
- **Missing columns**: `validate_data` detects near-matches, returns `suggested_column_mapping`; agent passes it to `create_datasets`.
|
|
668
|
+
- **Rating scale (1–5) vs binary**: `profile_data` reports `target_type`; agent picks `regression` vs `classification` accordingly.
|
|
669
|
+
- **Too sparse for embeddings**: agent warns when < 100K interactions and recommends XGBoost over Two-Tower/NCF.
|
|
670
|
+
- **No timestamps**: agent skips sequential candidates.
|
|
671
|
+
- **Causal evaluation**: agent asks for `logging_proba` / `expected_rewards` and sets `evaluator_type` to `IPS` / `DR` / `direct_method`.
|
|
672
|
+
- **Multi-outcome rewards (revenue + clicks)**: agent suggests GCSL with `predefined_value` or `mean_scalarization` inference methods.
|
|
673
|
+
|
|
674
|
+
---
|
|
675
|
+
|
|
676
|
+
## System Prompt
|
|
677
|
+
|
|
678
|
+
The default system prompt (lives in `src/scikit_rec_agent/prompts/system.py`) encodes:
|
|
679
|
+
|
|
680
|
+
1. **Role and tone** — domain expert, concise, never trains what the data can't support.
|
|
681
|
+
2. **scikit-rec architecture recap** — the 3-layer model, when to use each recommender type.
|
|
682
|
+
3. **Capability matrix** — authoritative enums for `recommender_type`, `scorer_type`, `estimator_type`, `model_type` (embedding), `model_type` (sequential), `inference_method.type`, `retriever.type`, `sampler`. **These enums should be imported from `skrec.orchestrator.factory` at prompt build time** so the prompt can't drift from the factory — e.g. read `_EMBEDDING_ESTIMATOR_MAP.keys()` directly.
|
|
683
|
+
4. **Canonical config shapes** — the 6 shapes from the Prereq section above, copied verbatim.
|
|
684
|
+
5. **Decision heuristics**:
|
|
685
|
+
- Data size thresholds (when embeddings outperform XGBoost)
|
|
686
|
+
- Feature availability (dense features → DeepFM, sparse → MF)
|
|
687
|
+
- Sparsity bounds (embedding models need ≥ ~100K interactions)
|
|
688
|
+
- Target type → `ml_task` mapping
|
|
689
|
+
- Timestamps present → sequential is an option
|
|
690
|
+
6. **Evaluator selection**:
|
|
691
|
+
- Held-out split + randomized logging → `simple`
|
|
692
|
+
- Logged from production with known propensities → `IPS` / `SNIPS` / `DR`
|
|
693
|
+
- Reward model available → `direct_method`
|
|
694
|
+
- Exploration deployment → `replay_match` / `policy_weighted`
|
|
695
|
+
7. **Metric selection by use case** — implicit feedback → NDCG/MAP/Precision; revenue → expected_reward; CTR → roc_auc / pr_auc.
|
|
696
|
+
8. **Guardrails**:
|
|
697
|
+
- Always call `validate_data` before `train_model`.
|
|
698
|
+
- Don't suggest embedding models on < 100K interactions.
|
|
699
|
+
- Warn about overfitting with small validation sets.
|
|
700
|
+
- Flag premature HPO (run baselines first).
|
|
701
|
+
9. **Tool-calling discipline**:
|
|
702
|
+
- `suggest_pipelines` is IN-PROMPT — emit configs in text, don't invent a tool call for it.
|
|
703
|
+
- Always set both `recommender_type` AND `scorer_type` explicitly.
|
|
704
|
+
- On factory errors, read the error message and self-correct — don't re-raise to the user.
|
|
705
|
+
|
|
706
|
+
---
|
|
707
|
+
|
|
708
|
+
## Repo Structure
|
|
709
|
+
|
|
710
|
+
```
|
|
711
|
+
scikit-rec-agent/
|
|
712
|
+
├── pyproject.toml
|
|
713
|
+
├── README.md
|
|
714
|
+
├── src/
|
|
715
|
+
│ └── scikit_rec_agent/
|
|
716
|
+
│ ├── __init__.py # Exports: Agent, BaseLLM, Tool, Session, DEFAULT_TOOLS, DEFAULT_SYSTEM_PROMPT
|
|
717
|
+
│ ├── agent.py # Agent loop: BaseLLM + tool dispatch + streaming
|
|
718
|
+
│ ├── session.py # Session + ModelHandle dataclasses
|
|
719
|
+
│ ├── llm/
|
|
720
|
+
│ │ ├── __init__.py # Exports BaseLLM, LLMResponse, LLMStreamEvent, ToolCall
|
|
721
|
+
│ │ ├── base.py # Protocol + dataclasses
|
|
722
|
+
│ │ ├── anthropic.py # AnthropicAdapter
|
|
723
|
+
│ │ └── openai.py # OpenAIAdapter
|
|
724
|
+
│ ├── tools/
|
|
725
|
+
│ │ ├── __init__.py # DEFAULT_TOOLS list; Tool dataclass
|
|
726
|
+
│ │ ├── profiling.py # profile_data, validate_data
|
|
727
|
+
│ │ ├── datasets.py # create_datasets (incl. auto-schema generation)
|
|
728
|
+
│ │ ├── training.py # train_model
|
|
729
|
+
│ │ ├── evaluation.py # evaluate_model, compare_models
|
|
730
|
+
│ │ ├── hpo.py # run_hpo
|
|
731
|
+
│ │ └── registry.py # save_model, list_models, load_model
|
|
732
|
+
│ ├── prompts/
|
|
733
|
+
│ │ ├── __init__.py
|
|
734
|
+
│ │ ├── system.py # DEFAULT_SYSTEM_PROMPT (built at import from factory enums)
|
|
735
|
+
│ │ └── _capability.py # Runtime-derived capability matrix → string
|
|
736
|
+
│ └── cli.py # Entry point: scikit-rec-agent chat
|
|
737
|
+
├── tests/
|
|
738
|
+
│ ├── fixtures/ # Tiny CSVs + mocked LLM transcripts
|
|
739
|
+
│ ├── test_profiling.py
|
|
740
|
+
│ ├── test_datasets.py
|
|
741
|
+
│ ├── test_training.py
|
|
742
|
+
│ ├── test_evaluation.py
|
|
743
|
+
│ ├── test_hpo.py
|
|
744
|
+
│ ├── test_registry.py
|
|
745
|
+
│ ├── test_llm_adapters.py # Anthropic + OpenAI with mocked API
|
|
746
|
+
│ └── test_agent_integration.py # End-to-end with scripted LLM
|
|
747
|
+
└── examples/
|
|
748
|
+
├── movielens_session.md # Example conversation transcript
|
|
749
|
+
├── custom_tool.py # Adding a user-defined tool
|
|
750
|
+
├── custom_prompt.py # Overriding the default system prompt
|
|
751
|
+
└── custom_frontend.py # Driving Agent from Jupyter / Slack / web
|
|
752
|
+
```
|
|
753
|
+
|
|
754
|
+
---
|
|
755
|
+
|
|
756
|
+
## Dependencies
|
|
757
|
+
|
|
758
|
+
```toml
|
|
759
|
+
[project]
|
|
760
|
+
name = "scikit-rec-agent"
|
|
761
|
+
requires-python = ">=3.10"
|
|
762
|
+
dependencies = [
|
|
763
|
+
"scikit-rec>=1.0.0",
|
|
764
|
+
]
|
|
765
|
+
|
|
766
|
+
[project.optional-dependencies]
|
|
767
|
+
anthropic = ["anthropic>=0.40.0"]
|
|
768
|
+
openai = ["openai>=1.0.0"]
|
|
769
|
+
torch = ["scikit-rec[torch]"] # passthrough for sequential / embedding models
|
|
770
|
+
aws = ["scikit-rec[aws]"] # passthrough for S3 dataset loading
|
|
771
|
+
dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4", "mypy>=1.0"]
|
|
772
|
+
```
|
|
773
|
+
|
|
774
|
+
Core has zero LLM SDK dependencies. Users install the adapter they need:
|
|
775
|
+
|
|
776
|
+
```bash
|
|
777
|
+
pip install scikit-rec-agent[anthropic] # Claude
|
|
778
|
+
pip install scikit-rec-agent[openai] # GPT-4
|
|
779
|
+
pip install scikit-rec-agent[anthropic,torch] # Claude + deep-learning models
|
|
780
|
+
pip install scikit-rec-agent # bring your own LLM
|
|
781
|
+
```
|
|
782
|
+
|
|
783
|
+
All ML dependencies come transitively through `scikit-rec`.
|
|
784
|
+
|
|
785
|
+
---
|
|
786
|
+
|
|
787
|
+
## Build Plan
|
|
788
|
+
|
|
789
|
+
1. **Day 1 — Skeleton**: `pyproject.toml`, `llm/{base,anthropic,openai}.py`, `session.py`, `agent.py` loop, mocked-LLM smoke test (one scripted `train_model` call end-to-end).
|
|
790
|
+
2. **Days 2–4 — Tools**: all 10 tools against `create_recommender_pipeline` and `HyperparameterOptimizer`. Use `skrec.examples.datasets.sample_*` for fixtures.
|
|
791
|
+
3. **Days 5–6 — System prompt + CLI**: build the capability matrix from factory enums at import time (derive, don't hardcode); CLI entry with streaming; single end-to-end transcript example.
|
|
792
|
+
4. **Day 7 — Tests + polish**: per-tool tests, adapter tests with mocked API, end-to-end scripted-LLM integration test, README.
|
|
793
|
+
|
|
794
|
+
Out of scope for v1: Jupyter widget, web UI, MLflow registry backend, non-XGBoost tabular estimators (LightGBM / logreg / sklearn wrappers) via factory — they work today if manually constructed but aren't in the factory's enum yet, which is a scikit-rec follow-up, not an agent concern.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel", "setuptools-scm"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "scikit-rec-agent"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Conversational AI agent that drives scikit-rec via tool use"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
dependencies = []
|
|
13
|
+
|
|
14
|
+
[project.optional-dependencies]
|
|
15
|
+
dev = [
|
|
16
|
+
"pytest>=7.0",
|
|
17
|
+
"pytest-cov>=4.0",
|
|
18
|
+
"ruff>=0.4",
|
|
19
|
+
"mypy>=1.0",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://github.com/intuit/scikit-rec-agent"
|
|
24
|
+
Repository = "https://github.com/intuit/scikit-rec-agent"
|
|
25
|
+
Issues = "https://github.com/intuit/scikit-rec-agent/issues"
|
|
26
|
+
|
|
27
|
+
[tool.setuptools_scm]
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
include = ["scikit_rec_agent*"]
|
|
31
|
+
|
|
32
|
+
# --- ruff (lint + format) ---
|
|
33
|
+
[tool.ruff]
|
|
34
|
+
line-length = 120
|
|
35
|
+
target-version = "py310"
|
|
36
|
+
|
|
37
|
+
[tool.ruff.lint]
|
|
38
|
+
select = ["E", "F", "I"] # pycodestyle errors, pyflakes, isort
|
|
39
|
+
|
|
40
|
+
[tool.ruff.format]
|
|
41
|
+
quote-style = "double"
|
|
42
|
+
|
|
43
|
+
# --- mypy ---
|
|
44
|
+
[tool.mypy]
|
|
45
|
+
python_version = "3.10"
|
|
46
|
+
strict = false
|
|
47
|
+
ignore_missing_imports = true
|
|
48
|
+
|
|
49
|
+
# --- pytest ---
|
|
50
|
+
[tool.pytest.ini_options]
|
|
51
|
+
testpaths = ["tests"]
|
|
52
|
+
addopts = "-v"
|
|
53
|
+
|
|
54
|
+
# --- coverage ---
|
|
55
|
+
[tool.coverage.run]
|
|
56
|
+
source = ["scikit_rec_agent"]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scikit-rec-agent
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Conversational AI agent that drives scikit-rec via tool use
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Project-URL: Homepage, https://github.com/intuit/scikit-rec-agent
|
|
7
|
+
Project-URL: Repository, https://github.com/intuit/scikit-rec-agent
|
|
8
|
+
Project-URL: Issues, https://github.com/intuit/scikit-rec-agent/issues
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
15
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
16
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# scikit-rec-agent
|
|
20
|
+
|
|
21
|
+
Conversational AI agent that uses [scikit-rec](https://github.com/intuit/scikit-rec) as its tool belt. The agent reasons about the user's data and goals, then calls scikit-rec APIs via structured tool use to build, evaluate, and compare recommendation systems.
|
|
22
|
+
|
|
23
|
+
> **Status:** placeholder release. Real implementation in progress — see [`agentic_design.md`](./agentic_design.md) for the spec.
|
|
24
|
+
|
|
25
|
+
## Install
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install scikit-rec-agent
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## License
|
|
32
|
+
|
|
33
|
+
Apache-2.0
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
.gitignore
|
|
2
|
+
LICENSE
|
|
3
|
+
README.md
|
|
4
|
+
agentic_design.md
|
|
5
|
+
pyproject.toml
|
|
6
|
+
.claude/settings.json
|
|
7
|
+
.github/workflows/ci.yml
|
|
8
|
+
.github/workflows/publish.yml
|
|
9
|
+
scikit_rec_agent/__init__.py
|
|
10
|
+
scikit_rec_agent.egg-info/PKG-INFO
|
|
11
|
+
scikit_rec_agent.egg-info/SOURCES.txt
|
|
12
|
+
scikit_rec_agent.egg-info/dependency_links.txt
|
|
13
|
+
scikit_rec_agent.egg-info/requires.txt
|
|
14
|
+
scikit_rec_agent.egg-info/top_level.txt
|
|
15
|
+
tests/test_import.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
scikit_rec_agent
|