sift-tools 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sift_tools-0.1.0/.dockerignore +14 -0
- sift_tools-0.1.0/.env.example +8 -0
- sift_tools-0.1.0/.gitattributes +4 -0
- sift_tools-0.1.0/.github/workflows/ci.yml +26 -0
- sift_tools-0.1.0/.github/workflows/publish.yml +28 -0
- sift_tools-0.1.0/.gitignore +41 -0
- sift_tools-0.1.0/CHANGELOG.md +32 -0
- sift_tools-0.1.0/CONTRIBUTING.md +35 -0
- sift_tools-0.1.0/Dockerfile +19 -0
- sift_tools-0.1.0/LICENSE +21 -0
- sift_tools-0.1.0/PKG-INFO +366 -0
- sift_tools-0.1.0/README.md +315 -0
- sift_tools-0.1.0/benchmarks/RESULTS.md +66 -0
- sift_tools-0.1.0/benchmarks/run_benchmark.py +137 -0
- sift_tools-0.1.0/core/cmd/sift/main.go +370 -0
- sift_tools-0.1.0/core/data/bench_tasks.json +14 -0
- sift_tools-0.1.0/core/data/registry.json +124 -0
- sift_tools-0.1.0/core/go.mod +3 -0
- sift_tools-0.1.0/core/internal/agent/agent.go +189 -0
- sift_tools-0.1.0/core/internal/bench/bench.go +225 -0
- sift_tools-0.1.0/core/internal/embed/embed.go +92 -0
- sift_tools-0.1.0/core/internal/gateway/gateway.go +244 -0
- sift_tools-0.1.0/core/internal/gateway/gateway_test.go +83 -0
- sift_tools-0.1.0/core/internal/gateway/mock.go +90 -0
- sift_tools-0.1.0/core/internal/openrouter/openrouter.go +124 -0
- sift_tools-0.1.0/core/internal/registry/registry.go +203 -0
- sift_tools-0.1.0/core/internal/registry/registry_test.go +59 -0
- sift_tools-0.1.0/core/internal/toon/toon.go +107 -0
- sift_tools-0.1.0/core/internal/toon/toon_test.go +55 -0
- sift_tools-0.1.0/embed-svc/requirements.txt +3 -0
- sift_tools-0.1.0/embed-svc/server.py +79 -0
- sift_tools-0.1.0/examples/quickstart.py +69 -0
- sift_tools-0.1.0/examples/serve_http.py +35 -0
- sift_tools-0.1.0/examples/serve_mcp.py +43 -0
- sift_tools-0.1.0/examples/smoke_codemode.py +72 -0
- sift_tools-0.1.0/examples/smoke_live.py +94 -0
- sift_tools-0.1.0/examples/smoke_prompted.py +48 -0
- sift_tools-0.1.0/pyproject.toml +52 -0
- sift_tools-0.1.0/src/sift/__init__.py +206 -0
- sift_tools-0.1.0/src/sift/adapters/__init__.py +1 -0
- sift_tools-0.1.0/src/sift/adapters/anthropic.py +70 -0
- sift_tools-0.1.0/src/sift/adapters/langchain.py +31 -0
- sift_tools-0.1.0/src/sift/adapters/mcp_server.py +35 -0
- sift_tools-0.1.0/src/sift/adapters/openai.py +56 -0
- sift_tools-0.1.0/src/sift/adapters/prompted.py +109 -0
- sift_tools-0.1.0/src/sift/agentbench.py +258 -0
- sift_tools-0.1.0/src/sift/bench.py +155 -0
- sift_tools-0.1.0/src/sift/codemode.py +175 -0
- sift_tools-0.1.0/src/sift/constrain.py +63 -0
- sift_tools-0.1.0/src/sift/embeddings.py +39 -0
- sift_tools-0.1.0/src/sift/evalsuite.py +84 -0
- sift_tools-0.1.0/src/sift/gateway.py +197 -0
- sift_tools-0.1.0/src/sift/http_server.py +86 -0
- sift_tools-0.1.0/src/sift/importers/__init__.py +7 -0
- sift_tools-0.1.0/src/sift/importers/_common.py +40 -0
- sift_tools-0.1.0/src/sift/importers/mcp.py +89 -0
- sift_tools-0.1.0/src/sift/importers/mcp_proxy.py +139 -0
- sift_tools-0.1.0/src/sift/importers/openapi.py +125 -0
- sift_tools-0.1.0/src/sift/metatools.py +66 -0
- sift_tools-0.1.0/src/sift/registry.py +264 -0
- sift_tools-0.1.0/src/sift/rerank.py +28 -0
- sift_tools-0.1.0/src/sift/retrieval.py +66 -0
- sift_tools-0.1.0/src/sift/scope.py +96 -0
- sift_tools-0.1.0/src/sift/toon.py +61 -0
- sift_tools-0.1.0/tests/conftest.py +63 -0
- sift_tools-0.1.0/tests/test_adapters.py +98 -0
- sift_tools-0.1.0/tests/test_codemode.py +58 -0
- sift_tools-0.1.0/tests/test_core.py +83 -0
- sift_tools-0.1.0/tests/test_eval.py +38 -0
- sift_tools-0.1.0/tests/test_http_server.py +61 -0
- sift_tools-0.1.0/tests/test_importers.py +105 -0
- sift_tools-0.1.0/tests/test_mcp_proxy.py +39 -0
- sift_tools-0.1.0/tests/test_projection.py +42 -0
- sift_tools-0.1.0/tests/test_prompted.py +61 -0
- sift_tools-0.1.0/tests/test_retrieval.py +77 -0
- sift_tools-0.1.0/tests/test_scope.py +80 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Copy this file to .env and fill it in. NEVER commit the real .env.
|
|
2
|
+
OPENROUTER_API="sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
|
3
|
+
|
|
4
|
+
# Agent model on OpenRouter (any model with tool calling)
|
|
5
|
+
SIFT_MODEL="anthropic/claude-haiku-4.5"
|
|
6
|
+
|
|
7
|
+
# Address of the embeddings microservice (Python)
|
|
8
|
+
SIFT_EMBED_URL="http://127.0.0.1:8088"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: ${{ matrix.python-version }}
|
|
20
|
+
cache: pip
|
|
21
|
+
- name: Install
|
|
22
|
+
run: pip install -e ".[dev]"
|
|
23
|
+
- name: Lint
|
|
24
|
+
run: ruff check src tests examples benchmarks
|
|
25
|
+
- name: Test
|
|
26
|
+
run: pytest -q
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
# Publishes to PyPI on a version tag (e.g. v0.1.0).
|
|
4
|
+
# Uses PyPI Trusted Publishing (OIDC) — configure a trusted publisher for this
|
|
5
|
+
# repo/workflow at https://pypi.org/manage/account/publishing/ (no token needed).
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
tags: ["v*"]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build-and-publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
# Tip: for extra safety add a GitHub Environment (repo Settings -> Environments)
|
|
14
|
+
# named e.g. "pypi" and uncomment the next line + set it in the PyPI publisher.
|
|
15
|
+
# environment: pypi
|
|
16
|
+
permissions:
|
|
17
|
+
id-token: write # required for trusted publishing
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
- uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.12"
|
|
23
|
+
- name: Build
|
|
24
|
+
run: |
|
|
25
|
+
pip install build
|
|
26
|
+
python -m build
|
|
27
|
+
- name: Publish to PyPI
|
|
28
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Secrets — never commit real keys
|
|
2
|
+
.env
|
|
3
|
+
.env.local
|
|
4
|
+
*.local
|
|
5
|
+
|
|
6
|
+
# Go reference impl
|
|
7
|
+
core/sift
|
|
8
|
+
core/sift.exe
|
|
9
|
+
*.test
|
|
10
|
+
*.out
|
|
11
|
+
|
|
12
|
+
# Python
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
embed-svc/.venv/
|
|
17
|
+
__pycache__/
|
|
18
|
+
*.py[cod]
|
|
19
|
+
.fastembed_cache/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
.eggs/
|
|
22
|
+
build/
|
|
23
|
+
dist/
|
|
24
|
+
.pytest_cache/
|
|
25
|
+
.ruff_cache/
|
|
26
|
+
.mypy_cache/
|
|
27
|
+
.coverage
|
|
28
|
+
htmlcov/
|
|
29
|
+
|
|
30
|
+
# Benchmarks raw output (RESULTS.md is the curated one)
|
|
31
|
+
benchmarks/results.json
|
|
32
|
+
|
|
33
|
+
# Research notes (private scratch — remove this line to publish it)
|
|
34
|
+
conversa.txt
|
|
35
|
+
|
|
36
|
+
# Editor / OS
|
|
37
|
+
.idea/
|
|
38
|
+
.vscode/
|
|
39
|
+
*.swp
|
|
40
|
+
.DS_Store
|
|
41
|
+
Thumbs.db
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. Format loosely follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/); this project uses semver.
|
|
5
|
+
|
|
6
|
+
## [0.1.0] — unreleased
|
|
7
|
+
|
|
8
|
+
Initial release.
|
|
9
|
+
|
|
10
|
+
### Core
|
|
11
|
+
- Hierarchical tool registry (category → service → function) with TOON schema codec.
|
|
12
|
+
- Three meta-tools (`search_tools`, `get_tool_schema`, `execute_tool`); merged
|
|
13
|
+
search+inspect (schema returned inline) so the model executes directly.
|
|
14
|
+
- Hybrid retrieval (embeddings + BM25 + RRF), optional cross-encoder reranker,
|
|
15
|
+
relevance floor (`min_score`) with an explicit "no matching tools" reply.
|
|
16
|
+
- Response projection: per-tool field whitelist (`returns`) and/or `transform`,
|
|
17
|
+
configurable on imported tools too.
|
|
18
|
+
- Per-model scoping (`sift.scope(allow=, deny=, allow_risky=)`) — an `allowedTools`-style allow/deny filter.
|
|
19
|
+
- Code mode (`run_code`) to orchestrate many tools in one turn, in a restricted
|
|
20
|
+
in-process sandbox (AST policy + line budget); scope-aware.
|
|
21
|
+
|
|
22
|
+
### Integrations
|
|
23
|
+
- Adapters: OpenAI-compatible, native Anthropic, LangChain, MCP server, and a
|
|
24
|
+
prompted (text) adapter for models without native tool calling.
|
|
25
|
+
- Constrained-decoding helpers (`tool_call_schema`, `json_gbnf`).
|
|
26
|
+
- Importers: OpenAPI and MCP (with a live `StdioMcpProxy` executor).
|
|
27
|
+
- Servers: MCP (`serve_mcp`, stdio/SSE) and OpenAPI HTTP (`serve_http`) + Docker.
|
|
28
|
+
|
|
29
|
+
### Tooling
|
|
30
|
+
- Evaluation: filter-level metrics, token report, BFCL-style accuracy, and a
|
|
31
|
+
SIFT-vs-flat agent benchmark.
|
|
32
|
+
- CI (lint + tests on 3.10–3.12) and PyPI trusted-publishing workflow.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Contributing to SIFT
|
|
2
|
+
|
|
3
|
+
Thanks for your interest! SIFT is a small, dependency-light Python package.
|
|
4
|
+
|
|
5
|
+
## Dev setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
python -m venv .venv
|
|
9
|
+
. .venv/bin/activate # Windows: .\.venv\Scripts\Activate.ps1
|
|
10
|
+
pip install -e ".[dev]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Run checks
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
ruff check src tests examples benchmarks
|
|
17
|
+
pytest -q
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Tests are offline and deterministic (a fake embedder / `retrieval="bm25"`), so they
|
|
21
|
+
need no model download or API keys. Live smoke tests under `examples/` (`smoke_*.py`)
|
|
22
|
+
do need an OpenRouter key in `.env`.
|
|
23
|
+
|
|
24
|
+
## Guidelines
|
|
25
|
+
|
|
26
|
+
- Keep the core dependency-light: heavy/optional integrations go behind extras
|
|
27
|
+
(`[openai]`, `[anthropic]`, `[langchain]`, `[mcp]`, `[server]`).
|
|
28
|
+
- New behavior needs a test. Keep `ruff` clean.
|
|
29
|
+
- The public surface is the `Sift` facade and the adapters; prefer adding to those.
|
|
30
|
+
|
|
31
|
+
## Releasing
|
|
32
|
+
|
|
33
|
+
1. Bump `version` in `pyproject.toml` and update `CHANGELOG.md`.
|
|
34
|
+
2. Tag: `git tag vX.Y.Z && git push --tags`.
|
|
35
|
+
3. The `Publish` workflow builds and publishes to PyPI (Trusted Publishing).
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# SIFT as an OpenAPI tool server (for OpenWebUI / REST clients).
|
|
2
|
+
# Customize examples/serve_http.py with your own tools, then build & run:
|
|
3
|
+
# docker build -t sift-server .
|
|
4
|
+
# docker run -p 8000:8000 -e SIFT_API_KEY=secret sift-server
|
|
5
|
+
FROM python:3.12-slim
|
|
6
|
+
|
|
7
|
+
WORKDIR /app
|
|
8
|
+
|
|
9
|
+
# install the package and its deps before copying examples, so editing examples reuses this cached layer
|
|
10
|
+
COPY pyproject.toml README.md LICENSE ./
|
|
11
|
+
COPY src ./src
|
|
12
|
+
RUN pip install --no-cache-dir ".[server]"
|
|
13
|
+
|
|
14
|
+
# your tool definitions / importers live here
|
|
15
|
+
COPY examples ./examples
|
|
16
|
+
|
|
17
|
+
EXPOSE 8000
|
|
18
|
+
ENV SIFT_HOST=0.0.0.0 SIFT_PORT=8000
|
|
19
|
+
CMD ["python", "examples/serve_http.py"]
|
sift_tools-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Victor Alves
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sift-tools
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Hierarchical, search-first tool discovery for LLM agents. Give the model 3 meta-tools instead of a 30k-token catalog.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Victor-Alves0/SIFT
|
|
6
|
+
Project-URL: Repository, https://github.com/Victor-Alves0/SIFT
|
|
7
|
+
Project-URL: Issues, https://github.com/Victor-Alves0/SIFT/issues
|
|
8
|
+
Author: Victor Alves
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agents,function-calling,langchain,llm,mcp,tool-discovery,tools
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Requires-Dist: fastembed>=0.4.2
|
|
22
|
+
Requires-Dist: numpy>=1.24
|
|
23
|
+
Provides-Extra: all
|
|
24
|
+
Requires-Dist: anthropic>=0.40; extra == 'all'
|
|
25
|
+
Requires-Dist: fastapi>=0.110; extra == 'all'
|
|
26
|
+
Requires-Dist: httpx>=0.27; extra == 'all'
|
|
27
|
+
Requires-Dist: langchain-core>=0.3; extra == 'all'
|
|
28
|
+
Requires-Dist: mcp>=1.0; extra == 'all'
|
|
29
|
+
Requires-Dist: openai>=1.40; extra == 'all'
|
|
30
|
+
Requires-Dist: uvicorn>=0.29; extra == 'all'
|
|
31
|
+
Provides-Extra: anthropic
|
|
32
|
+
Requires-Dist: anthropic>=0.40; extra == 'anthropic'
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: fastapi>=0.110; extra == 'dev'
|
|
35
|
+
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
37
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
38
|
+
Requires-Dist: uvicorn>=0.29; extra == 'dev'
|
|
39
|
+
Provides-Extra: langchain
|
|
40
|
+
Requires-Dist: langchain-core>=0.3; extra == 'langchain'
|
|
41
|
+
Provides-Extra: mcp
|
|
42
|
+
Requires-Dist: mcp>=1.0; extra == 'mcp'
|
|
43
|
+
Provides-Extra: openai
|
|
44
|
+
Requires-Dist: openai>=1.40; extra == 'openai'
|
|
45
|
+
Provides-Extra: openapi
|
|
46
|
+
Requires-Dist: httpx>=0.27; extra == 'openapi'
|
|
47
|
+
Provides-Extra: server
|
|
48
|
+
Requires-Dist: fastapi>=0.110; extra == 'server'
|
|
49
|
+
Requires-Dist: uvicorn>=0.29; extra == 'server'
|
|
50
|
+
Description-Content-Type: text/markdown
|
|
51
|
+
|
|
52
|
+
# SIFT — Search · Inspect · Filter · Trigger
|
|
53
|
+
|
|
54
|
+
[![CI](https://github.com/Victor-Alves0/SIFT/actions/workflows/ci.yml/badge.svg)](https://github.com/Victor-Alves0/SIFT/actions/workflows/ci.yml)
|
|
55
|
+
[![PyPI version](https://img.shields.io/pypi/v/sift-tools.svg)](https://pypi.org/project/sift-tools/)
|
|
56
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/sift-tools.svg)](https://pypi.org/project/sift-tools/)
|
|
57
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
|
|
58
|
+
|
|
59
|
+
**Hierarchical, search-first tool discovery for LLM agents.** Give the model
|
|
60
|
+
**3 meta-tools** instead of a 30k-token catalogue — it discovers the rest by
|
|
61
|
+
navigating. Drop-in for OpenAI function-calling, LangChain, or MCP.
|
|
62
|
+
|
|
63
|
+
Repo: [github.com/Victor-Alves0/SIFT](https://github.com/Victor-Alves0/SIFT)
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from sift import Sift
|
|
67
|
+
|
|
68
|
+
sift = Sift()
|
|
69
|
+
|
|
70
|
+
@sift.tool("google_workspace.gmail.read",
|
|
71
|
+
description="Read emails from the inbox",
|
|
72
|
+
params={"q": {"type": "string", "default": "is:unread", "desc": "search query"}, "m": "number:o:10:max"},
|
|
73
|
+
returns=["id", "subject", "from", "snippet", "date"])
|
|
74
|
+
def gmail_read(q="is:unread", m=10):
|
|
75
|
+
... # call the real Gmail API
|
|
76
|
+
return {"id": "1", "subject": "Hi", "from": "a@b.c", "snippet": "...",
|
|
77
|
+
"date": "2026-06-30", "body": "filtered out by the whitelist"}
|
|
78
|
+
|
|
79
|
+
sift.build_index()
|
|
80
|
+
|
|
81
|
+
sift.search_tools("read my last email") # → ranked candidate paths
|
|
82
|
+
sift.get_tool_schema("google_workspace.gmail.read") # → compact TOON schema
|
|
83
|
+
sift.execute_tool("google_workspace.gmail.read", {"m": 1}) # → run + filter
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Why
|
|
87
|
+
|
|
88
|
+
The model never sees the whole catalogue — only 3 tools. It discovers what it
|
|
89
|
+
needs by walking **category → service → function**. The system prompt stays a
|
|
90
|
+
fixed ~200 tokens whether you have 5 tools or 5,000. Adding a tool is one
|
|
91
|
+
decorator. Schemas are returned in **TOON** (one line per tool), and responses
|
|
92
|
+
are **filtered** to a per-tool whitelist.
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
search_tools(q) → semantic discovery (local embeddings) [Search]
|
|
96
|
+
get_tool_schema(path) → hierarchical navigation, TOON schema [Inspect]
|
|
97
|
+
execute_tool(path, params) → run + response filtering [Trigger + Filter]
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Install
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
pip install sift-tools # core (local embeddings, no API key)
|
|
104
|
+
pip install "sift-tools[langchain]" # + LangChain adapter
|
|
105
|
+
pip install "sift-tools[mcp]" # + MCP server adapter
|
|
106
|
+
pip install "sift-tools[all,dev]" # everything + test tooling
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Embeddings run **locally** via `fastembed` (ONNX) — no embedding API key needed.
|
|
110
|
+
Swap in any embedder with an `embed(texts) -> list[vector]` method.
|
|
111
|
+
|
|
112
|
+
## Bring your own model (provider-agnostic)
|
|
113
|
+
|
|
114
|
+
The core is **LLM-agnostic** — it never calls a model itself. It hands you the
|
|
115
|
+
3 tool specs + a system prompt, and `sift.dispatch(name, args)` executes whatever
|
|
116
|
+
tool call your model emits. Wire it to any provider:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
# 1) OpenAI-compatible (OpenAI, OpenRouter, DeepSeek, Together, Groq, Mistral,
|
|
120
|
+
# and LOCAL servers: Ollama / LM Studio / vLLM) — works out of the box
|
|
121
|
+
from openai import OpenAI
|
|
122
|
+
from sift.adapters.openai import run_agent
|
|
123
|
+
|
|
124
|
+
client = OpenAI() # OpenAI
|
|
125
|
+
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama") # Ollama, local
|
|
126
|
+
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=KEY) # OpenRouter
|
|
127
|
+
run_agent(sift, client, "gpt-4o-mini", "what's my last email?")
|
|
128
|
+
|
|
129
|
+
# 2) Native Anthropic (Messages API)
|
|
130
|
+
import anthropic
|
|
131
|
+
from sift.adapters.anthropic import run_agent as run_claude
|
|
132
|
+
run_claude(sift, anthropic.Anthropic(), "claude-haiku-4-5", "what's my last email?")
|
|
133
|
+
|
|
134
|
+
# 3) LangChain (Anthropic, Gemini, Cohere, Bedrock, Ollama, ...)
|
|
135
|
+
agent_tools = sift.langchain_tools() # plug into any LangChain agent
|
|
136
|
+
|
|
137
|
+
# 4) Expose SIFT itself as an MCP server (Claude Desktop, IDEs, ...)
|
|
138
|
+
sift.serve_mcp()
|
|
139
|
+
|
|
140
|
+
# 5) Any other SDK — the universal primitive:
|
|
141
|
+
specs = sift.openai_tools() # give your model the 3 tool specs
|
|
142
|
+
system = sift.system_prompt
|
|
143
|
+
answer = sift.dispatch(name, arguments) # run a tool call -> string back
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
| Provider / path | How | Status |
|
|
147
|
+
|---|---|---|
|
|
148
|
+
| OpenAI-compatible (incl. local Ollama/vLLM) | `openai_tools()` + `dispatch()` / `adapters.openai.run_agent` | ✅ live-tested |
|
|
149
|
+
| Native Anthropic | `adapters.anthropic.run_agent` | ✅ unit + offline-tested |
|
|
150
|
+
| LangChain | `langchain_tools()` | ✅ live-tested |
|
|
151
|
+
| MCP clients | `serve_mcp()` | ✅ |
|
|
152
|
+
| **No native tool calling** (base/small models) | `adapters.prompted` | ✅ live-tested |
|
|
153
|
+
|
|
154
|
+
### Weak or no-tool-calling models (Llama 3B, base models, …)
|
|
155
|
+
|
|
156
|
+
`dispatch` is format-agnostic, so any text model can drive SIFT via a prompted
|
|
157
|
+
JSON protocol — no native function calling required:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from sift.adapters.prompted import run_agent, single_decision
|
|
161
|
+
|
|
162
|
+
def generate(prompt: str) -> str: # wrap ANY text model (HF, llama.cpp, Ollama)
|
|
163
|
+
return my_model(prompt)
|
|
164
|
+
|
|
165
|
+
run_agent(sift, generate, "what's my last email?") # text-protocol tool loop
|
|
166
|
+
single_decision(sift, generate, "read my last email") # 1 decision, for the weakest models
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
For small local models, constrain the decoder so output is always parseable:
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
sift.tool_call_schema() # JSON Schema -> Outlines / LM Format Enforcer / vLLM guided_json
|
|
173
|
+
sift.json_gbnf() # GBNF grammar -> llama.cpp
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
SIFT's tiny 3-tool surface actually *helps* weak models (less to get lost in).
|
|
177
|
+
Realistic floor is ~1–3B params; sub-1B models (OPT-350M) can be interfaced but
|
|
178
|
+
are too small to follow the format reliably.
|
|
179
|
+
|
|
180
|
+
## Import an existing ecosystem
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from sift.importers.openapi import register_openapi
|
|
184
|
+
from sift.importers.mcp import import_mcp_stdio, register_listing
|
|
185
|
+
|
|
186
|
+
register_openapi(sift, spec, category="acme") # OpenAPI 3.x
|
|
187
|
+
await import_mcp_stdio(sift, "npx", ["-y", "@modelcontextprotocol/server-github"],
|
|
188
|
+
category="integrations", service="github") # MCP server
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Each operation/tool becomes a node in the hierarchy — instantly searchable.
|
|
192
|
+
|
|
193
|
+
## Per-model scoping (`allowedTools`) & response projection
|
|
194
|
+
|
|
195
|
+
Built for hubs like OpenWebUI: build the catalogue **once**, then give each model
|
|
196
|
+
a scoped view of which tools it may see/run, and trim what each tool returns.
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
# pick tools for this model (globs over the dotted path); reuses the built index
|
|
200
|
+
view = sift.scope(allow=["google_workspace.gmail.*", "web.search.*"],
|
|
201
|
+
deny=["*.delete", "*.send"])
|
|
202
|
+
view.dispatch("search_tools", {"q": "read my last email"}) # only allowed tools
|
|
203
|
+
view.execute_tool("crm.contacts.delete", {}) # PermissionError (deny wins)
|
|
204
|
+
|
|
205
|
+
# trim a verbose tool's result so each call costs fewer tokens (great for MCPs):
|
|
206
|
+
sift.set_response("google_workspace.gmail.query",
|
|
207
|
+
transform=lambda r: {"ids": [m["id"] for m in r["messages"]]})
|
|
208
|
+
sift.set_response("google_workspace.gmail.read", returns=["id", "subject", "from"])
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
**Idle cost:** when a tool isn't used (the user just says "hi"), SIFT adds only the
|
|
212
|
+
~480-token fixed surface (system prompt + 3 meta-tool specs) — **independent of
|
|
213
|
+
catalogue size**, and ~free across a conversation with prompt caching. A flat
|
|
214
|
+
catalogue instead injects *every* schema each turn (~2.4k tokens at 25 tools,
|
|
215
|
+
~95k at 1,000).
|
|
216
|
+
|
|
217
|
+
## Hybrid retrieval & reranking
|
|
218
|
+
|
|
219
|
+
Discovery fuses **embeddings + BM25** with Reciprocal Rank Fusion (semantics +
|
|
220
|
+
exact terms), and an optional cross-encoder **reranker** sharpens the final order:
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
sift = Sift(retrieval="hybrid") # default; also "embedding" or "bm25"
|
|
224
|
+
|
|
225
|
+
from sift.rerank import FastEmbedReranker
|
|
226
|
+
sift = Sift(reranker=FastEmbedReranker()) # opt-in cross-encoder rerank
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
`retrieval="bm25"` needs no model download at all. Set a relevance floor so
|
|
230
|
+
discovery returns *nothing* (an explicit "no matching tools") instead of the
|
|
231
|
+
nearest-but-irrelevant tool when the catalogue doesn't cover the request:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
sift = Sift(min_score=0.3) # cosine floor (tune per embedding model)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Code mode (compose many tools in one turn)
|
|
238
|
+
|
|
239
|
+
Instead of one round-trip per tool, let the model write a snippet that
|
|
240
|
+
orchestrates tools in a single turn (collapses multi-turn overhead):
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
tools = sift.code_tools() # search_tools + run_code
|
|
244
|
+
system = sift.code_system_prompt
|
|
245
|
+
# in the loop, run_code executes: call(path, **params), search(q), schema(path)
|
|
246
|
+
sift.run_code("output = call('google_workspace.gmail.read', m=1)")
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
The snippet runs in a constrained namespace (no imports/file/eval). It is not a
|
|
250
|
+
hardened sandbox — use code mode with trusted catalogues.
|
|
251
|
+
|
|
252
|
+
## Evaluate
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
from sift.bench import Task, run_filter, token_report
|
|
256
|
+
print(token_report(sift.registry).format()) # TOON vs JSON token savings
|
|
257
|
+
print(run_filter(sift, tasks, top_k=3).format()) # filter-level metrics (no LLM cost)
|
|
258
|
+
|
|
259
|
+
from sift.evalsuite import Case, bfcl_style # BFCL-style function-call accuracy
|
|
260
|
+
print(bfcl_style(call_model, sift.registry, cases).format())
|
|
261
|
+
|
|
262
|
+
from sift.agentbench import build_catalog, run_flat, run_sift # SIFT vs flat baseline
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
Filter-level metrics (à la ToolMenuBench): gold next-tool exposure, no-visible-tool
|
|
266
|
+
rate, average visible tools, MRR, risky-tool exposure, unauthorized risky exposure.
|
|
267
|
+
(tau-bench's stateful environment is out of scope — it's an external harness.)
|
|
268
|
+
|
|
269
|
+
## Schema format
|
|
270
|
+
|
|
271
|
+
A param is either the **compact string** `"<type>:<req>:<default>:<description>"`
|
|
272
|
+
(`req` is `n` required / `o` optional) or the **structured dict** form when you
|
|
273
|
+
need a default containing `:` (e.g. a Gmail `is:unread` query):
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
params={
|
|
277
|
+
"m": "number:o:10:max results", # compact
|
|
278
|
+
"q": {"type": "string", "default": "is:unread", "desc": "query"}, # structured
|
|
279
|
+
}
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
`returns` is the response whitelist. `risk=True` flags high-impact actions
|
|
283
|
+
(send/delete) — surfaced as `|risk` in TOON so the agent can confirm first.
|
|
284
|
+
|
|
285
|
+
## Make imported tools runnable
|
|
286
|
+
|
|
287
|
+
Importers populate the hierarchy for discovery; bind an executor to also run them:
|
|
288
|
+
|
|
289
|
+
```python
|
|
290
|
+
from sift.importers.openapi import register_openapi, httpx_request
|
|
291
|
+
register_openapi(sift, spec, category="acme",
|
|
292
|
+
request=httpx_request("https://api.acme.com"))
|
|
293
|
+
|
|
294
|
+
from sift.importers.mcp import register_listing
|
|
295
|
+
register_listing(sift, listing, category="integrations", service="github",
|
|
296
|
+
executor=lambda name, params: my_mcp_proxy(name, params))
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
For a live MCP server, `connect_mcp_stdio` launches it, registers its tools AND
|
|
300
|
+
binds execution (keeps the session open) in one call:
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
from sift.importers import connect_mcp_stdio
|
|
304
|
+
proxy = connect_mcp_stdio(sift, "npx", ["-y", "@modelcontextprotocol/server-github"],
|
|
305
|
+
category="integrations", service="github")
|
|
306
|
+
sift.build_index()
|
|
307
|
+
# ... imported MCP tools now run out of the box ...
|
|
308
|
+
proxy.close()
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
## Deploy as a server
|
|
312
|
+
|
|
313
|
+
Run SIFT as a standalone server so a hub (OpenWebUI, IDEs, …) connects to *it*,
|
|
314
|
+
and you wire tools/MCPs/OpenAPI into SIFT — one hub for everything.
|
|
315
|
+
|
|
316
|
+
```bash
|
|
317
|
+
# OpenAPI HTTP server (OpenWebUI "tool server", REST clients)
|
|
318
|
+
python examples/serve_http.py # OpenAPI at /openapi.json, docs at /docs
|
|
319
|
+
|
|
320
|
+
# MCP server
|
|
321
|
+
python examples/serve_mcp.py # stdio (Claude Desktop)
|
|
322
|
+
python examples/serve_mcp.py sse # HTTP/SSE (remote)
|
|
323
|
+
|
|
324
|
+
# Docker (OpenAPI server)
|
|
325
|
+
docker build -t sift-server .
|
|
326
|
+
docker run -p 8000:8000 -e SIFT_API_KEY=secret sift-server
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Set `SIFT_API_KEY` to require `Authorization: Bearer <key>`. Pass a `scope=` to
|
|
330
|
+
`build_app` / `serve_http` to expose only a subset of tools per server. Customize
|
|
331
|
+
`examples/serve_http.py` with your own `@sift.tool`s and importers.
|
|
332
|
+
|
|
333
|
+
> OpenWebUI: add the server URL under Tools → OpenAPI tool server. (For MCP,
|
|
334
|
+
> bridge via `mcpo` or OpenWebUI's MCP support.) The model then sees just the 3
|
|
335
|
+
> meta-tools and discovers your catalogue through them.
|
|
336
|
+
|
|
337
|
+
## Repo layout
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
src/sift/ the Python library (the product)
|
|
341
|
+
registry.py hierarchy + navigation
|
|
342
|
+
toon.py TOON codec
|
|
343
|
+
embeddings.py local fastembed backend
|
|
344
|
+
retrieval.py BM25 + RRF (hybrid search)
|
|
345
|
+
rerank.py optional cross-encoder reranker
|
|
346
|
+
gateway.py the 3 meta-tools + hybrid search + filtering + cache
|
|
347
|
+
scope.py per-model allow/deny tool scoping (allowedTools)
|
|
348
|
+
metatools.py canonical tool specs + system prompt
|
|
349
|
+
codemode.py run_code: orchestrate tools in one turn (constrained namespace, not a hardened sandbox)
|
|
350
|
+
constrain.py JSON schema / GBNF for constrained decoders
|
|
351
|
+
http_server.py OpenAPI HTTP tool server (serve_http)
|
|
352
|
+
adapters/ openai · anthropic · langchain · mcp_server · prompted
|
|
353
|
+
importers/ mcp · openapi · mcp_proxy (live MCP execution)
|
|
354
|
+
bench.py filter-level metrics + token report
|
|
355
|
+
agentbench.py SIFT vs flat-catalogue benchmark
|
|
356
|
+
evalsuite.py BFCL-style function-call accuracy
|
|
357
|
+
examples/ quickstart, live smokes, serve_http / serve_mcp
|
|
358
|
+
tests/ pytest suite (offline, deterministic)
|
|
359
|
+
.github/workflows/ CI (lint+test) and PyPI publish
|
|
360
|
+
Dockerfile containerized OpenAPI server
|
|
361
|
+
core/ (reference) a Go implementation of the same gateway (optional backend)
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
## License
|
|
365
|
+
|
|
366
|
+
MIT.
|