sift-tools 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sift/__init__.py ADDED
@@ -0,0 +1,206 @@
1
+ """SIFT — Search · Inspect · Filter · Trigger.
2
+
3
+ Hierarchical, search-first tool discovery for LLM agents. Give the model 3
4
+ meta-tools instead of a 30k-token catalogue; it discovers the rest by navigating.
5
+
6
+ Quickstart::
7
+
8
+ from sift import Sift
9
+
10
+ sift = Sift()
11
+
12
+ @sift.tool("google_workspace.gmail.read",
13
+ description="Read emails from the inbox",
14
+ params={"q": "string:o:is:unread:search query", "m": "number:o:10:max"},
15
+ returns=["id", "subject", "from", "snippet", "date"])
16
+ def gmail_read(q="is:unread", m=10):
17
+ return {"id": "1", "subject": "Hi", "from": "a@b.c", "snippet": "...",
18
+ "date": "2026-06-30", "body": "filtered out"}
19
+
20
+ sift.build_index()
21
+
22
+ sift.search_tools("read my last email") # discovery
23
+ sift.get_tool_schema("google_workspace.gmail.read") # TOON schema
24
+ sift.execute_tool("google_workspace.gmail.read", {"m": 1}) # run + filter
25
+
26
+ # Plug into any stack:
27
+ sift.openai_tools() # function-calling specs
28
+ sift.langchain_tools() # LangChain BaseTool list
29
+ sift.serve_mcp() # expose as an MCP server
30
+ """
31
+ from __future__ import annotations
32
+
33
+ import json
34
+ from typing import Callable
35
+
36
+ from .gateway import Gateway, SearchResult
37
+ from .metatools import META_TOOL_NAMES, SYSTEM_PROMPT, tool_specs
38
+ from .registry import Registry, ToolDef
39
+
40
+ __all__ = ["Sift", "Registry", "ToolDef", "SearchResult", "SYSTEM_PROMPT", "tool_specs"]
41
+ __version__ = "0.1.0"
42
+
43
+
44
class Sift:
    """The public facade: register tools, build the index, expose meta-tools.

    Typical flow: register tools (``tool`` / ``add_tool``), call
    ``build_index()``, then call the meta-tools directly or route model tool
    calls through ``dispatch``.
    """

    def __init__(self, *, registry: Registry | None = None, embedder=None,
                 model_name: str | None = None, retrieval: str = "hybrid",
                 reranker=None, min_score: float = 0.0) -> None:
        """Store configuration; nothing is indexed until ``build_index()``.

        Args:
            registry: Existing registry to use; a new ``Registry`` is created
                when omitted.
            embedder: Embedder handed to the gateway. When ``None`` and
                ``retrieval`` is not ``"bm25"``, ``build_index()`` creates a
                ``FastEmbedder``.
            model_name: Passed to ``FastEmbedder`` when one is created.
            retrieval: Retrieval mode forwarded to the gateway.
            reranker: Optional reranker forwarded to the gateway.
            min_score: Score threshold forwarded to the gateway.
        """
        # Compare with None explicitly so a caller-supplied registry is never
        # swapped for a fresh one just because it evaluates as falsy.
        self.registry = registry if registry is not None else Registry()
        self._embedder = embedder
        self._model_name = model_name
        self._retrieval = retrieval
        self._reranker = reranker
        self._min_score = min_score
        self._gateway: Gateway | None = None

    # ----------------------------------------------------------- registration
    def tool(self, path: str, *, description: str, params: dict | None = None,
             returns: list[str] | None = None, risk: bool = False,
             transform: Callable | None = None) -> Callable:
        """Decorator: register a function as a tool at ``path``.

        The decorated function is returned unchanged, so it stays directly
        callable.
        """
        def deco(fn: Callable[..., dict]) -> Callable[..., dict]:
            self.registry.add(
                ToolDef(path, description, params or {}, returns or [], risk, fn, transform))
            return fn
        return deco

    def add_tool(self, path: str, fn: Callable[..., dict], *, description: str,
                 params: dict | None = None, returns: list[str] | None = None,
                 risk: bool = False, transform: Callable | None = None) -> "Sift":
        """Register ``fn`` as a tool at ``path`` (non-decorator form of ``tool``).

        Returns ``self`` so calls can be chained.
        """
        self.registry.add(
            ToolDef(path, description, params or {}, returns or [], risk, fn, transform))
        return self

    def describe(self, node_path: str, description: str) -> "Sift":
        """Attach a description to the hierarchy node at ``node_path``; returns ``self``."""
        self.registry.describe(node_path, description)
        return self

    def set_response(self, path: str, *, returns: list[str] | None = None,
                     transform: Callable | None = None) -> "Sift":
        """Trim what a tool returns to the model — a field whitelist and/or a
        reshaping transform. Works on imported (MCP/OpenAPI) tools too."""
        self.registry.set_response(path, returns=returns, transform=transform)
        return self

    def scope(self, *, allow: list[str] | None = None, deny: list[str] | None = None,
              allow_risky: bool = True):
        """A scoped view that only sees/runs tools matching the allow/deny globs
        (an `allowedTools` per model/session). Reuses the built index.
        ``allow_risky=False`` additionally blocks every tool flagged ``risk``."""
        # Imported lazily so the scope module is only loaded when scoping is used.
        from .scope import SiftScope
        return SiftScope(self, allow=allow, deny=deny, allow_risky=allow_risky)

    # ----------------------------------------------------------------- index
    def build_index(self) -> "Sift":
        """Create the gateway over the registry and build its index.

        A ``FastEmbedder`` is created only when no embedder was supplied and
        the retrieval mode is not ``"bm25"``. Returns ``self``.
        """
        if self._embedder is None and self._retrieval != "bm25":
            from .embeddings import FastEmbedder
            self._embedder = FastEmbedder(self._model_name)
        self._gateway = Gateway(self.registry, self._embedder, retrieval=self._retrieval,
                                reranker=self._reranker, min_score=self._min_score)
        self._gateway.build_index()
        return self

    @property
    def gateway(self) -> Gateway:
        """The built gateway.

        Raises:
            RuntimeError: if ``build_index()`` has not been called yet.
        """
        if self._gateway is None:
            raise RuntimeError("call build_index() before using the gateway")
        return self._gateway

    # ------------------------------------------------------------ meta-tools
    def search_tools(self, q: str, top_k: int = 5) -> list[SearchResult]:
        """Search the catalogue for ``q``; delegates to the gateway."""
        return self.gateway.search_tools(q, top_k)

    def get_tool_schema(self, path: str) -> str:
        """Return the schema string for ``path``; delegates to the gateway."""
        return self.gateway.get_tool_schema(path)

    def execute_tool(self, path: str, params: dict | None = None) -> dict:
        """Run the tool at ``path`` with ``params``; delegates to the gateway."""
        return self.gateway.execute_tool(path, params)

    def dispatch(self, name: str, arguments: dict | str) -> str:
        """Run a meta-tool call by name; returns a string (TOON or JSON).

        Handy as the single entry point when wiring SIFT into an LLM loop.
        Handles the 3 meta-tools plus ``run_code`` (code mode).

        Args:
            name: ``"search_tools"``, ``"get_tool_schema"``, ``"execute_tool"``
                or ``"run_code"``.
            arguments: The call arguments, either a mapping or the raw JSON
                string produced by the model. A blank string, ``None`` or JSON
                ``null`` is treated as "no arguments".

        Returns:
            The meta-tool's output. Every failure — malformed argument JSON,
            a missing argument, an unknown name, or an exception raised while
            running — is returned as a JSON object with an ``"error"`` key
            rather than raised, so it can be fed back to the model.
        """
        try:
            # Argument parsing lives inside the try block: the JSON comes from
            # the model, so a malformed payload must become a tool result the
            # model can react to instead of crashing the agent loop.
            if isinstance(arguments, str):
                args = json.loads(arguments) if arguments.strip() else {}
                if args is None:
                    args = {}
            else:
                args = dict(arguments or {})
            if not isinstance(args, dict):
                raise TypeError(
                    f"arguments must be a JSON object, got {type(args).__name__}")

            if name == "search_tools":
                # compact TOON with schema inline — lets the model execute directly
                return self.gateway.search_compact(args["q"], int(args.get("top_k", 3)))
            if name == "run_code":
                return self.run_code(args["code"])
            if name == "get_tool_schema":
                return self.get_tool_schema(args.get("path", ""))
            if name == "execute_tool":
                res = self.execute_tool(args["path"], args.get("params") or {})
                return json.dumps(res, ensure_ascii=False, default=str)
            return json.dumps({"error": f"unknown meta-tool {name!r}"})
        except Exception as exc:  # surfaced back to the model as a tool result
            return json.dumps({"error": str(exc)}, ensure_ascii=False)

    # --------------------------------------------------------------- adapters
    def openai_tools(self) -> list[dict]:
        """OpenAI/OpenRouter function-calling specs for the 3 meta-tools."""
        return tool_specs()

    @property
    def system_prompt(self) -> str:
        """The system prompt that accompanies the meta-tools."""
        return SYSTEM_PROMPT

    # --- code mode (orchestrate many tools in one turn) ---
    def code_tools(self) -> list[dict]:
        """Tool specs for code mode, as provided by the ``codemode`` module."""
        from .codemode import code_tool_specs
        return code_tool_specs()

    @property
    def code_system_prompt(self) -> str:
        """The system prompt for code mode."""
        from .codemode import CODE_SYSTEM_PROMPT
        return CODE_SYSTEM_PROMPT

    def run_code(self, code: str) -> str:
        """Run ``code`` through the ``codemode`` module against this instance."""
        from .codemode import run_code
        return run_code(self, code)

    @property
    def meta_tool_names(self) -> tuple[str, ...]:
        """Names of the meta-tools."""
        return META_TOOL_NAMES

    def langchain_tools(self) -> list:
        """The meta-tools wrapped for LangChain (see ``adapters.langchain``)."""
        from .adapters.langchain import langchain_tools
        return langchain_tools(self)

    def anthropic_tools(self) -> list[dict]:
        """The 3 meta-tools in the native Anthropic (Messages API) tool format."""
        from .adapters.anthropic import anthropic_tools
        return anthropic_tools(self)

    # --- for models without native tool calling (prompted / constrained) ---
    def tool_call_schema(self) -> dict:
        """JSON Schema for a prompted step — feed to a structured-output decoder."""
        from .constrain import tool_call_json_schema
        return tool_call_json_schema()

    def json_gbnf(self) -> str:
        """GBNF grammar (llama.cpp) constraining output to valid JSON."""
        from .constrain import json_gbnf
        return json_gbnf()

    def mcp_server(self, name: str = "sift"):
        """Build (without running) an MCP server object for this instance."""
        from .adapters.mcp_server import build_mcp_server
        return build_mcp_server(self, name=name)

    def serve_http(self, *, host: str = "127.0.0.1", port: int = 8000, scope=None) -> None:
        """Run an OpenAPI HTTP server exposing the 3 meta-tools (OpenWebUI tool
        server, REST clients). Requires the ``server`` extra."""
        from .http_server import serve_http
        serve_http(self, host=host, port=port, scope=scope)

    def serve_mcp(self, name: str = "sift", transport: str = "stdio") -> None:
        """Run SIFT as an MCP server exposing the 3 meta-tools.

        transport: "stdio" (default; Claude Desktop, local clients) or "sse" /
        "streamable-http" (remote clients, OpenWebUI). HTTP host/port are taken
        from the MCP server settings / env.
        """
        server = self.mcp_server(name)
        # For stdio, call run() with no arguments so the server's own default
        # applies; any other transport is passed through explicitly.
        if transport == "stdio":
            server.run()
        else:
            server.run(transport=transport)
@@ -0,0 +1 @@
1
+ """Framework adapters: expose SIFT's 3 meta-tools to OpenAI, Anthropic, LangChain and MCP."""
@@ -0,0 +1,70 @@
1
+ """Native Anthropic (Messages API) adapter.
2
+
3
+ Anthropic's tool format differs from OpenAI's: tools use ``input_schema`` (not a
4
+ ``function`` wrapper), the system prompt is a separate argument, and tool calls
5
+ come back as ``tool_use`` content blocks answered with ``tool_result`` blocks.
6
+ This adapter bridges that so SIFT works with the native ``anthropic`` SDK.
7
+
8
+ import anthropic
9
+ from sift.adapters.anthropic import run_agent
10
+ run_agent(sift, anthropic.Anthropic(), "claude-haiku-4.5", "what's my last email?")
11
+
12
+ Requires the ``anthropic`` extra: pip install "sift-tools[anthropic]"
13
+ """
14
+ from __future__ import annotations
15
+
16
+ from typing import Any
17
+
18
+
19
def anthropic_tools(sift) -> list[dict]:
    """Return the meta-tools in Anthropic's tool format.

    Each OpenAI-style spec from ``sift.openai_tools()`` is unwrapped from its
    ``function`` envelope, and its ``parameters`` schema is exposed under
    ``input_schema``.
    """
    functions = (spec["function"] for spec in sift.openai_tools())
    return [
        {
            "name": fn["name"],
            "description": fn["description"],
            "input_schema": fn["parameters"],
        }
        for fn in functions
    ]
30
+
31
+
32
def _text_of(content: Any) -> str:
    """Concatenate the ``text`` of every block in ``content`` whose ``type`` is ``"text"``.

    Blocks of any other type (or without a ``type`` attribute) are ignored.
    """
    return "".join(
        block.text for block in content if getattr(block, "type", None) == "text"
    )
38
+
39
+
40
def run_agent(sift, client: Any, model: str, message: str, *,
              max_tokens: int = 1024, max_steps: int = 12, verbose: bool = False,
              extra: dict | None = None) -> str:
    """Drive a tool-use loop against the native Anthropic Messages API.

    ``client`` is duck-typed: it just needs ``messages.create(...)``.

    Each turn sends the running conversation. If the reply's ``stop_reason`` is
    not ``"tool_use"``, its text blocks are returned as the final answer.
    Otherwise every ``tool_use`` block is run through ``sift.dispatch`` and the
    outputs are sent back as ``tool_result`` blocks in a single user message.

    Raises:
        RuntimeError: if ``max_steps`` turns pass without a final answer.
    """
    tool_specs = anthropic_tools(sift)
    history: list[dict] = [{"role": "user", "content": message}]

    steps_taken = 0
    while steps_taken < max_steps:
        steps_taken += 1
        reply = client.messages.create(
            model=model, system=sift.system_prompt, tools=tool_specs,
            max_tokens=max_tokens, messages=history, **(extra or {}))

        # The assistant turn is recorded verbatim, tool_use blocks included.
        history.append({"role": "assistant", "content": reply.content})

        if getattr(reply, "stop_reason", None) != "tool_use":
            return _text_of(reply.content)

        tool_results = []
        for block in reply.content:
            if getattr(block, "type", None) == "tool_use":
                output = sift.dispatch(block.name, block.input)
                if verbose:
                    print(f" ↳ {block.name}({block.input}) = {output[:160]}")
                tool_results.append(
                    {"type": "tool_result", "tool_use_id": block.id, "content": output})
        history.append({"role": "user", "content": tool_results})

    raise RuntimeError(f"reached max_steps={max_steps} without a final answer")
@@ -0,0 +1,31 @@
1
+ """LangChain adapter — exposes the 3 meta-tools as LangChain ``StructuredTool``s.
2
+
3
+ from sift import Sift
4
+ sift = Sift(); ...; sift.build_index()
5
+ tools = sift.langchain_tools() # plug into any LangChain agent
6
+
7
+ Requires the ``langchain`` extra: pip install "sift-tools[langchain]"
8
+ """
9
+ from __future__ import annotations
10
+
11
+
12
def langchain_tools(sift) -> list:
    """Wrap the three meta-tools as LangChain ``StructuredTool`` objects.

    Each wrapper forwards to ``sift.dispatch`` and returns its string result.
    The wrappers' docstrings are passed through ``StructuredTool.from_function``
    and must stay as written.
    """
    from langchain_core.tools import StructuredTool

    def search_tools(q: str) -> str:
        """Discover tools by natural language; returns candidate paths with scores."""
        payload = {"q": q}
        return sift.dispatch("search_tools", payload)

    def get_tool_schema(path: str) -> str:
        """Compact (TOON) schema of a hierarchy level. Empty path lists categories."""
        payload = {"path": path}
        return sift.dispatch("get_tool_schema", payload)

    def execute_tool(path: str, params: dict | None = None) -> str:
        """Execute a function (full path category.service.function) and return the filtered result."""
        payload = {"path": path, "params": params or {}}
        return sift.dispatch("execute_tool", payload)

    # Each tool is named after its wrapper function.
    wrappers = (search_tools, get_tool_schema, execute_tool)
    return [StructuredTool.from_function(fn, name=fn.__name__) for fn in wrappers]
@@ -0,0 +1,35 @@
1
+ """MCP server adapter — expose SIFT as a Model Context Protocol server.
2
+
3
+ Any MCP client (Claude Desktop, IDEs, etc.) then sees just the 3 meta-tools and
4
+ discovers your whole catalogue through them.
5
+
6
+ from sift import Sift
7
+ sift = Sift(); ...; sift.build_index()
8
+ sift.serve_mcp() # runs a stdio MCP server
9
+
10
+ Requires the ``mcp`` extra: pip install "sift-tools[mcp]"
11
+ """
12
+ from __future__ import annotations
13
+
14
+
15
def build_mcp_server(sift, name: str = "sift"):
    """Build a ``FastMCP`` server named ``name`` exposing the three meta-tools.

    Each handler forwards to ``sift.dispatch`` and returns its string result.
    The server is returned without being started. The handlers' docstrings are
    left as written because they are attached to the registered tools.
    """
    from mcp.server.fastmcp import FastMCP

    server = FastMCP(name)

    def search_tools(q: str) -> str:
        """Discover tools by natural language; returns candidate paths with scores."""
        payload = {"q": q}
        return sift.dispatch("search_tools", payload)

    def get_tool_schema(path: str) -> str:
        """Compact (TOON) schema of a hierarchy level. Empty path lists categories."""
        payload = {"path": path}
        return sift.dispatch("get_tool_schema", payload)

    def execute_tool(path: str, params: dict | None = None) -> str:
        """Execute a function (full path category.service.function); returns the filtered result."""
        payload = {"path": path, "params": params or {}}
        return sift.dispatch("execute_tool", payload)

    # Register in the same order as the decorator form would.
    for handler in (search_tools, get_tool_schema, execute_tool):
        server.tool()(handler)

    return server
@@ -0,0 +1,56 @@
1
+ """OpenAI / OpenRouter (function-calling) adapter.
2
+
3
+ ``sift.openai_tools()`` already returns the tool specs. This module adds a small
4
+ driver that runs a full agent loop against any OpenAI-compatible client (the
5
+ official ``openai`` SDK, OpenRouter via the same SDK, Azure, etc.). The client is
6
+ duck-typed: it just needs ``client.chat.completions.create(...)``.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+
13
def run_agent(sift, client: Any, model: str, message: str, *,
              max_steps: int = 12, verbose: bool = False,
              extra_body: dict | None = None) -> str:
    """Drive a tool-calling loop until the model returns a final answer.

    ``extra_body`` is forwarded to ``chat.completions.create`` — use it for
    provider extras like prompt caching or ``{"reasoning": {"effort": "low"}}``
    to keep tool-routing turns cheap.

    Each turn appends the assistant message to the conversation. If it carries
    no tool calls, its content is returned ("" when empty). Otherwise each call
    is run through ``sift.dispatch`` and answered with a ``tool`` message.

    Raises:
        RuntimeError: if ``max_steps`` turns pass without a final answer.
    """
    history: list[dict] = [
        {"role": "system", "content": sift.system_prompt},
        {"role": "user", "content": message},
    ]
    specs = sift.openai_tools()

    for _step in range(max_steps):
        completion = client.chat.completions.create(
            model=model, messages=history, tools=specs, extra_body=extra_body or {})
        reply = completion.choices[0].message
        calls = getattr(reply, "tool_calls", None)

        entry: dict = {"role": "assistant", "content": reply.content or ""}
        if calls:
            # Re-serialise the SDK objects into plain dicts for the next request.
            entry["tool_calls"] = [
                {
                    "id": call.id,
                    "type": "function",
                    "function": {
                        "name": call.function.name,
                        "arguments": call.function.arguments,
                    },
                }
                for call in calls
            ]
        history.append(entry)

        if not calls:
            return reply.content or ""

        for call in calls:
            fn = call.function
            output = sift.dispatch(fn.name, fn.arguments)
            if verbose:
                print(f" ↳ {fn.name}({fn.arguments}) = {output[:200]}")
            history.append({
                "role": "tool",
                "tool_call_id": call.id,
                "name": fn.name,
                "content": output,
            })

    raise RuntimeError(f"reached max_steps={max_steps} without a final answer")
@@ -0,0 +1,109 @@
1
+ """Prompted (text-based) tool calling — for models WITHOUT native function calling.
2
+
3
+ The core ``sift.dispatch(name, args)`` is format-agnostic, so any text model can
4
+ drive SIFT if we (a) tell it a JSON protocol in the prompt and (b) parse its plain
5
+ text back. This adapter does both, looping search -> execute -> answer.
6
+
7
+ ``generate`` is the only thing you supply: a callable ``generate(prompt: str) ->
8
+ str``. That wraps ANYTHING — a HuggingFace pipeline, llama.cpp, Ollama /generate,
9
+ a base model — so SIFT reaches models the native adapters can't.
10
+
11
+ For weak models, pair this with constrained decoding (see ``sift.constrain``) and
12
+ prefer :func:`single_decision`.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import re
18
+ from typing import Callable
19
+
20
+ PROMPTED_SYSTEM = """You solve the user's task using tools, by emitting JSON.
21
+
22
+ You have 3 tools:
23
+ - search_tools args: {"q": "<what you need>"} -> returns matching tool paths WITH their schema
24
+ - get_tool_schema args: {"path": "<'' | category | category.service>"} -> browse (rarely needed)
25
+ - execute_tool args: {"path": "<category.service.function>", "params": {...}} -> run a tool
26
+
27
+ PROTOCOL — every reply MUST be exactly ONE JSON object and nothing else:
28
+ to use a tool: {"tool": "<name>", "args": {...}}
29
+ to answer the user: {"answer": "<your reply>"}
30
+
31
+ Always start with search_tools, then execute_tool with a full path from the results,
32
+ then give {"answer": ...}. Tool results are provided back to you as JSON."""
33
+
34
+
35
def _extract_json(text: str) -> dict | None:
    """Best-effort: pull the first JSON object out of a model's text reply.

    Tries a fenced code block first (with or without a ``json`` tag). If that
    is absent or does not parse, every ``{`` in the text is tried in order as
    the start of a JSON object.

    Args:
        text: The raw model reply; may contain prose around the JSON.

    Returns:
        The first object that decodes, or ``None`` when there is none.
    """
    text = text.strip()
    fenced = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.S)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            pass  # fall through to scanning the whole reply

    # Use the real JSON decoder instead of counting braces by hand: it knows
    # about string literals and escapes, so a "}" or "{" inside a string value
    # does not end (or start) the object early.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            obj, _end = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return obj
    return None
59
+
60
+
61
def run_agent(sift, generate: Callable[[str], str], message: str, *,
              max_steps: int = 10, verbose: bool = False) -> str:
    """Drive a text model through the tool loop. ``generate(prompt)->str``.

    Each step sends the transcript so far, ending with an ``Assistant:`` cue,
    and expects one JSON object back. ``{"answer": ...}`` ends the loop.
    ``{"tool": ..., "args": ...}`` is run through ``sift.dispatch`` and its
    result appended. Anything else appends a corrective ``System:`` line.

    Raises:
        RuntimeError: if ``max_steps`` steps pass without an answer.
    """
    pieces = [f"{PROMPTED_SYSTEM}\n\nUser: {message}\n"]

    for _step in range(max_steps):
        raw = generate("".join(pieces) + "Assistant:")
        parsed = _extract_json(raw)

        if parsed is None:
            # Echo only a short prefix of the unparseable reply back to the model.
            pieces.append(f"Assistant: {raw.strip()[:200]}\n"
                          'System: Invalid. Reply with ONE JSON object: '
                          '{"tool": ...} or {"answer": ...}.\n')
            continue

        if "answer" in parsed:
            return str(parsed["answer"])

        if "tool" not in parsed:
            pieces.append('System: JSON must contain "tool" or "answer".\n')
            continue

        tool_name = parsed["tool"]
        tool_args = parsed.get("args")
        outcome = sift.dispatch(tool_name, tool_args or {})
        if verbose:
            print(f" ↳ {tool_name}({tool_args}) = {outcome[:160]}")
        pieces.append(f'Assistant: {json.dumps(parsed, ensure_ascii=False)}\n'
                      f"Tool result: {outcome}\n")

    raise RuntimeError(f"reached max_steps={max_steps} without an answer")
90
+
91
+
92
def single_decision(sift, generate: Callable[[str], str], query: str, *,
                    top_k: int = 3) -> dict:
    """One-shot path for very weak models: search server-side, then ask the model
    for a SINGLE decision (which tool + args). Returns {path, args, result}.

    ``result`` is ``{"error": ...}`` when the model names no path or when
    running the chosen tool raises.
    """
    shortlist = sift.gateway.search_compact(query, top_k)
    prompt = (
        f"{shortlist}\n\n"
        + f"User wants: {query}\n"
        + 'Reply with ONLY JSON: {"path": "<one path above>", "args": {<parameters>}}'
    )

    # An unparseable reply is treated like an empty decision.
    decision = _extract_json(generate(prompt)) or {}
    path = decision.get("path")
    args = decision.get("args") or {}

    if not path:
        result = {"error": "no path chosen"}
    else:
        try:
            result = sift.execute_tool(path, args)
        except Exception as exc:
            result = {"error": str(exc)}

    return {"path": path, "args": args, "result": result}