agent-corex 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_core/__init__.py +19 -0
- agent_core/api/__init__.py +1 -0
- agent_core/api/main.py +46 -0
- agent_core/cli/__init__.py +1 -0
- agent_core/cli/main.py +164 -0
- agent_core/retrieval/__init__.py +0 -0
- agent_core/retrieval/embeddings.py +118 -0
- agent_core/retrieval/hybrid_scorer.py +105 -0
- agent_core/retrieval/ranker.py +96 -0
- agent_core/retrieval/scorer.py +18 -0
- agent_core/tools/__init__.py +0 -0
- agent_core/tools/base_tool.py +10 -0
- agent_core/tools/mcp/__init__.py +0 -0
- agent_core/tools/mcp/mcp_client.py +134 -0
- agent_core/tools/mcp/mcp_loader.py +40 -0
- agent_core/tools/mcp/mcp_manager.py +28 -0
- agent_core/tools/registry.py +9 -0
- agent_corex-1.0.0.dist-info/METADATA +511 -0
- agent_corex-1.0.0.dist-info/RECORD +23 -0
- agent_corex-1.0.0.dist-info/WHEEL +5 -0
- agent_corex-1.0.0.dist-info/entry_points.txt +2 -0
- agent_corex-1.0.0.dist-info/licenses/LICENSE +21 -0
- agent_corex-1.0.0.dist-info/top_level.txt +1 -0
agent_core/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent-Core: Fast, accurate MCP tool retrieval engine for LLMs with semantic search.
|
|
3
|
+
|
|
4
|
+
Provides multiple ranking methods for selecting the most relevant tools from large sets.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
__version__ = "1.0.0"
|
|
8
|
+
__author__ = "Ankit Agarwal"
|
|
9
|
+
__email__ = "ankitagarwalpro@gmail.com"
|
|
10
|
+
|
|
11
|
+
from agent_core.retrieval.ranker import rank_tools
|
|
12
|
+
from agent_core.tools.registry import ToolRegistry
|
|
13
|
+
from agent_core.tools.mcp.mcp_loader import MCPLoader
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"rank_tools",
|
|
17
|
+
"ToolRegistry",
|
|
18
|
+
"MCPLoader",
|
|
19
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API module."""
|
agent_core/api/main.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from fastapi import FastAPI
# Import from the installed ``agent_core`` package, exactly like the CLI does.
# The previous ``packages.*`` path does not appear in the wheel's file listing,
# so ``uvicorn agent_core.api.main:app`` could not import this module.
from agent_core.tools.registry import ToolRegistry
from agent_core.retrieval.ranker import rank_tools

# ASGI application served by ``agent-corex start``.
app = FastAPI()

# Process-wide registry queried by the /retrieve_tools endpoint.
tool_registry = ToolRegistry()

# Example tools (replace with MCP-loaded later)
tool_registry.register({
    "name": "edit_file",
    "description": "Edit a file with line-based changes"
})

tool_registry.register({
    "name": "write_file",
    "description": "Create or overwrite a file"
})

tool_registry.register({
    "name": "run_tests",
    "description": "Run test suite"
})
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@app.get("/retrieve_tools")
def retrieve_tools(query: str, top_k: int = 5, method: str = "hybrid"):
    """
    Retrieve the most relevant tools for a given query.

    Args:
        query: Search query describing what you need
        top_k: Number of results to return (default: 5)
        method: Ranking method - 'keyword', 'hybrid', or 'embedding' (default: 'hybrid')

    Returns:
        List of tools ranked by relevance

    Raises:
        HTTPException: 400 when the ranker rejects the request with a
            ValueError (for example an unknown ranking method).
    """
    # Imported locally so this block does not depend on the module header.
    from fastapi import HTTPException

    tools = tool_registry.get_all_tools()
    try:
        return rank_tools(query, tools, top_k, method=method)
    except ValueError as exc:
        # A bad ``method`` is a client error, not a server failure: report it
        # as 400 with the ranker's message instead of an opaque 500.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@app.get("/health")
def health():
    # Liveness probe: the payload is constant and touches neither the tool
    # registry nor the ranking models.
    payload = {"status": "ok"}
    return payload
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI module for Agent-Core."""
|
agent_core/cli/main.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent-Core CLI interface.
|
|
3
|
+
|
|
4
|
+
Provides command-line tools for managing and using Agent-Core.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from typing import Optional
|
|
9
|
+
import json
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(
|
|
12
|
+
name="agent-corex",
|
|
13
|
+
help="Fast, accurate MCP tool retrieval engine for LLMs",
|
|
14
|
+
no_args_is_help=True
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@app.command()
def retrieve(
    query: str = typer.Argument(..., help="Search query for tool retrieval"),
    top_k: int = typer.Option(5, "--top-k", "-k", help="Number of results to return"),
    method: str = typer.Option("hybrid", "--method", "-m", help="Ranking method: keyword, hybrid, or embedding"),
    config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to mcp.json config file")
):
    """
    Retrieve the most relevant tools for a given query.

    Example:
        agent-corex retrieve "edit a file" --top-k 5 --method hybrid
    """
    # Exit codes: 0 on success or when nothing matched, 1 when no tools are
    # available or an unexpected error occurs.
    try:
        from agent_core.retrieval.ranker import rank_tools
        from agent_core.tools.registry import ToolRegistry
        from agent_core.tools.mcp.mcp_loader import MCPLoader
        import pathlib

        # Start from the (empty) local registry; MCP tools replace it only
        # when a config file is given and loads successfully.
        registry = ToolRegistry()
        tools = registry.get_all_tools()

        if config:
            config_path = pathlib.Path(config)
            if config_path.exists():
                try:
                    tools = MCPLoader(str(config_path)).load().get_all_tools()
                except Exception as e:
                    # Best effort: keep going with the registry's tools.
                    typer.echo(f"Warning: Failed to load MCP servers: {e}", err=True)
            else:
                typer.echo(f"Config file not found: {config}", err=True)

        if not tools:
            typer.echo("No tools available", err=True)
            raise typer.Exit(1)

        # Retrieve tools
        results = rank_tools(query, tools, top_k=top_k, method=method)

        if not results:
            typer.echo(f"No tools found for query: {query}")
            raise typer.Exit(0)

        # Display results
        typer.echo(f"\nFound {len(results)} tool(s) for: '{query}'\n")
        for i, tool in enumerate(results, 1):
            typer.echo(f"{i}. {tool['name']}")
            typer.echo(f" {tool.get('description', 'No description')}\n")

    except typer.Exit:
        # typer.Exit derives from RuntimeError, so the generic handler below
        # would otherwise swallow it, print an empty "Error:" line and turn
        # the intended exit code 0 into 1.
        raise
    except Exception as e:
        typer.echo(f"Error: {str(e)}", err=True)
        raise typer.Exit(1)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.command()
def start(
    host: str = typer.Option("127.0.0.1", "--host", "-h", help="Server host"),
    port: int = typer.Option(8000, "--port", "-p", help="Server port"),
    reload: bool = typer.Option(True, "--reload/--no-reload", help="Enable auto-reload"),
    config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to mcp.json config file")
):
    """
    Start the Agent-Core API server.

    Example:
        agent-corex start --host 0.0.0.0 --port 8000
    """
    import uvicorn
    import os

    # Publish the config path through the environment when one was given.
    if config:
        os.environ["AGENT_CORE_CONFIG"] = config

    startup_banner = (
        f"Starting Agent-Core API server at http://{host}:{port}",
        "Press Ctrl+C to stop\n",
    )
    for banner_line in startup_banner:
        typer.echo(banner_line)

    # The app is passed as an import string rather than an object.
    server_options = {
        "host": host,
        "port": port,
        "reload": reload,
        "log_level": "info",
    }
    uvicorn.run("agent_core.api.main:app", **server_options)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@app.command()
def version():
    """Show Agent-Core version."""
    # Imported lazily so the package is only loaded when the command runs.
    from agent_core import __version__ as installed_version

    banner = f"Agent-Core {installed_version}"
    typer.echo(banner)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@app.command()
def health():
    """Check API health (requires running server)."""
    # Exit codes: 0 only when the endpoint answers 200; 1 for any other
    # status, a refused connection or an unexpected error.
    import requests

    # Only the network call lives in the ``try``: typer.Exit derives from
    # RuntimeError, so raising it inside would be caught by the generic
    # handler below.
    try:
        response = requests.get("http://127.0.0.1:8000/health", timeout=5)
    except requests.exceptions.ConnectionError:
        typer.echo("✗ Cannot connect to Agent-Core API. Is it running?", err=True)
        raise typer.Exit(1)
    except Exception as e:
        typer.echo(f"✗ Error: {str(e)}", err=True)
        raise typer.Exit(1)

    if response.status_code != 200:
        typer.echo(f"✗ Agent-Core API returned status {response.status_code}", err=True)
        # An unhealthy answer must be visible to scripts through the exit
        # code, like the other failure paths (previously this exited 0).
        raise typer.Exit(1)

    typer.echo("✓ Agent-Core API is healthy")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@app.command()
def config():
    """Show configuration information."""
    import importlib
    import pathlib
    import sys

    import agent_core
    from agent_core import __version__

    python_version = sys.version.split()[0]
    install_dir = pathlib.Path(agent_core.__file__).parent

    typer.echo(f"Agent-Core {__version__}\n")
    typer.echo("Configuration:")
    typer.echo(f" Python version: {python_version}")
    typer.echo(f" Installation path: {install_dir}")

    # Check if dependencies are installed: (importable module, display name).
    optional_dependencies = (
        ("fastapi", "FastAPI"),
        ("sentence_transformers", "Sentence Transformers"),
        ("faiss", "FAISS"),
    )

    typer.echo("\nDependencies:")
    for module_name, label in optional_dependencies:
        try:
            importlib.import_module(module_name)
        except ImportError:
            typer.echo(f" ✗ {label} (not installed)")
        else:
            typer.echo(f" ✓ {label}")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
if __name__ == "__main__":
|
|
164
|
+
app()
|
|
File without changes
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Embedding-based tool indexing and semantic search using FAISS and sentence-transformers.
|
|
3
|
+
|
|
4
|
+
This module provides semantic search capabilities for tools using pre-trained embeddings.
|
|
5
|
+
Tools are indexed by their name and description, enabling semantic similarity matching
|
|
6
|
+
for queries that aren't exact keyword matches.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from sentence_transformers import SentenceTransformer
|
|
10
|
+
import faiss
|
|
11
|
+
import numpy as np
|
|
12
|
+
from typing import List, Dict, Any, Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class EmbeddingIndexer:
    """
    FAISS-based semantic search indexer for tools.

    Uses sentence-transformers to generate embeddings for tool names and descriptions,
    then uses FAISS for efficient similarity search.

    Attributes:
        model: Cached SentenceTransformer model (shared across instances)
        tools: List of tools in the index (a private copy, never the caller's list)
        index: FAISS index for similarity search, or None while nothing is indexed
    """

    _model = None  # Class-level cache to avoid reloading model

    def __init__(self, tools: Optional[List[Dict[str, Any]]] = None):
        """
        Initialize the embedding indexer.

        Args:
            tools: Optional list of tools to index. Each tool should have 'name' and 'description'.
        """
        # Load model once and cache at class level
        if EmbeddingIndexer._model is None:
            EmbeddingIndexer._model = SentenceTransformer(
                "sentence-transformers/all-MiniLM-L6-v2",
                cache_folder=".agent_core_models"
            )

        self.model = EmbeddingIndexer._model
        # Copy the input: add_tools() grows self.tools and must not mutate
        # the list object owned by the caller.
        self.tools: List[Dict[str, Any]] = list(tools) if tools else []
        self.index = None

        if self.tools:
            self._build_index(self.tools)

    @staticmethod
    def _tool_text(tool: Dict[str, Any]) -> str:
        """Return the text that is embedded for a tool: name plus description."""
        return f"{tool['name']} {tool.get('description', '')}"

    def _embed(self, texts: List[str]) -> np.ndarray:
        """Encode texts with the shared model and return them as a float32 array."""
        return np.array(self.model.encode(texts), dtype=np.float32)

    def _build_index(self, tools: List[Dict[str, Any]]) -> None:
        """
        Build FAISS index from tools.

        Replaces any existing index; ``self.tools`` is not modified.

        Args:
            tools: List of tools to index.
        """
        if not tools:
            # Nothing to embed; the vector dimension cannot be derived from
            # an empty batch.
            self.index = None
            return

        embeddings = self._embed([self._tool_text(t) for t in tools])
        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)

    def add_tools(self, tools: List[Dict[str, Any]]) -> None:
        """
        Add new tools to the index.

        An empty input is a no-op.

        Args:
            tools: List of tools to add.
        """
        new_tools = list(tools)
        if not new_tools:
            return

        embeddings = self._embed([self._tool_text(t) for t in new_tools])

        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])

        self.index.add(embeddings)
        # Extend only after the vectors were added so positions in
        # self.tools keep matching the ids stored in the index.
        self.tools.extend(new_tools)

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Search for tools semantically similar to the query.

        Args:
            query: The search query.
            top_k: Number of top results to return. ``None`` means 5;
                zero or a negative value yields an empty list.

        Returns:
            List of tools sorted by semantic similarity to the query.
        """
        if top_k is None:
            top_k = 5

        if top_k <= 0 or self.index is None or not self.tools:
            return []

        query_vec = self._embed([query])
        _, neighbour_ids = self.index.search(query_vec, min(top_k, len(self.tools)))

        # Skip ids that do not map to a stored tool (FAISS reports -1 when it
        # has fewer neighbours than requested).
        return [
            self.tools[i]
            for i in neighbour_ids[0]
            if 0 <= i < len(self.tools)
        ]
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hybrid scoring system combining keyword-based and embedding-based relevance.
|
|
3
|
+
|
|
4
|
+
This module provides scoring methods that combine:
|
|
5
|
+
1. Keyword overlap (fast, exact matching)
|
|
6
|
+
2. Semantic similarity via embeddings (catches related tools)
|
|
7
|
+
|
|
8
|
+
The hybrid approach gives the best of both worlds: speed + accuracy.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import List, Dict, Any, Tuple, Optional
|
|
12
|
+
from .scorer import score as keyword_score
|
|
13
|
+
from .embeddings import EmbeddingIndexer
|
|
14
|
+
from sentence_transformers import SentenceTransformer
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class HybridScorer:
    """
    Combines keyword-based and embedding-based scoring.

    Uses a weighted combination of:
    - Keyword overlap score (0.3 weight)
    - Embedding similarity score (0.7 weight)

    This gives semantic search the priority while still considering exact keyword matches.
    """

    # Class-level cache so the SentenceTransformer is loaded once per process
    # instead of once per scorer instance.
    _model = None

    def __init__(self, keyword_weight: float = 0.3, embedding_weight: float = 0.7):
        """
        Initialize the hybrid scorer.

        Args:
            keyword_weight: Weight for keyword-based scoring (0-1).
            embedding_weight: Weight for embedding-based scoring (0-1).

        Raises:
            ValueError: If a weight is negative or both weights are zero.
        """
        # Validate before the (slow) model load so bad input fails fast and
        # with a clearer error than a ZeroDivisionError.
        if keyword_weight < 0 or embedding_weight < 0:
            raise ValueError("keyword_weight and embedding_weight must be non-negative")
        total = keyword_weight + embedding_weight
        if total <= 0:
            raise ValueError("keyword_weight and embedding_weight must not both be zero")

        # Normalize weights so they sum to 1
        self.keyword_weight = keyword_weight / total
        self.embedding_weight = embedding_weight / total

        # Initialize the embedding model (shared across instances)
        if HybridScorer._model is None:
            HybridScorer._model = SentenceTransformer(
                "sentence-transformers/all-MiniLM-L6-v2",
                cache_folder=".agent_core_models"
            )
        self.model = HybridScorer._model

    def score(self, query: str, tool: Dict[str, Any]) -> float:
        """
        Compute hybrid score for a tool.

        Args:
            query: The search query.
            tool: Tool dictionary with 'name' and 'description'.

        Returns:
            Hybrid score between 0 and 1.
        """
        return self.score_batch(query, [tool])[0][1]

    def score_batch(
        self, query: str, tools: List[Dict[str, Any]]
    ) -> List[Tuple[Dict[str, Any], float]]:
        """
        Score multiple tools efficiently.

        The query is encoded once and all tool texts are encoded in a single
        batch, instead of re-encoding the query for every tool.

        Args:
            query: The search query.
            tools: List of tools to score.

        Returns:
            List of (tool, score) tuples, in the same order as ``tools``.
        """
        tools = list(tools)
        if not tools:
            return []

        tool_texts = [f"{t['name']} {t.get('description', '')}" for t in tools]
        query_vec = np.asarray(self.model.encode([query]), dtype=np.float64)[0]
        tool_vecs = np.asarray(self.model.encode(tool_texts), dtype=np.float64)

        # Cosine similarity between the query and every tool vector.
        dots = tool_vecs @ query_vec
        norms = np.linalg.norm(tool_vecs, axis=1) * np.linalg.norm(query_vec)
        # A zero-length vector has no direction; treat it as similarity 0
        # rather than dividing by zero and producing NaN.
        cosine = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

        # Map cosine from [-1, 1] to the 0-1 range used by the keyword score
        embedding_scores = (cosine + 1) / 2

        return [
            (
                tool,
                float(
                    self.keyword_weight * keyword_score(query, tool)
                    + self.embedding_weight * emb
                ),
            )
            for tool, emb in zip(tools, embedding_scores)
        ]
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tool ranking engine for retrieval.
|
|
3
|
+
|
|
4
|
+
Supports multiple ranking strategies:
|
|
5
|
+
1. Keyword-only (fast, for simple queries)
|
|
6
|
+
2. Hybrid (keyword + embeddings, recommended)
|
|
7
|
+
3. Embedding-only (semantic search only)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import List, Dict, Any, Optional
|
|
11
|
+
from .scorer import score as keyword_score
|
|
12
|
+
from .hybrid_scorer import HybridScorer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def rank_tools(
    query: str,
    tools: List[Dict[str, Any]],
    top_k: int = 5,
    method: str = "hybrid"
) -> List[Dict[str, Any]]:
    """
    Rank tools by relevance to a query.

    Args:
        query: The search query.
        tools: List of tools to rank.
        top_k: Number of top results to return. Zero or a negative value
            yields an empty list.
        method: Ranking method ('keyword', 'hybrid', or 'embedding').
            - 'keyword': Fast, exact match only
            - 'hybrid': Recommended, combines keyword + semantic
            - 'embedding': Semantic similarity only

    Returns:
        Top-k tools ranked by relevance.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Reject unknown methods first so a typo is reported even when there is
    # nothing to rank.
    if method not in ("keyword", "hybrid", "embedding"):
        raise ValueError(f"Unknown ranking method: {method}")

    # The keyword and hybrid rankers slice with ``[:top_k]``; a negative
    # value would silently mean "all but the last k" instead of "no results".
    if top_k is not None and top_k <= 0:
        return []

    # Nothing to rank: skip building scorers or indexes.
    if not tools:
        return []

    if method == "keyword":
        return _rank_by_keyword(query, tools, top_k)
    if method == "hybrid":
        return _rank_by_hybrid(query, tools, top_k)
    return _rank_by_embedding(query, tools, top_k)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _rank_by_keyword(
|
|
47
|
+
query: str, tools: List[Dict[str, Any]], top_k: int
|
|
48
|
+
) -> List[Dict[str, Any]]:
|
|
49
|
+
"""Rank by keyword overlap only (fast)."""
|
|
50
|
+
scored = []
|
|
51
|
+
|
|
52
|
+
for tool in tools:
|
|
53
|
+
s = keyword_score(query, tool)
|
|
54
|
+
scored.append((tool, s))
|
|
55
|
+
|
|
56
|
+
# Sort descending
|
|
57
|
+
scored.sort(key=lambda x: x[1], reverse=True)
|
|
58
|
+
|
|
59
|
+
# Filter zero scores
|
|
60
|
+
ranked = [tool for tool, s in scored if s > 0]
|
|
61
|
+
|
|
62
|
+
return ranked[:top_k]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _rank_by_hybrid(
    query: str, tools: List[Dict[str, Any]], top_k: int
) -> List[Dict[str, Any]]:
    """
    Rank by hybrid scoring (keyword + embeddings).

    Falls back to keyword-only ranking when the hybrid scorer cannot be
    created or fails while scoring.
    """
    import logging

    try:
        scorer = HybridScorer()
        scored = scorer.score_batch(query, tools)
    except Exception as e:
        # Fallback to keyword-only if embedding model fails. Reported through
        # logging rather than print() so library diagnostics do not end up on
        # stdout, mixed into the CLI's result listing.
        logging.getLogger(__name__).warning(
            "Embedding model failed (%s), falling back to keyword ranking", e
        )
        return _rank_by_keyword(query, tools, top_k)

    # Sort descending
    scored.sort(key=lambda x: x[1], reverse=True)

    # Return top-k (don't filter zero scores for embeddings)
    return [tool for tool, _ in scored[:top_k]]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _rank_by_embedding(
    query: str, tools: List[Dict[str, Any]], top_k: int
) -> List[Dict[str, Any]]:
    """
    Rank by semantic similarity only (requires embeddings).

    Falls back to keyword-only ranking when building or querying the
    embedding index fails.
    """
    import logging

    from .embeddings import EmbeddingIndexer

    try:
        indexer = EmbeddingIndexer(tools)
        return indexer.search(query, top_k)
    except Exception as e:
        # Fallback to keyword-only if embedding model fails. Reported through
        # logging rather than print() so library diagnostics do not end up on
        # stdout, mixed into the CLI's result listing.
        logging.getLogger(__name__).warning(
            "Embedding model failed (%s), falling back to keyword ranking", e
        )
        return _rank_by_keyword(query, tools, top_k)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def tokenize(text: str):
    """
    Return the set of lowercase word tokens found in ``text``.

    Tokens are runs of ``\\w`` characters, so an underscore does not split a
    word: ``"edit_file"`` is a single token.
    """
    return set(re.findall(r"\w+", text.lower()))


def score(query: str, tool) -> float:
    """
    Basic keyword overlap scoring.

    Args:
        query: The search query.
        tool: Mapping with a 'name' key and an optional 'description'.

    Returns:
        Fraction of the query's tokens that also occur in the tool's name or
        description, between 0.0 and 1.0. An empty query scores 0.0.
    """
    query_tokens = tokenize(query)
    if not query_tokens:
        return 0.0

    # A missing or None description counts as empty text. Interpolating None
    # directly would add the literal token "none" and match such queries.
    description = tool.get("description") or ""
    tool_tokens = tokenize(f"{tool['name']} {description}")

    return len(query_tokens & tool_tokens) / len(query_tokens)
|
|
File without changes
|
|
File without changes
|