agent-corex 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agent_core/__init__.py ADDED
@@ -0,0 +1,19 @@
"""
Agent-Core: a fast, accurate MCP tool retrieval engine for LLMs with semantic search.

Offers several ranking methods for picking the most relevant tools out of a
large tool set.
"""

__version__ = "1.0.0"
__author__ = "Ankit Agarwal"
__email__ = "ankitagarwalpro@gmail.com"

# Re-export the main entry points so they can be imported from the package root.
from agent_core.retrieval.ranker import rank_tools
from agent_core.tools.registry import ToolRegistry
from agent_core.tools.mcp.mcp_loader import MCPLoader

__all__ = ["rank_tools", "ToolRegistry", "MCPLoader"]
@@ -0,0 +1 @@
1
+ """API module."""
agent_core/api/main.py ADDED
@@ -0,0 +1,46 @@
1
"""FastAPI application exposing Agent-Core tool retrieval over HTTP."""

from fastapi import FastAPI

# Import from ``agent_core`` like the rest of the package does. The CLI starts
# this module as "agent_core.api.main:app"; the previous ``packages.*`` paths
# look like a leftover from a pre-packaging layout and would fail at import
# time unless a separate top-level ``packages`` module happened to exist.
from agent_core.tools.registry import ToolRegistry
from agent_core.retrieval.ranker import rank_tools

app = FastAPI()

# Process-wide registry queried by the endpoints defined in this module.
tool_registry = ToolRegistry()

# Example tools (replace with MCP-loaded later)
tool_registry.register({
    "name": "edit_file",
    "description": "Edit a file with line-based changes",
})

tool_registry.register({
    "name": "write_file",
    "description": "Create or overwrite a file",
})

tool_registry.register({
    "name": "run_tests",
    "description": "Run test suite",
})
24
+
25
+
26
@app.get("/retrieve_tools")
def retrieve_tools(query: str, top_k: int = 5, method: str = "hybrid"):
    """
    Retrieve the most relevant tools for a given query.

    Args:
        query: Search query describing what you need
        top_k: Number of results to return (default: 5)
        method: Ranking method - 'keyword', 'hybrid', or 'embedding' (default: 'hybrid')

    Returns:
        List of tools ranked by relevance

    Raises:
        HTTPException: 400 when the ranker rejects the request with a
            ValueError (e.g. an unsupported ``method``).
    """
    # Imported locally so this handler does not depend on the module's
    # top-of-file import list being extended.
    from fastapi import HTTPException

    tools = tool_registry.get_all_tools()
    try:
        return rank_tools(query, tools, top_k, method=method)
    except ValueError as exc:
        # A bad ``method`` is a client mistake; without this translation the
        # ValueError would surface as an opaque 500 Internal Server Error.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
42
+
43
+
44
@app.get("/health")
def health():
    """Liveness endpoint: always answers with a fixed ``status: ok`` payload."""
    payload = {"status": "ok"}
    return payload
@@ -0,0 +1 @@
1
+ """CLI module for Agent-Core."""
agent_core/cli/main.py ADDED
@@ -0,0 +1,164 @@
1
+ """
2
+ Agent-Core CLI interface.
3
+
4
+ Provides command-line tools for managing and using Agent-Core.
5
+ """
6
+
7
+ import typer
8
+ from typing import Optional
9
+ import json
10
+
11
# Root Typer application; running it without arguments prints the help text.
app = typer.Typer(
    no_args_is_help=True,
    name="agent-corex",
    help="Fast, accurate MCP tool retrieval engine for LLMs",
)
16
+
17
+
18
@app.command()
def retrieve(
    query: str = typer.Argument(..., help="Search query for tool retrieval"),
    top_k: int = typer.Option(5, "--top-k", "-k", help="Number of results to return"),
    method: str = typer.Option("hybrid", "--method", "-m", help="Ranking method: keyword, hybrid, or embedding"),
    config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to mcp.json config file")
):
    """
    Retrieve the most relevant tools for a given query.

    Tools are loaded from the MCP config when one is given and loads
    successfully; otherwise a freshly created ToolRegistry is used.

    Exit codes: 0 when results are printed or nothing matched the query,
    1 when no tools are available or an unexpected error occurs.

    Example:
        agent-corex retrieve "edit a file" --top-k 5 --method hybrid
    """
    try:
        # Imported inside the try so a missing or broken dependency is
        # reported through the "Error: ..." path below, not as a traceback.
        from agent_core.retrieval.ranker import rank_tools
        from agent_core.tools.registry import ToolRegistry
        from agent_core.tools.mcp.mcp_loader import MCPLoader
        import pathlib

        registry = ToolRegistry()

        # ``None`` means "MCP tools were not loaded"; fall back to the registry.
        tools = None
        if config:
            config_path = pathlib.Path(config)
            if config_path.exists():
                try:
                    manager = MCPLoader(str(config_path)).load()
                    tools = manager.get_all_tools()
                except Exception as e:
                    # Best effort: a broken MCP config degrades to the registry.
                    typer.echo(f"Warning: Failed to load MCP servers: {e}", err=True)
                    tools = None
            else:
                typer.echo(f"Config file not found: {config}", err=True)

        if tools is None:
            tools = registry.get_all_tools()

        if not tools:
            typer.echo("No tools available", err=True)
            raise typer.Exit(1)

        results = rank_tools(query, tools, top_k=top_k, method=method)

        if not results:
            typer.echo(f"No tools found for query: {query}")
            raise typer.Exit(0)

        typer.echo(f"\nFound {len(results)} tool(s) for: '{query}'\n")
        for i, tool in enumerate(results, 1):
            typer.echo(f"{i}. {tool['name']}")
            typer.echo(f" {tool.get('description', 'No description')}\n")

    except typer.Exit:
        # typer.Exit is a RuntimeError subclass. Without this clause the
        # generic handler below would swallow the deliberate exits above,
        # print "Error: 0"/"Error: 1" and turn the exit-0 path into exit 1.
        raise
    except Exception as e:
        typer.echo(f"Error: {str(e)}", err=True)
        raise typer.Exit(1)
77
+
78
+
79
@app.command()
def start(
    host: str = typer.Option("127.0.0.1", "--host", "-h", help="Server host"),
    port: int = typer.Option(8000, "--port", "-p", help="Server port"),
    reload: bool = typer.Option(True, "--reload/--no-reload", help="Enable auto-reload"),
    config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to mcp.json config file")
):
    """
    Start the Agent-Core API server.

    Runs the ``agent_core.api.main:app`` application under uvicorn. When a
    config path is supplied it is exported through the ``AGENT_CORE_CONFIG``
    environment variable before the server starts.

    Example:
        agent-corex start --host 0.0.0.0 --port 8000
    """
    import uvicorn
    import os

    if config:
        os.environ["AGENT_CORE_CONFIG"] = config

    typer.echo(f"Starting Agent-Core API server at http://{host}:{port}")
    typer.echo("Press Ctrl+C to stop\n")

    server_options = {
        "host": host,
        "port": port,
        "reload": reload,
        "log_level": "info",
    }
    uvicorn.run("agent_core.api.main:app", **server_options)
109
+
110
+
111
@app.command()
def version():
    """Show Agent-Core version."""
    # Deferred import: the package is only loaded when this command runs.
    import agent_core

    banner = f"Agent-Core {agent_core.__version__}"
    typer.echo(banner)
116
+
117
+
118
@app.command()
def health():
    """
    Check API health (requires running server).

    Sends a GET to http://127.0.0.1:8000/health with a 5 second timeout.
    Exits with status 1 when the server cannot be reached, the request fails,
    or the response status is not 200.
    """
    import requests

    # Only the request itself lives in the try, so the typer.Exit raised for
    # an unhealthy status below cannot be caught by the generic handler.
    try:
        response = requests.get("http://127.0.0.1:8000/health", timeout=5)
    except requests.exceptions.ConnectionError:
        typer.echo("✗ Cannot connect to Agent-Core API. Is it running?", err=True)
        raise typer.Exit(1)
    except Exception as e:
        typer.echo(f"✗ Error: {str(e)}", err=True)
        raise typer.Exit(1)

    if response.status_code != 200:
        typer.echo(f"✗ Agent-Core API returned status {response.status_code}", err=True)
        # An unhealthy API must be visible to scripts; this path used to exit 0.
        raise typer.Exit(1)

    typer.echo("✓ Agent-Core API is healthy")
134
+
135
+
136
@app.command()
def config():
    """
    Show configuration information.

    Prints the Agent-Core version, the running Python version, the package
    installation directory, and whether each optional dependency can be found.
    """
    import importlib.util
    import pathlib
    import sys

    import agent_core

    typer.echo(f"Agent-Core {agent_core.__version__}\n")
    typer.echo("Configuration:")
    typer.echo(f" Python version: {sys.version.split()[0]}")
    typer.echo(f" Installation path: {pathlib.Path(agent_core.__file__).parent}")

    # Importable module name -> human-readable label.
    deps = {
        "fastapi": "FastAPI",
        "sentence_transformers": "Sentence Transformers",
        "faiss": "FAISS",
    }

    typer.echo("\nDependencies:")
    for module, name in deps.items():
        # find_spec only locates the module; it does not execute it, so the
        # report does not pay the import cost of every optional dependency.
        try:
            installed = importlib.util.find_spec(module) is not None
        except (ImportError, ValueError):
            installed = False

        if installed:
            typer.echo(f" ✓ {name}")
        else:
            typer.echo(f" ✗ {name} (not installed)")
161
+
162
+
163
+ if __name__ == "__main__":
164
+ app()
File without changes
@@ -0,0 +1,118 @@
1
+ """
2
+ Embedding-based tool indexing and semantic search using FAISS and sentence-transformers.
3
+
4
+ This module provides semantic search capabilities for tools using pre-trained embeddings.
5
+ Tools are indexed by their name and description, enabling semantic similarity matching
6
+ for queries that aren't exact keyword matches.
7
+ """
8
+
9
+ from sentence_transformers import SentenceTransformer
10
+ import faiss
11
+ import numpy as np
12
+ from typing import List, Dict, Any, Optional
13
+
14
+
15
class EmbeddingIndexer:
    """
    FAISS-based semantic search indexer for tools.

    Uses sentence-transformers to generate embeddings for tool names and descriptions,
    then uses FAISS for efficient similarity search.

    Attributes:
        model: Cached SentenceTransformer model (shared across instances)
        tools: List of tools in the index (a private copy, in insertion order)
        index: FAISS index for similarity search, or None while nothing is indexed
    """

    _model = None  # Class-level cache to avoid reloading model

    def __init__(self, tools: Optional[List[Dict[str, Any]]] = None):
        """
        Initialize the embedding indexer.

        Args:
            tools: Optional list of tools to index. Each tool should have 'name' and 'description'.
        """
        # Load model once and cache at class level
        if EmbeddingIndexer._model is None:
            EmbeddingIndexer._model = SentenceTransformer(
                "sentence-transformers/all-MiniLM-L6-v2",
                cache_folder=".agent_core_models",
            )

        self.model = EmbeddingIndexer._model
        # Copy the input: add_tools() appends to self.tools and must never
        # grow the list object owned by the caller.
        self.tools = list(tools) if tools else []
        self.index = None

        if self.tools:
            self._build_index(self.tools)

    @staticmethod
    def _tool_text(tool: Dict[str, Any]) -> str:
        """Return the text that is embedded for a tool: its name plus description."""
        return f"{tool['name']} {tool.get('description', '')}"

    def _embed_tools(self, tools: List[Dict[str, Any]]) -> np.ndarray:
        """Encode tools into a contiguous float32 matrix, one row per tool."""
        texts = [self._tool_text(t) for t in tools]
        return np.ascontiguousarray(self.model.encode(texts), dtype=np.float32)

    def _build_index(self, tools: List[Dict[str, Any]]) -> None:
        """
        Build FAISS index from tools.

        Args:
            tools: List of tools to index. An empty list leaves the indexer
                without an index instead of failing on an empty embedding matrix.
        """
        if not tools:
            self.index = None
            return

        embeddings = self._embed_tools(tools)
        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)

    def add_tools(self, tools: List[Dict[str, Any]]) -> None:
        """
        Add new tools to the index.

        Args:
            tools: List of tools to add. An empty list is a no-op.
        """
        # Materialize once so one-shot iterables are not consumed twice
        # (once for embedding, once for bookkeeping).
        tools = list(tools)
        if not tools:
            return

        embeddings = self._embed_tools(tools)

        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])

        self.index.add(embeddings)
        # Row i of the index must correspond to self.tools[i].
        self.tools.extend(tools)

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Search for tools semantically similar to the query.

        Args:
            query: The search query.
            top_k: Number of top results to return. None falls back to 5;
                zero or a negative value yields an empty result.

        Returns:
            List of tools sorted by semantic similarity to the query.
        """
        if top_k is None:
            top_k = 5

        if top_k <= 0 or self.index is None or not self.tools:
            return []

        query_vec = np.ascontiguousarray(self.model.encode([query]), dtype=np.float32)
        neighbours = min(top_k, len(self.tools))
        _distances, indices = self.index.search(query_vec, neighbours)

        # Skip any index that does not map to a stored tool (e.g. a negative
        # placeholder for "no neighbour").
        return [
            self.tools[int(i)]
            for i in indices[0]
            if 0 <= i < len(self.tools)
        ]
@@ -0,0 +1,105 @@
1
+ """
2
+ Hybrid scoring system combining keyword-based and embedding-based relevance.
3
+
4
+ This module provides scoring methods that combine:
5
+ 1. Keyword overlap (fast, exact matching)
6
+ 2. Semantic similarity via embeddings (catches related tools)
7
+
8
+ The hybrid approach gives the best of both worlds: speed + accuracy.
9
+ """
10
+
11
+ from typing import List, Dict, Any, Tuple, Optional
12
+ from .scorer import score as keyword_score
13
+ from .embeddings import EmbeddingIndexer
14
+ from sentence_transformers import SentenceTransformer
15
+ import numpy as np
16
+
17
+
18
class HybridScorer:
    """
    Combines keyword-based and embedding-based scoring.

    Uses a weighted combination of:
    - Keyword overlap score (0.3 weight by default)
    - Embedding similarity score (0.7 weight by default)

    This gives semantic search the priority while still considering exact keyword matches.

    Attributes:
        keyword_weight: Normalized weight applied to the keyword score.
        embedding_weight: Normalized weight applied to the embedding score.
        model: SentenceTransformer model, loaded once and shared by all instances.
    """

    # Class-level cache: a scorer is created per ranking call, and reloading
    # the model for each of them would dominate the cost of the call.
    _model = None

    def __init__(self, keyword_weight: float = 0.3, embedding_weight: float = 0.7):
        """
        Initialize the hybrid scorer.

        Args:
            keyword_weight: Weight for keyword-based scoring (0-1).
            embedding_weight: Weight for embedding-based scoring (0-1).

        Raises:
            ValueError: If the two weights sum to zero and cannot be normalized.
        """
        total = keyword_weight + embedding_weight
        if total == 0:
            raise ValueError("keyword_weight and embedding_weight must not sum to zero")

        # Normalize weights so they always add up to 1
        self.keyword_weight = keyword_weight / total
        self.embedding_weight = embedding_weight / total

        if HybridScorer._model is None:
            HybridScorer._model = SentenceTransformer(
                "sentence-transformers/all-MiniLM-L6-v2",
                cache_folder=".agent_core_models",
            )
        self.model = HybridScorer._model

    @staticmethod
    def _tool_text(tool: Dict[str, Any]) -> str:
        """Return the text that is embedded for a tool: its name plus description."""
        return f"{tool['name']} {tool.get('description', '')}"

    @staticmethod
    def _unit_cosine(a, b) -> float:
        """Return the cosine similarity of two vectors rescaled from [-1, 1] to [0, 1]."""
        denom = float(np.linalg.norm(a)) * float(np.linalg.norm(b))
        if denom == 0.0:
            # A zero vector has no direction; report the neutral midpoint
            # rather than dividing by zero and propagating NaN.
            return 0.5
        cosine = float(np.dot(a, b)) / denom
        # Clamp away floating-point overshoot so the result stays in [0, 1].
        cosine = max(-1.0, min(1.0, cosine))
        return (cosine + 1.0) / 2.0

    def _combine(self, kw_score: float, embedding_score: float) -> float:
        """Return the weighted sum of the keyword and embedding scores."""
        return float(
            self.keyword_weight * kw_score
            + self.embedding_weight * embedding_score
        )

    def score(self, query: str, tool: Dict[str, Any]) -> float:
        """
        Compute hybrid score for a tool.

        Args:
            query: The search query.
            tool: Tool dictionary with 'name' and 'description'.

        Returns:
            Hybrid score between 0 and 1.
        """
        kw_score = keyword_score(query, tool)

        query_vec = self.model.encode([query])[0]
        tool_vec = self.model.encode([self._tool_text(tool)])[0]

        return self._combine(kw_score, self._unit_cosine(query_vec, tool_vec))

    def score_batch(
        self, query: str, tools: List[Dict[str, Any]]
    ) -> List[Tuple[Dict[str, Any], float]]:
        """
        Score multiple tools efficiently.

        The query and all tool texts are embedded in a single encode call
        instead of two encode calls per tool.

        Args:
            query: The search query.
            tools: List of tools to score.

        Returns:
            List of (tool, score) tuples, in the same order as ``tools``.
        """
        tools = list(tools)
        if not tools:
            return []

        texts = [query]
        texts.extend(self._tool_text(tool) for tool in tools)
        vectors = np.asarray(self.model.encode(texts))
        query_vec, tool_vecs = vectors[0], vectors[1:]

        return [
            (
                tool,
                self._combine(
                    keyword_score(query, tool),
                    self._unit_cosine(query_vec, tool_vec),
                ),
            )
            for tool, tool_vec in zip(tools, tool_vecs)
        ]
@@ -0,0 +1,96 @@
1
+ """
2
+ Tool ranking engine for retrieval.
3
+
4
+ Supports multiple ranking strategies:
5
+ 1. Keyword-only (fast, for simple queries)
6
+ 2. Hybrid (keyword + embeddings, recommended)
7
+ 3. Embedding-only (semantic search only)
8
+ """
9
+
10
+ from typing import List, Dict, Any, Optional
11
+ from .scorer import score as keyword_score
12
+ from .hybrid_scorer import HybridScorer
13
+
14
+
15
def rank_tools(
    query: str,
    tools: List[Dict[str, Any]],
    top_k: int = 5,
    method: str = "hybrid"
) -> List[Dict[str, Any]]:
    """
    Return the tools most relevant to a query, using the chosen strategy.

    Args:
        query: The search query.
        tools: Candidate tools to rank.
        top_k: Maximum number of tools to return.
        method: One of 'keyword' (keyword overlap only), 'hybrid'
            (keyword + semantic, the default) or 'embedding'
            (semantic similarity only).

    Returns:
        Top-k tools ranked by relevance.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Reject unsupported strategies up front, then dispatch.
    if method not in ("keyword", "hybrid", "embedding"):
        raise ValueError(f"Unknown ranking method: {method}")

    if method == "keyword":
        return _rank_by_keyword(query, tools, top_k)
    if method == "hybrid":
        return _rank_by_hybrid(query, tools, top_k)
    return _rank_by_embedding(query, tools, top_k)
44
+
45
+
46
def _rank_by_keyword(
    query: str, tools: List[Dict[str, Any]], top_k: int
) -> List[Dict[str, Any]]:
    """Return up to top_k tools with a positive keyword score, best first."""
    scored_pairs = [(candidate, keyword_score(query, candidate)) for candidate in tools]

    # Stable descending sort: tools with equal scores keep their input order.
    best_first = sorted(scored_pairs, key=lambda pair: pair[1], reverse=True)

    # Tools sharing no keyword with the query are dropped entirely.
    matches = [candidate for candidate, value in best_first if value > 0]

    return matches[:top_k]
63
+
64
+
65
def _rank_by_hybrid(
    query: str, tools: List[Dict[str, Any]], top_k: int
) -> List[Dict[str, Any]]:
    """Return the top_k tools by hybrid score, or by keyword score if that fails."""
    try:
        pairs = HybridScorer().score_batch(query, tools)
        pairs.sort(key=lambda pair: pair[1], reverse=True)

        # Zero scores are deliberately kept here, unlike in the keyword path.
        top_pairs = pairs[:top_k]
        return [candidate for candidate, _score in top_pairs]
    except Exception as e:
        # Any failure in hybrid scoring degrades to keyword-only ranking.
        print(f"Warning: Embedding model failed ({e}), falling back to keyword ranking")
        return _rank_by_keyword(query, tools, top_k)
82
+
83
+
84
def _rank_by_embedding(
    query: str, tools: List[Dict[str, Any]], top_k: int
) -> List[Dict[str, Any]]:
    """Return the top_k tools by embedding search, or by keyword score if that fails."""
    # Imported outside the try on purpose: a failing import propagates to the
    # caller instead of triggering the keyword fallback.
    from .embeddings import EmbeddingIndexer

    try:
        results = EmbeddingIndexer(tools).search(query, top_k)
    except Exception as e:
        # Any failure while indexing or searching degrades to keyword ranking.
        print(f"Warning: Embedding model failed ({e}), falling back to keyword ranking")
        results = _rank_by_keyword(query, tools, top_k)
    return results
@@ -0,0 +1,18 @@
1
+ import re
2
+
3
# Compiled once at import time; tokenize() runs for every (query, tool) pair.
_WORD_RE = re.compile(r"\w+")


def tokenize(text: str) -> set:
    """Return the set of lower-cased word tokens (runs of ``\\w`` characters) in text."""
    return set(_WORD_RE.findall(text.lower()))


def score(query: str, tool) -> float:
    """
    Basic keyword overlap scoring.

    Args:
        query: The search query.
        tool: Mapping with a 'name' key and an optional 'description' key.

    Returns:
        The fraction of distinct query tokens that also occur in the tool's
        name or description, in the range 0.0 to 1.0. Returns 0.0 when either
        side has no tokens.

    Raises:
        KeyError: If the tool has no 'name' key.
    """
    query_tokens = tokenize(query)

    # ``or ""`` also covers an explicit ``description: None``, which would
    # otherwise be formatted as the literal word "None" and match the query
    # token "none".
    description = tool.get("description") or ""
    tool_tokens = tokenize(f"{tool['name']} {description}")

    if not query_tokens or not tool_tokens:
        return 0.0

    return len(query_tokens & tool_tokens) / len(query_tokens)
File without changes
@@ -0,0 +1,10 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class BaseTool(ABC):
    """Abstract base class for tools; subclasses must implement ``run``."""

    # Identifier of the tool; empty on the base class.
    name = ""

    @abstractmethod
    async def run(self, **kwargs):
        """Execute the tool with the given keyword arguments."""
        return None
File without changes