indx 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indx/__init__.py +36 -0
- indx/_version.py +3 -0
- indx/agent/__init__.py +54 -0
- indx/agent/claude_agent.py +62 -0
- indx/agent/connector.py +309 -0
- indx/agent/langchain.py +106 -0
- indx/agent/mcp.py +72 -0
- indx/agent/openai_agents.py +43 -0
- indx/agent/pydantic_ai.py +47 -0
- indx/agent/schema.py +177 -0
- indx/app/__init__.py +26 -0
- indx/app/api.py +667 -0
- indx/app/models.py +212 -0
- indx/app/server.py +110 -0
- indx/app/static/.gitkeep +0 -0
- indx/archive/__init__.py +6 -0
- indx/archive/format.py +24 -0
- indx/archive/reader.py +86 -0
- indx/archive/writer.py +52 -0
- indx/cli/__init__.py +1 -0
- indx/cli/_render.py +97 -0
- indx/cli/app.py +423 -0
- indx/cli/build.py +270 -0
- indx/cli/inspect.py +75 -0
- indx/cli/query.py +61 -0
- indx/config/__init__.py +22 -0
- indx/config/defaults.py +27 -0
- indx/config/loader.py +224 -0
- indx/config/schema.py +139 -0
- indx/core/__init__.py +24 -0
- indx/core/chunk.py +50 -0
- indx/core/context.py +44 -0
- indx/core/document.py +49 -0
- indx/core/knowledge_space.py +125 -0
- indx/core/parsed.py +32 -0
- indx/core/relation.py +24 -0
- indx/core/source.py +23 -0
- indx/core/stats.py +22 -0
- indx/demo/__init__.py +9 -0
- indx/demo/corpus/engineering/code-review.md +23 -0
- indx/demo/corpus/engineering/guide.md +21 -0
- indx/demo/corpus/handbook/onboarding.md +22 -0
- indx/demo/corpus/handbook/welcome.md +18 -0
- indx/demo/corpus/people/remote-work.md +20 -0
- indx/demo/corpus/people/team.txt +16 -0
- indx/demo/corpus/people/time-off.md +19 -0
- indx/embed/__init__.py +9 -0
- indx/embed/azure.py +173 -0
- indx/embed/base.py +15 -0
- indx/embed/bedrock.py +168 -0
- indx/embed/bge_m3.py +104 -0
- indx/embed/cohere.py +122 -0
- indx/embed/e5.py +100 -0
- indx/embed/hash_embedder.py +34 -0
- indx/embed/litellm.py +99 -0
- indx/embed/openai.py +91 -0
- indx/embed/vertex.py +119 -0
- indx/errors.py +81 -0
- indx/llm/__init__.py +10 -0
- indx/llm/anthropic.py +110 -0
- indx/llm/azure.py +221 -0
- indx/llm/base.py +23 -0
- indx/llm/bedrock.py +144 -0
- indx/llm/litellm.py +99 -0
- indx/llm/none.py +24 -0
- indx/llm/ollama.py +117 -0
- indx/llm/openai.py +136 -0
- indx/llm/vertex.py +142 -0
- indx/llm/vllm.py +108 -0
- indx/output/__init__.py +13 -0
- indx/output/base.py +15 -0
- indx/output/indx_writer.py +90 -0
- indx/output/jsonl_writer.py +30 -0
- indx/output/langchain.py +101 -0
- indx/output/llamaindex.py +102 -0
- indx/parsers/__init__.py +10 -0
- indx/parsers/base.py +19 -0
- indx/parsers/docai.py +161 -0
- indx/parsers/docintel.py +158 -0
- indx/parsers/docling.py +197 -0
- indx/parsers/llamaparse.py +112 -0
- indx/parsers/markitdown.py +89 -0
- indx/parsers/plaintext.py +35 -0
- indx/parsers/textract.py +106 -0
- indx/parsers/unstructured.py +119 -0
- indx/pipeline/__init__.py +6 -0
- indx/pipeline/pipeline.py +671 -0
- indx/pipeline/stage.py +14 -0
- indx/pipeline/stages/__init__.py +17 -0
- indx/pipeline/stages/chunk.py +55 -0
- indx/pipeline/stages/enrich.py +311 -0
- indx/pipeline/stages/pack.py +35 -0
- indx/pipeline/stages/parse.py +23 -0
- indx/pipeline/stages/relate.py +209 -0
- indx/pipeline/stages/walk.py +26 -0
- indx/py.typed +0 -0
- indx/registry/__init__.py +27 -0
- indx/registry/builtins.py +136 -0
- indx/registry/plugins.py +48 -0
- indx/registry/registry.py +74 -0
- indx/store/__init__.py +13 -0
- indx/store/azure_search.py +311 -0
- indx/store/base.py +56 -0
- indx/store/bigquery.py +261 -0
- indx/store/chroma.py +202 -0
- indx/store/jsonl.py +48 -0
- indx/store/lancedb.py +209 -0
- indx/store/opensearch.py +278 -0
- indx/store/pgvector.py +230 -0
- indx/store/qdrant.py +243 -0
- indx/store/s3vectors.py +304 -0
- indx/store/vertex_vector.py +244 -0
- indx/utils/__init__.py +7 -0
- indx/utils/cache.py +84 -0
- indx/utils/hashing.py +12 -0
- indx/utils/io.py +35 -0
- indx/utils/lazy.py +26 -0
- indx/utils/logging.py +39 -0
- indx/utils/zip_input.py +70 -0
- indx/vlm/__init__.py +5 -0
- indx/vlm/azure.py +155 -0
- indx/vlm/base.py +15 -0
- indx/vlm/bedrock.py +139 -0
- indx/vlm/gpt4o.py +100 -0
- indx/vlm/local.py +142 -0
- indx/vlm/none.py +13 -0
- indx/vlm/qwen_vl.py +158 -0
- indx/vlm/vertex.py +122 -0
- indx-0.0.1.dist-info/METADATA +306 -0
- indx-0.0.1.dist-info/RECORD +134 -0
- indx-0.0.1.dist-info/WHEEL +4 -0
- indx-0.0.1.dist-info/entry_points.txt +61 -0
- indx-0.0.1.dist-info/licenses/LICENSE +201 -0
- indx-0.0.1.dist-info/licenses/NOTICE +16 -0
indx/__init__.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""indx — make directories AI-ready, not just files.
|
|
2
|
+
|
|
3
|
+
Public SDK surface. The CLI is this same surface with handles (CLI⇄SDK parity).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from indx._version import __version__
|
|
7
|
+
from indx.core import (
|
|
8
|
+
Chunk,
|
|
9
|
+
Document,
|
|
10
|
+
KnowledgeSpace,
|
|
11
|
+
Manifest,
|
|
12
|
+
ParsedDoc,
|
|
13
|
+
Relation,
|
|
14
|
+
RelationType,
|
|
15
|
+
Source,
|
|
16
|
+
SpaceContext,
|
|
17
|
+
SpaceStats,
|
|
18
|
+
)
|
|
19
|
+
from indx.pipeline import DirectoryPipeline
|
|
20
|
+
from indx.store.base import SearchHit
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"__version__",
|
|
24
|
+
"DirectoryPipeline",
|
|
25
|
+
"KnowledgeSpace",
|
|
26
|
+
"Manifest",
|
|
27
|
+
"Document",
|
|
28
|
+
"Chunk",
|
|
29
|
+
"Relation",
|
|
30
|
+
"RelationType",
|
|
31
|
+
"ParsedDoc",
|
|
32
|
+
"Source",
|
|
33
|
+
"SearchHit",
|
|
34
|
+
"SpaceContext",
|
|
35
|
+
"SpaceStats",
|
|
36
|
+
]
|
indx/_version.py
ADDED
indx/agent/__init__.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""indx.agent — plug a knowledge space into any AI agent, USB-drive style.
|
|
2
|
+
|
|
3
|
+
A ``.indx`` archive is a portable knowledge space. This package is the plug: one call turns
|
|
4
|
+
it into tools for whichever agent framework you use.
|
|
5
|
+
|
|
6
|
+
```python
|
|
7
|
+
from indx.agent import connect
|
|
8
|
+
|
|
9
|
+
kb = connect("ai-ready/handbook.indx") # load the "USB drive"
|
|
10
|
+
|
|
11
|
+
kb.langchain() # LangChain StructuredTools (+ kb.langchain_retriever())
|
|
12
|
+
kb.openai() # OpenAI Agents SDK function tools
|
|
13
|
+
kb.pydantic_ai() # Pydantic AI tools
|
|
14
|
+
kb.claude() # Claude Agent SDK in-process MCP server
|
|
15
|
+
kb.mcp() # FastMCP server — Mastra & any MCP client
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Or run it as a standalone MCP server from the shell — ``indx mcp ai-ready/handbook.indx`` —
|
|
19
|
+
and connect Claude Desktop, Cursor, or Mastra to it with no Python glue.
|
|
20
|
+
|
|
21
|
+
Importing this package is safe on a bare ``pip install indx``: the framework adapters are
|
|
22
|
+
imported lazily and each gates on its own optional extra (``indx[langchain]``,
|
|
23
|
+
``indx[openai-agents]``, ``indx[pydantic-ai]``, ``indx[claude-agent]``, ``indx[mcp]`` — or
|
|
24
|
+
``indx[agent]`` for all of them).
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from indx.agent.connector import KnowledgeConnector, connect
|
|
28
|
+
from indx.agent.schema import (
|
|
29
|
+
GET_DOCUMENT_TOOL,
|
|
30
|
+
OVERVIEW_TOOL,
|
|
31
|
+
SEARCH_TOOL,
|
|
32
|
+
TOOLS,
|
|
33
|
+
DocumentCard,
|
|
34
|
+
DocumentDetail,
|
|
35
|
+
Hit,
|
|
36
|
+
SearchResults,
|
|
37
|
+
SpaceOverview,
|
|
38
|
+
ToolDef,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
__all__ = [
|
|
42
|
+
"connect",
|
|
43
|
+
"KnowledgeConnector",
|
|
44
|
+
"Hit",
|
|
45
|
+
"SearchResults",
|
|
46
|
+
"DocumentCard",
|
|
47
|
+
"DocumentDetail",
|
|
48
|
+
"SpaceOverview",
|
|
49
|
+
"ToolDef",
|
|
50
|
+
"TOOLS",
|
|
51
|
+
"SEARCH_TOOL",
|
|
52
|
+
"OVERVIEW_TOOL",
|
|
53
|
+
"GET_DOCUMENT_TOOL",
|
|
54
|
+
]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Claude Agent SDK adapter: expose a knowledge space as an in-process MCP server.
|
|
2
|
+
|
|
3
|
+
The Claude Agent SDK consumes tools as MCP servers. :func:`to_claude_mcp_server` builds an
|
|
4
|
+
*in-process* SDK MCP server (no subprocess, no socket) from the canonical operations; hand it
|
|
5
|
+
to ``ClaudeAgentOptions(mcp_servers={"indx": server})`` and the agent can search the space.
|
|
6
|
+
|
|
7
|
+
The ``claude_agent_sdk`` package is the optional ``claude-agent`` extra, imported lazily and
|
|
8
|
+
gated by :func:`~indx.utils.lazy.require_extra`; importing this module is always safe.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from indx.agent.connector import KnowledgeConnector
|
|
16
|
+
from indx.agent.schema import GET_DOCUMENT_TOOL, OVERVIEW_TOOL, SEARCH_TOOL
|
|
17
|
+
from indx.utils.lazy import require_extra
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _text_result(payload: Any) -> dict[str, Any]:
|
|
21
|
+
"""Wrap a JSON-able payload in the SDK's ``{"content": [{"type": "text", ...}]}`` shape."""
|
|
22
|
+
import json
|
|
23
|
+
|
|
24
|
+
return {"content": [{"type": "text", "text": json.dumps(payload, ensure_ascii=False)}]}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def to_claude_mcp_server(connector: KnowledgeConnector, *, name: str = "indx") -> Any:
    """Create an in-process Claude Agent SDK MCP server backed by ``connector``.

    Three async tool handlers (search, overview, get_document) are registered under the
    canonical tool names and descriptions. Each handler forwards its arguments to
    :meth:`KnowledgeConnector.call` and returns the result as a JSON text-content block.

    Args:
        connector: The knowledge-space handle the tools operate on.
        name: The name given to the created MCP server.

    Returns:
        Whatever ``claude_agent_sdk.create_sdk_mcp_server`` returns for the three tools.
    """
    require_extra("agent connector", "claude-agent", "claude-agent", "claude_agent_sdk")
    from claude_agent_sdk import (  # type: ignore[import-not-found]  # optional extra: claude-agent
        create_sdk_mcp_server,
        tool,
    )

    search_inputs = {"query": str, "k": int, "doc_type": str}

    @tool(SEARCH_TOOL.name, SEARCH_TOOL.description, search_inputs)  # type: ignore[untyped-decorator]
    async def search(args: dict[str, Any]) -> dict[str, Any]:
        # ``k`` falls back to the connector's default only when the key is absent.
        request = {
            "query": args["query"],
            "k": args.get("k", connector.default_k),
            "doc_type": args.get("doc_type"),
        }
        return _text_result(connector.call(SEARCH_TOOL.name, request))

    @tool(OVERVIEW_TOOL.name, OVERVIEW_TOOL.description, {"sample": int})  # type: ignore[untyped-decorator]
    async def overview(args: dict[str, Any]) -> dict[str, Any]:
        request = {"sample": args.get("sample", 10)}
        return _text_result(connector.call(OVERVIEW_TOOL.name, request))

    @tool(GET_DOCUMENT_TOOL.name, GET_DOCUMENT_TOOL.description, {"path_or_id": str})  # type: ignore[untyped-decorator]
    async def get_document(args: dict[str, Any]) -> dict[str, Any]:
        request = {"path_or_id": args["path_or_id"]}
        return _text_result(connector.call(GET_DOCUMENT_TOOL.name, request))

    handlers = [search, overview, get_document]
    return create_sdk_mcp_server(name=name, tools=handlers)
|
indx/agent/connector.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""KnowledgeConnector — plug a knowledge space into any AI agent, USB-drive style.
|
|
2
|
+
|
|
3
|
+
A ``.indx`` archive is a portable knowledge space: the "USB drive" you carry between
|
|
4
|
+
machines and agents. :class:`KnowledgeConnector` is the plug. It wraps a
|
|
5
|
+
:class:`~indx.core.knowledge_space.KnowledgeSpace` and exposes a tiny, stable set of
|
|
6
|
+
agent operations — **search**, **overview**, **get_document** — plus one-call adapters that
|
|
7
|
+
hand those operations to whichever agent framework you use:
|
|
8
|
+
|
|
9
|
+
* :meth:`~KnowledgeConnector.langchain` / :meth:`~KnowledgeConnector.langchain_retriever`
|
|
10
|
+
* :meth:`~KnowledgeConnector.openai` (OpenAI Agents SDK)
|
|
11
|
+
* :meth:`~KnowledgeConnector.pydantic_ai` (Pydantic AI)
|
|
12
|
+
* :meth:`~KnowledgeConnector.claude` (Claude Agent SDK, in-process MCP server)
|
|
13
|
+
* :meth:`~KnowledgeConnector.mcp` (Model Context Protocol — Mastra & any client)
|
|
14
|
+
|
|
15
|
+
For frameworks not covered, :meth:`openai_schema` / :meth:`anthropic_schema` emit raw
|
|
16
|
+
tool specs and :meth:`call` dispatches a tool call by name — enough to wire the bare
|
|
17
|
+
Chat Completions / Messages API by hand.
|
|
18
|
+
|
|
19
|
+
This module imports **no vendor SDKs at top level**; every adapter is imported lazily inside
|
|
20
|
+
its method and gated by :func:`~indx.utils.lazy.require_extra`, so ``import indx.agent`` is
|
|
21
|
+
safe on a bare ``pip install indx`` (file-architecture §5, coding-standards §6.3).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import TYPE_CHECKING, Any
|
|
28
|
+
|
|
29
|
+
from indx.agent.schema import (
|
|
30
|
+
GET_DOCUMENT_TOOL,
|
|
31
|
+
OVERVIEW_TOOL,
|
|
32
|
+
SEARCH_TOOL,
|
|
33
|
+
TOOLS,
|
|
34
|
+
DocumentCard,
|
|
35
|
+
DocumentDetail,
|
|
36
|
+
Hit,
|
|
37
|
+
SearchResults,
|
|
38
|
+
SpaceOverview,
|
|
39
|
+
ToolDef,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
43
|
+
from indx.core.knowledge_space import KnowledgeSpace
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class KnowledgeConnector:
    """An agent-ready handle on a single knowledge space.

    Construct it directly from an in-memory space, or use :meth:`open` / the module-level
    :func:`connect` to load a ``.indx`` archive (or an output directory) from disk.

    Attributes:
        name: A short identifier for the space, surfaced to the agent in tool descriptions
            and used as the default MCP server name.
        default_k: The number of hits :meth:`search` returns when ``k`` is unset.
        with_context: When true, every :meth:`search` hit carries its neighbor chunks' text
            in ``hit.context`` for wider grounding windows.
    """

    def __init__(
        self,
        space: KnowledgeSpace,
        *,
        name: str = "indx",
        default_k: int = 5,
        with_context: bool = False,
    ) -> None:
        """Wrap an already-loaded ``space``; see the class docstring for the attributes."""
        self._space = space
        self.name = name
        self.default_k = default_k
        self.with_context = with_context

    @classmethod
    def open(
        cls,
        source: str | Path | KnowledgeSpace,
        *,
        name: str | None = None,
        default_k: int = 5,
        with_context: bool = False,
    ) -> KnowledgeConnector:
        """Load a knowledge space from ``source`` and wrap it.

        ``source`` may be an already-loaded :class:`KnowledgeSpace`, a path to a ``.indx``
        archive, or an output directory containing one (the same inputs ``indx inspect`` /
        ``indx query`` accept). ``name`` defaults to the archive's file stem for a file, the
        directory name for a directory, and ``"indx"`` for an in-memory space.
        """
        from indx.core.knowledge_space import KnowledgeSpace

        if isinstance(source, KnowledgeSpace):
            space = source
            label = name or "indx"
        else:
            # Reuse the CLI loader so the connector accepts every on-disk shape the CLI does
            # (a .indx file, a directory holding one, or a jsonl output directory).
            from indx.cli._render import load_space

            path = Path(source)
            space = load_space(path)
            label = name or (path.stem if path.is_file() else path.name or "indx")

        return cls(space, name=label, default_k=default_k, with_context=with_context)

    @property
    def space(self) -> KnowledgeSpace:
        """The wrapped :class:`KnowledgeSpace` (read access for advanced callers)."""
        return self._space

    # ------------------------------------------------------------------ operations

    def search(
        self,
        query: str,
        k: int | None = None,
        doc_type: str | None = None,
        *,
        with_context: bool | None = None,
    ) -> SearchResults:
        """Semantic search over the space; the backbone of the ``indx_search`` tool.

        Routes through :meth:`KnowledgeSpace.search` (CLI ⇄ SDK parity), then flattens each hit
        into a JSON-primitive :class:`~indx.agent.schema.Hit`. When ``doc_type`` is given,
        results are filtered to that detected type, over-fetching first so a full ``k`` can
        still come back.

        Args:
            query: The search text.
            k: Maximum number of hits to return. ``None`` or any value below 1 falls back
                to ``default_k``.
            doc_type: If set, keep only hits whose type equals it; a hit with no type is
                compared as ``"unknown"``.
            with_context: Overrides the connector-level ``with_context`` for this call
                when not ``None``.
        """
        # A non-positive k is meaningless: it used to reach the store as-is and made the
        # ``len(hits) >= k`` cut-off fire after the first hit. Treat it like "unset".
        if k is None or k < 1:
            k = self.default_k
        want_context = self.with_context if with_context is None else with_context

        # Over-fetch 5x when filtering so enough hits survive the doc_type filter.
        raw = self._space.search(query, k=k * 5 if doc_type else k)

        hits: list[Hit] = []
        for hit in raw:
            doc = self._space.document(hit.chunk.doc_id)
            # Prefer the type recorded on the hit's source, else the document's type.
            hit_type = (hit.source.type if hit.source else None) or (doc.doc_type if doc else None)
            if doc_type and (hit_type or "unknown") != doc_type:
                continue
            hits.append(
                Hit(
                    chunk_id=hit.chunk.id,
                    document_id=hit.chunk.doc_id,
                    score=hit.score,
                    text=hit.chunk.text,
                    source=(hit.source.path if hit.source else (doc.path if doc else None)),
                    folder=(hit.source.folder if hit.source else (doc.folder if doc else "")),
                    doc_type=hit_type,
                    topics=list(doc.topics) if doc else [],
                    tags=list(doc.tags) if doc else [],
                    context=[c.text for c in hit.neighbors] if want_context else [],
                )
            )
            if len(hits) >= k:
                break

        return SearchResults(query=query, count=len(hits), hits=hits)

    def overview(self, sample: int = 10) -> SpaceOverview:
        """Summarize the space; the backbone of the ``indx_overview`` tool.

        ``sample`` caps how many document cards are included; negative values are
        treated as 0.
        """
        stats = self._space.stats
        cards = [self._card(doc) for doc in self._space.documents()[: max(sample, 0)]]
        return SpaceOverview(
            name=self.name,
            documents=stats.documents,
            chunks=stats.chunks,
            relations=stats.relations,
            embeddings=stats.embeddings,
            embedding_model=self._space.manifest.embedding_model,
            embedding_dim=stats.embed_dim,
            types=dict(stats.types),
            sample_documents=cards,
        )

    def get_document(self, path_or_id: str) -> DocumentDetail | None:
        """Fetch one document's full text + metadata; the backbone of ``indx_get_document``.

        Resolves ``path_or_id`` against document ids first, then exact paths, then a path
        suffix match (so ``remote-work.md`` finds ``people/remote-work.md``). Returns ``None``
        when nothing matches or when ``path_or_id`` is empty.
        """
        # Every path ends with "", so an empty key would otherwise "match" the first
        # document through the suffix fallback below.
        if not path_or_id:
            return None

        doc = self._space.document(path_or_id)
        if doc is None:
            docs = self._space.documents()
            doc = next((d for d in docs if d.path == path_or_id), None)
            if doc is None:
                doc = next((d for d in docs if d.path.endswith(path_or_id)), None)
        if doc is None:
            return None

        # The full text is the document's chunks joined in position order.
        chunks = sorted(self._space.chunks_for(doc.id), key=lambda c: c.position)
        card = self._card(doc)
        return DocumentDetail(
            **card.model_dump(),
            chunk_count=len(chunks),
            text="\n\n".join(c.text for c in chunks),
        )

    @staticmethod
    def _card(doc: Any) -> DocumentCard:
        """Copy a document's identifying metadata into a :class:`DocumentCard`."""
        return DocumentCard(
            id=doc.id,
            path=doc.path,
            doc_type=doc.doc_type,
            folder=doc.folder,
            topics=list(doc.topics),
            tags=list(doc.tags),
            summary=doc.summary,
        )

    # ------------------------------------------------------------------ raw specs

    def tools(self) -> list[ToolDef]:
        """The canonical, framework-agnostic tool definitions for this space."""
        return list(TOOLS)

    def openai_schema(self) -> list[dict[str, Any]]:
        """Tool specs in OpenAI Chat Completions / Responses ``tools=[...]`` shape."""
        return [{"type": "function", "function": t.model_dump()} for t in self.tools()]

    def anthropic_schema(self) -> list[dict[str, Any]]:
        """Tool specs in Anthropic Messages API ``tools=[...]`` shape."""
        return [
            {"name": t.name, "description": t.description, "input_schema": t.parameters}
            for t in self.tools()
        ]

    def call(self, name: str, arguments: dict[str, Any] | None = None) -> dict[str, Any]:
        """Dispatch a tool call by ``name`` and return a JSON-able result.

        This is the single execution path every adapter and the MCP server funnel through, so
        a tool behaves identically regardless of which framework invoked it. Unknown names
        raise :class:`ValueError`. A missing required argument (``query`` / ``path_or_id``)
        surfaces as :class:`KeyError`. A document lookup that finds nothing returns an
        ``{"error": ...}`` payload rather than raising.
        """
        args = arguments or {}
        if name == SEARCH_TOOL.name:
            return self.search(
                query=args["query"],
                k=args.get("k"),
                doc_type=args.get("doc_type"),
            ).model_dump(mode="json")
        if name == OVERVIEW_TOOL.name:
            # An explicit null ``sample`` would crash ``max(sample, 0)`` in overview();
            # treat it the same as an absent key.
            sample = args.get("sample")
            return self.overview(sample=10 if sample is None else sample).model_dump(mode="json")
        if name == GET_DOCUMENT_TOOL.name:
            detail = self.get_document(args["path_or_id"])
            if detail is None:
                return {"error": f"no document matching {args['path_or_id']!r}"}
            return detail.model_dump(mode="json")
        raise ValueError(f"unknown tool {name!r}; known tools: {[t.name for t in TOOLS]}")

    # ------------------------------------------------------------------ adapters

    def langchain(self) -> list[Any]:
        """Return LangChain ``StructuredTool``s for this space (needs ``indx[langchain]``)."""
        from indx.agent.langchain import to_langchain_tools

        return to_langchain_tools(self)

    def langchain_retriever(self, k: int | None = None) -> Any:
        """Return a LangChain ``BaseRetriever`` over this space (needs ``indx[langchain]``)."""
        from indx.agent.langchain import to_langchain_retriever

        return to_langchain_retriever(self, k=k or self.default_k)

    def openai(self) -> list[Any]:
        """Return OpenAI Agents SDK ``function_tool``s (needs ``indx[openai-agents]``)."""
        from indx.agent.openai_agents import to_openai_agent_tools

        return to_openai_agent_tools(self)

    def pydantic_ai(self) -> list[Any]:
        """Return Pydantic AI ``Tool``s for this space (needs ``indx[pydantic-ai]``)."""
        from indx.agent.pydantic_ai import to_pydantic_ai_tools

        return to_pydantic_ai_tools(self)

    def claude(self, *, name: str | None = None) -> Any:
        """Return an in-process Claude Agent SDK MCP server (needs ``indx[claude-agent]``)."""
        from indx.agent.claude_agent import to_claude_mcp_server

        return to_claude_mcp_server(self, name=name or self.name)

    def mcp(self, *, name: str | None = None) -> Any:
        """Return a ``FastMCP`` server exposing this space (needs ``indx[mcp]``)."""
        from indx.agent.mcp import build_mcp_server

        return build_mcp_server(self, name=name or self.name)

    def serve(self, *, transport: str = "stdio", name: str | None = None) -> None:
        """Run an MCP server over ``transport`` until interrupted (needs ``indx[mcp]``).

        This is what ``indx mcp <archive>`` calls: it turns the knowledge space into a live
        MCP endpoint that Claude Desktop, Mastra, Cursor, or any MCP client can connect to.
        """
        self.mcp(name=name).run(transport=transport)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def connect(
    source: str | Path | KnowledgeSpace,
    *,
    name: str | None = None,
    default_k: int = 5,
    with_context: bool = False,
) -> KnowledgeConnector:
    """Open ``source`` as a :class:`KnowledgeConnector` — ``connect("space.indx")``.

    The headline entry point of :mod:`indx.agent`. It forwards every argument unchanged to
    :meth:`KnowledgeConnector.open` and returns the connector that call produces.
    """
    connector = KnowledgeConnector.open(
        source,
        name=name,
        default_k=default_k,
        with_context=with_context,
    )
    return connector
|
indx/agent/langchain.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""LangChain adapter: expose a knowledge space as tools and a retriever.
|
|
2
|
+
|
|
3
|
+
Two integration points, both built on :class:`~indx.agent.connector.KnowledgeConnector`:
|
|
4
|
+
|
|
5
|
+
* :func:`to_langchain_tools` — ``StructuredTool``s an agent can call (search / overview /
|
|
6
|
+
get_document), the agentic path.
|
|
7
|
+
* :func:`to_langchain_retriever` — a ``BaseRetriever`` that returns LangChain ``Document``s,
|
|
8
|
+
the classic RAG path (drop it into any retrieval chain).
|
|
9
|
+
|
|
10
|
+
``langchain-core`` is the optional ``langchain`` extra, imported lazily and gated by
|
|
11
|
+
:func:`~indx.utils.lazy.require_extra`; importing this module is always safe.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import TYPE_CHECKING, Any
|
|
17
|
+
|
|
18
|
+
from indx.agent.connector import KnowledgeConnector
|
|
19
|
+
from indx.agent.schema import GET_DOCUMENT_TOOL, OVERVIEW_TOOL, SEARCH_TOOL
|
|
20
|
+
from indx.utils.lazy import require_extra
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
23
|
+
from langchain_core.callbacks import ( # type: ignore[import-not-found] # optional extra: langchain
|
|
24
|
+
CallbackManagerForRetrieverRun,
|
|
25
|
+
)
|
|
26
|
+
from langchain_core.documents import ( # type: ignore[import-not-found] # optional extra: langchain
|
|
27
|
+
Document,
|
|
28
|
+
)
|
|
29
|
+
from langchain_core.tools import ( # type: ignore[import-not-found] # optional extra: langchain
|
|
30
|
+
StructuredTool,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def to_langchain_tools(connector: KnowledgeConnector) -> list[StructuredTool]:
    """Build LangChain ``StructuredTool``s (search / overview / get_document) for the space.

    Each tool is a thin closure over :meth:`KnowledgeConnector.call`, registered under the
    canonical tool name and description.

    Args:
        connector: The knowledge-space handle the tools operate on.

    Returns:
        Three ``StructuredTool`` objects, in search / overview / get_document order.
    """
    require_extra("agent connector", "langchain", "langchain", "langchain_core")
    from langchain_core.tools import StructuredTool  # optional extra: langchain

    # ``k`` defaults to None (not a literal 5) so an omitted ``k`` resolves to the
    # connector's configured ``default_k`` inside ``KnowledgeConnector.search``.
    def _search(
        query: str, k: int | None = None, doc_type: str | None = None
    ) -> dict[str, Any]:
        return connector.call(SEARCH_TOOL.name, {"query": query, "k": k, "doc_type": doc_type})

    def _overview(sample: int = 10) -> dict[str, Any]:
        return connector.call(OVERVIEW_TOOL.name, {"sample": sample})

    def _get_document(path_or_id: str) -> dict[str, Any]:
        return connector.call(GET_DOCUMENT_TOOL.name, {"path_or_id": path_or_id})

    return [
        StructuredTool.from_function(
            func=_search, name=SEARCH_TOOL.name, description=SEARCH_TOOL.description
        ),
        StructuredTool.from_function(
            func=_overview, name=OVERVIEW_TOOL.name, description=OVERVIEW_TOOL.description
        ),
        StructuredTool.from_function(
            func=_get_document,
            name=GET_DOCUMENT_TOOL.name,
            description=GET_DOCUMENT_TOOL.description,
        ),
    ]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def to_langchain_retriever(connector: KnowledgeConnector, *, k: int = 5) -> Any:
    """Build a LangChain ``BaseRetriever`` whose results come from ``connector.search``.

    Every hit becomes one LangChain ``Document``: the chunk id is the document ``id``, the
    chunk text is ``page_content``, and ``metadata`` holds the hit's document id, score,
    source path, folder, document type, topics and tags.

    Args:
        connector: The knowledge-space handle to search.
        k: How many hits the retriever requests per query.

    Returns:
        An instance of a ``BaseRetriever`` subclass defined inside this function.
    """
    require_extra("agent connector", "langchain", "langchain", "langchain_core")
    from langchain_core.documents import Document  # optional extra: langchain
    from langchain_core.retrievers import (  # type: ignore[import-not-found]  # optional extra: langchain
        BaseRetriever,
    )

    class IndxRetriever(BaseRetriever):  # type: ignore[misc]  # BaseRetriever is Any without the extra
        """Retrieve indx knowledge-space chunks as LangChain ``Document``s."""

        connector: Any
        k: int = 5

        def _get_relevant_documents(
            self,
            query: str,
            *,
            run_manager: CallbackManagerForRetrieverRun | None = None,
        ) -> list[Document]:
            found = self.connector.search(query, k=self.k)
            documents = []
            for hit in found.hits:
                provenance = {
                    "doc_id": hit.document_id,
                    "score": hit.score,
                    "source": hit.source,
                    "folder": hit.folder,
                    "doc_type": hit.doc_type,
                    "topics": hit.topics,
                    "tags": hit.tags,
                }
                documents.append(
                    Document(id=hit.chunk_id, page_content=hit.text, metadata=provenance)
                )
            return documents

    retriever = IndxRetriever(connector=connector, k=k)
    return retriever
|
indx/agent/mcp.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""MCP server: serve a knowledge space over the Model Context Protocol.
|
|
2
|
+
|
|
3
|
+
MCP is the universal connector — Claude Desktop, Cursor, Mastra (TypeScript), and any other
|
|
4
|
+
MCP client speak it, so one ``indx mcp <archive>`` command plugs a knowledge space into all
|
|
5
|
+
of them, no Python glue on the client side. :func:`build_mcp_server` builds a ``FastMCP``
|
|
6
|
+
server exposing the canonical search / overview / get_document tools;
|
|
7
|
+
:meth:`KnowledgeConnector.serve` runs it.
|
|
8
|
+
|
|
9
|
+
FastMCP does the heavy lifting: it derives each tool's JSON schema from the handler's typed
|
|
10
|
+
signature and owns the transport loop. We prefer the standalone, batteries-included
|
|
11
|
+
``fastmcp`` package (v2) and fall back to the ``FastMCP`` bundled in the official ``mcp`` SDK
|
|
12
|
+
(v1) — either satisfies the ``indx[mcp]`` extra. Imports are lazy, so importing this module is
|
|
13
|
+
always safe on a core-only install.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from indx.agent.connector import KnowledgeConnector
|
|
21
|
+
from indx.errors import MissingExtraError
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_fastmcp() -> Any:
    """Locate a ``FastMCP`` class and return it.

    The standalone ``fastmcp`` package is tried first; if that module is not found, the
    ``FastMCP`` shipped inside the ``mcp`` SDK is tried next. If that is not found either,
    :class:`MissingExtraError` for the ``mcp`` extra is raised with the import error
    suppressed. This covers the "either of two modules" case that
    :func:`~indx.utils.lazy.require_extra` can't express directly.
    """
    try:
        from fastmcp import FastMCP as standalone  # type: ignore[import-not-found]  # optional extra: mcp
    except ModuleNotFoundError:
        pass
    else:
        return standalone

    try:
        from mcp.server.fastmcp import (  # type: ignore[import-not-found]  # optional extra: mcp
            FastMCP as bundled,
        )
    except ModuleNotFoundError:
        raise MissingExtraError(slot="agent connector", name="mcp", extra="mcp") from None
    else:
        return bundled
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def build_mcp_server(connector: KnowledgeConnector, *, name: str | None = None) -> Any:
    """Build a ``FastMCP`` server exposing the space's tools (search/overview/get_document).

    The handlers are plain typed functions registered by calling ``server.tool()`` on each
    one (not by decorating them), so they keep their static types — mypy stays strict over
    this module.

    Args:
        connector: The knowledge-space handle the tools operate on.
        name: Server name; falls back to ``connector.name`` when unset or empty.

    Returns:
        The ``FastMCP`` server instance with the three tools registered.
    """
    fast_mcp = _load_fastmcp()
    server = fast_mcp(name or connector.name)

    # ``k`` defaults to None (not a literal 5) so an omitted ``k`` resolves to the
    # connector's configured ``default_k`` inside ``KnowledgeConnector.search``.
    def indx_search(
        query: str, k: int | None = None, doc_type: str | None = None
    ) -> dict[str, Any]:
        """Semantic search over the indx knowledge space."""
        return connector.search(query, k=k, doc_type=doc_type).model_dump(mode="json")

    def indx_overview(sample: int = 10) -> dict[str, Any]:
        """Describe the knowledge space: counts, types, sample documents."""
        return connector.overview(sample=sample).model_dump(mode="json")

    def indx_get_document(path_or_id: str) -> dict[str, Any]:
        """Fetch one document's full text and metadata by path or id."""
        detail = connector.get_document(path_or_id)
        if detail is None:
            # Same payload KnowledgeConnector.call returns for a miss, so the tool
            # answers identically whichever adapter served it.
            return {"error": f"no document matching {path_or_id!r}"}
        return detail.model_dump(mode="json")

    # NOTE(review): ``add_tool`` appears to take a callable in the mcp-SDK FastMCP but a
    # Tool object in recent standalone fastmcp — confirm against the pinned versions.
    # ``server.tool()`` returning a registering decorator is the form both document.
    for handler in (indx_search, indx_overview, indx_get_document):
        server.tool()(handler)
    return server
|