indx 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. indx/__init__.py +36 -0
  2. indx/_version.py +3 -0
  3. indx/agent/__init__.py +54 -0
  4. indx/agent/claude_agent.py +62 -0
  5. indx/agent/connector.py +309 -0
  6. indx/agent/langchain.py +106 -0
  7. indx/agent/mcp.py +72 -0
  8. indx/agent/openai_agents.py +43 -0
  9. indx/agent/pydantic_ai.py +47 -0
  10. indx/agent/schema.py +177 -0
  11. indx/app/__init__.py +26 -0
  12. indx/app/api.py +667 -0
  13. indx/app/models.py +212 -0
  14. indx/app/server.py +110 -0
  15. indx/app/static/.gitkeep +0 -0
  16. indx/archive/__init__.py +6 -0
  17. indx/archive/format.py +24 -0
  18. indx/archive/reader.py +86 -0
  19. indx/archive/writer.py +52 -0
  20. indx/cli/__init__.py +1 -0
  21. indx/cli/_render.py +97 -0
  22. indx/cli/app.py +423 -0
  23. indx/cli/build.py +270 -0
  24. indx/cli/inspect.py +75 -0
  25. indx/cli/query.py +61 -0
  26. indx/config/__init__.py +22 -0
  27. indx/config/defaults.py +27 -0
  28. indx/config/loader.py +224 -0
  29. indx/config/schema.py +139 -0
  30. indx/core/__init__.py +24 -0
  31. indx/core/chunk.py +50 -0
  32. indx/core/context.py +44 -0
  33. indx/core/document.py +49 -0
  34. indx/core/knowledge_space.py +125 -0
  35. indx/core/parsed.py +32 -0
  36. indx/core/relation.py +24 -0
  37. indx/core/source.py +23 -0
  38. indx/core/stats.py +22 -0
  39. indx/demo/__init__.py +9 -0
  40. indx/demo/corpus/engineering/code-review.md +23 -0
  41. indx/demo/corpus/engineering/guide.md +21 -0
  42. indx/demo/corpus/handbook/onboarding.md +22 -0
  43. indx/demo/corpus/handbook/welcome.md +18 -0
  44. indx/demo/corpus/people/remote-work.md +20 -0
  45. indx/demo/corpus/people/team.txt +16 -0
  46. indx/demo/corpus/people/time-off.md +19 -0
  47. indx/embed/__init__.py +9 -0
  48. indx/embed/azure.py +173 -0
  49. indx/embed/base.py +15 -0
  50. indx/embed/bedrock.py +168 -0
  51. indx/embed/bge_m3.py +104 -0
  52. indx/embed/cohere.py +122 -0
  53. indx/embed/e5.py +100 -0
  54. indx/embed/hash_embedder.py +34 -0
  55. indx/embed/litellm.py +99 -0
  56. indx/embed/openai.py +91 -0
  57. indx/embed/vertex.py +119 -0
  58. indx/errors.py +81 -0
  59. indx/llm/__init__.py +10 -0
  60. indx/llm/anthropic.py +110 -0
  61. indx/llm/azure.py +221 -0
  62. indx/llm/base.py +23 -0
  63. indx/llm/bedrock.py +144 -0
  64. indx/llm/litellm.py +99 -0
  65. indx/llm/none.py +24 -0
  66. indx/llm/ollama.py +117 -0
  67. indx/llm/openai.py +136 -0
  68. indx/llm/vertex.py +142 -0
  69. indx/llm/vllm.py +108 -0
  70. indx/output/__init__.py +13 -0
  71. indx/output/base.py +15 -0
  72. indx/output/indx_writer.py +90 -0
  73. indx/output/jsonl_writer.py +30 -0
  74. indx/output/langchain.py +101 -0
  75. indx/output/llamaindex.py +102 -0
  76. indx/parsers/__init__.py +10 -0
  77. indx/parsers/base.py +19 -0
  78. indx/parsers/docai.py +161 -0
  79. indx/parsers/docintel.py +158 -0
  80. indx/parsers/docling.py +197 -0
  81. indx/parsers/llamaparse.py +112 -0
  82. indx/parsers/markitdown.py +89 -0
  83. indx/parsers/plaintext.py +35 -0
  84. indx/parsers/textract.py +106 -0
  85. indx/parsers/unstructured.py +119 -0
  86. indx/pipeline/__init__.py +6 -0
  87. indx/pipeline/pipeline.py +671 -0
  88. indx/pipeline/stage.py +14 -0
  89. indx/pipeline/stages/__init__.py +17 -0
  90. indx/pipeline/stages/chunk.py +55 -0
  91. indx/pipeline/stages/enrich.py +311 -0
  92. indx/pipeline/stages/pack.py +35 -0
  93. indx/pipeline/stages/parse.py +23 -0
  94. indx/pipeline/stages/relate.py +209 -0
  95. indx/pipeline/stages/walk.py +26 -0
  96. indx/py.typed +0 -0
  97. indx/registry/__init__.py +27 -0
  98. indx/registry/builtins.py +136 -0
  99. indx/registry/plugins.py +48 -0
  100. indx/registry/registry.py +74 -0
  101. indx/store/__init__.py +13 -0
  102. indx/store/azure_search.py +311 -0
  103. indx/store/base.py +56 -0
  104. indx/store/bigquery.py +261 -0
  105. indx/store/chroma.py +202 -0
  106. indx/store/jsonl.py +48 -0
  107. indx/store/lancedb.py +209 -0
  108. indx/store/opensearch.py +278 -0
  109. indx/store/pgvector.py +230 -0
  110. indx/store/qdrant.py +243 -0
  111. indx/store/s3vectors.py +304 -0
  112. indx/store/vertex_vector.py +244 -0
  113. indx/utils/__init__.py +7 -0
  114. indx/utils/cache.py +84 -0
  115. indx/utils/hashing.py +12 -0
  116. indx/utils/io.py +35 -0
  117. indx/utils/lazy.py +26 -0
  118. indx/utils/logging.py +39 -0
  119. indx/utils/zip_input.py +70 -0
  120. indx/vlm/__init__.py +5 -0
  121. indx/vlm/azure.py +155 -0
  122. indx/vlm/base.py +15 -0
  123. indx/vlm/bedrock.py +139 -0
  124. indx/vlm/gpt4o.py +100 -0
  125. indx/vlm/local.py +142 -0
  126. indx/vlm/none.py +13 -0
  127. indx/vlm/qwen_vl.py +158 -0
  128. indx/vlm/vertex.py +122 -0
  129. indx-0.0.1.dist-info/METADATA +306 -0
  130. indx-0.0.1.dist-info/RECORD +134 -0
  131. indx-0.0.1.dist-info/WHEEL +4 -0
  132. indx-0.0.1.dist-info/entry_points.txt +61 -0
  133. indx-0.0.1.dist-info/licenses/LICENSE +201 -0
  134. indx-0.0.1.dist-info/licenses/NOTICE +16 -0
indx/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ """indx — make directories AI-ready, not just files.
2
+
3
+ Public SDK surface. The CLI is this same surface with handles (CLI⇄SDK parity).
4
+ """
5
+
6
+ from indx._version import __version__
7
+ from indx.core import (
8
+ Chunk,
9
+ Document,
10
+ KnowledgeSpace,
11
+ Manifest,
12
+ ParsedDoc,
13
+ Relation,
14
+ RelationType,
15
+ Source,
16
+ SpaceContext,
17
+ SpaceStats,
18
+ )
19
+ from indx.pipeline import DirectoryPipeline
20
+ from indx.store.base import SearchHit
21
+
22
+ __all__ = [
23
+ "__version__",
24
+ "DirectoryPipeline",
25
+ "KnowledgeSpace",
26
+ "Manifest",
27
+ "Document",
28
+ "Chunk",
29
+ "Relation",
30
+ "RelationType",
31
+ "ParsedDoc",
32
+ "Source",
33
+ "SearchHit",
34
+ "SpaceContext",
35
+ "SpaceStats",
36
+ ]
indx/_version.py ADDED
@@ -0,0 +1,3 @@
1
+ """Single source of version truth."""
2
+
3
+ __version__ = "0.0.1"
indx/agent/__init__.py ADDED
@@ -0,0 +1,54 @@
1
+ """indx.agent — plug a knowledge space into any AI agent, USB-drive style.
2
+
3
+ A ``.indx`` archive is a portable knowledge space. This package is the plug: one call turns
4
+ it into tools for whichever agent framework you use.
5
+
6
+ ```python
7
+ from indx.agent import connect
8
+
9
+ kb = connect("ai-ready/handbook.indx") # load the "USB drive"
10
+
11
+ kb.langchain() # LangChain StructuredTools (+ kb.langchain_retriever())
12
+ kb.openai() # OpenAI Agents SDK function tools
13
+ kb.pydantic_ai() # Pydantic AI tools
14
+ kb.claude() # Claude Agent SDK in-process MCP server
15
+ kb.mcp() # FastMCP server — Mastra & any MCP client
16
+ ```
17
+
18
+ Or run it as a standalone MCP server from the shell — ``indx mcp ai-ready/handbook.indx`` —
19
+ and connect Claude Desktop, Cursor, or Mastra to it with no Python glue.
20
+
21
+ Importing this package is safe on a bare ``pip install indx``: the framework adapters are
22
+ imported lazily and each gates on its own optional extra (``indx[langchain]``,
23
+ ``indx[openai-agents]``, ``indx[pydantic-ai]``, ``indx[claude-agent]``, ``indx[mcp]`` — or
24
+ ``indx[agent]`` for all of them).
25
+ """
26
+
27
+ from indx.agent.connector import KnowledgeConnector, connect
28
+ from indx.agent.schema import (
29
+ GET_DOCUMENT_TOOL,
30
+ OVERVIEW_TOOL,
31
+ SEARCH_TOOL,
32
+ TOOLS,
33
+ DocumentCard,
34
+ DocumentDetail,
35
+ Hit,
36
+ SearchResults,
37
+ SpaceOverview,
38
+ ToolDef,
39
+ )
40
+
41
+ __all__ = [
42
+ "connect",
43
+ "KnowledgeConnector",
44
+ "Hit",
45
+ "SearchResults",
46
+ "DocumentCard",
47
+ "DocumentDetail",
48
+ "SpaceOverview",
49
+ "ToolDef",
50
+ "TOOLS",
51
+ "SEARCH_TOOL",
52
+ "OVERVIEW_TOOL",
53
+ "GET_DOCUMENT_TOOL",
54
+ ]
indx/agent/claude_agent.py ADDED
@@ -0,0 +1,62 @@
1
+ """Claude Agent SDK adapter: expose a knowledge space as an in-process MCP server.
2
+
3
+ The Claude Agent SDK consumes tools as MCP servers. :func:`to_claude_mcp_server` builds an
4
+ *in-process* SDK MCP server (no subprocess, no socket) from the canonical operations; hand it
5
+ to ``ClaudeAgentOptions(mcp_servers={"indx": server})`` and the agent can search the space.
6
+
7
+ The ``claude_agent_sdk`` package is the optional ``claude-agent`` extra, imported lazily and
8
+ gated by :func:`~indx.utils.lazy.require_extra`; importing this module is always safe.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Any
14
+
15
+ from indx.agent.connector import KnowledgeConnector
16
+ from indx.agent.schema import GET_DOCUMENT_TOOL, OVERVIEW_TOOL, SEARCH_TOOL
17
+ from indx.utils.lazy import require_extra
18
+
19
+
20
+ def _text_result(payload: Any) -> dict[str, Any]:
21
+ """Wrap a JSON-able payload in the SDK's ``{"content": [{"type": "text", ...}]}`` shape."""
22
+ import json
23
+
24
+ return {"content": [{"type": "text", "text": json.dumps(payload, ensure_ascii=False)}]}
25
+
26
+
27
def to_claude_mcp_server(connector: KnowledgeConnector, *, name: str = "indx") -> Any:
    """Build an in-process Claude Agent SDK MCP server exposing the space's tools.

    Args:
        connector: The knowledge-space handle every tool call is dispatched through
            (via :meth:`KnowledgeConnector.call`).
        name: The MCP server name passed to ``create_sdk_mcp_server``.

    Returns:
        Whatever ``claude_agent_sdk.create_sdk_mcp_server`` returns for the three tools
        (search, overview, get_document).

    Each tool is declared with the canonical JSON Schema from ``ToolDef.parameters`` — the
    same object :meth:`KnowledgeConnector.anthropic_schema` emits as ``input_schema`` —
    instead of the SDK's ``{"name": type}`` shorthand. The shorthand marks *every* listed
    key as required, which would force the model to always supply ``k`` / ``doc_type`` /
    ``sample`` even though the handlers below treat them as optional.
    NOTE(review): assumes ``ToolDef.parameters`` is a full JSON Schema object (with
    ``type`` and ``properties``) — confirm against ``indx.agent.schema``.
    """
    require_extra("agent connector", "claude-agent", "claude-agent", "claude_agent_sdk")
    from claude_agent_sdk import (  # type: ignore[import-not-found]  # optional extra: claude-agent
        create_sdk_mcp_server,
        tool,
    )

    @tool(  # type: ignore[untyped-decorator]
        SEARCH_TOOL.name,
        SEARCH_TOOL.description,
        SEARCH_TOOL.parameters,
    )
    async def search(args: dict[str, Any]) -> dict[str, Any]:
        return _text_result(
            connector.call(
                SEARCH_TOOL.name,
                {
                    "query": args["query"],
                    # Absent ``k`` falls back to the connector's configured default.
                    "k": args.get("k", connector.default_k),
                    "doc_type": args.get("doc_type"),
                },
            )
        )

    @tool(  # type: ignore[untyped-decorator]
        OVERVIEW_TOOL.name,
        OVERVIEW_TOOL.description,
        OVERVIEW_TOOL.parameters,
    )
    async def overview(args: dict[str, Any]) -> dict[str, Any]:
        return _text_result(connector.call(OVERVIEW_TOOL.name, {"sample": args.get("sample", 10)}))

    @tool(  # type: ignore[untyped-decorator]
        GET_DOCUMENT_TOOL.name,
        GET_DOCUMENT_TOOL.description,
        GET_DOCUMENT_TOOL.parameters,
    )
    async def get_document(args: dict[str, Any]) -> dict[str, Any]:
        return _text_result(
            connector.call(GET_DOCUMENT_TOOL.name, {"path_or_id": args["path_or_id"]})
        )

    return create_sdk_mcp_server(name=name, tools=[search, overview, get_document])
indx/agent/connector.py ADDED
@@ -0,0 +1,309 @@
1
+ """KnowledgeConnector — plug a knowledge space into any AI agent, USB-drive style.
2
+
3
+ A ``.indx`` archive is a portable knowledge space: the "USB drive" you carry between
4
+ machines and agents. :class:`KnowledgeConnector` is the plug. It wraps a
5
+ :class:`~indx.core.knowledge_space.KnowledgeSpace` and exposes a tiny, stable set of
6
+ agent operations — **search**, **overview**, **get_document** — plus one-call adapters that
7
+ hand those operations to whichever agent framework you use:
8
+
9
+ * :meth:`~KnowledgeConnector.langchain` / :meth:`~KnowledgeConnector.langchain_retriever`
10
+ * :meth:`~KnowledgeConnector.openai` (OpenAI Agents SDK)
11
+ * :meth:`~KnowledgeConnector.pydantic_ai` (Pydantic AI)
12
+ * :meth:`~KnowledgeConnector.claude` (Claude Agent SDK, in-process MCP server)
13
+ * :meth:`~KnowledgeConnector.mcp` (Model Context Protocol — Mastra & any client)
14
+
15
+ For frameworks not covered, :meth:`openai_schema` / :meth:`anthropic_schema` emit raw
16
+ tool specs and :meth:`call` dispatches a tool call by name — enough to wire the bare
17
+ Chat Completions / Messages API by hand.
18
+
19
+ This module imports **no vendor SDKs at top level**; every adapter is imported lazily inside
20
+ its method and gated by :func:`~indx.utils.lazy.require_extra`, so ``import indx.agent`` is
21
+ safe on a bare ``pip install indx`` (file-architecture §5, coding-standards §6.3).
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from pathlib import Path
27
+ from typing import TYPE_CHECKING, Any
28
+
29
+ from indx.agent.schema import (
30
+ GET_DOCUMENT_TOOL,
31
+ OVERVIEW_TOOL,
32
+ SEARCH_TOOL,
33
+ TOOLS,
34
+ DocumentCard,
35
+ DocumentDetail,
36
+ Hit,
37
+ SearchResults,
38
+ SpaceOverview,
39
+ ToolDef,
40
+ )
41
+
42
+ if TYPE_CHECKING: # pragma: no cover - typing only
43
+ from indx.core.knowledge_space import KnowledgeSpace
44
+
45
+
46
class KnowledgeConnector:
    """An agent-ready handle on a single knowledge space.

    Construct it directly from an in-memory space, or use :meth:`open` / the module-level
    :func:`connect` to load a ``.indx`` archive (or an output directory) from disk.

    Attributes:
        name: A short identifier for the space, surfaced to the agent in tool descriptions
            and used as the default MCP server name.
        default_k: The number of hits :meth:`search` returns when ``k`` is unset.
        with_context: When true, every :meth:`search` hit carries its neighbor chunks' text
            in ``hit.context`` for wider grounding windows.
    """

    def __init__(
        self,
        space: KnowledgeSpace,
        *,
        name: str = "indx",
        default_k: int = 5,
        with_context: bool = False,
    ) -> None:
        self._space = space
        self.name = name
        self.default_k = default_k
        self.with_context = with_context

    @classmethod
    def open(
        cls,
        source: str | Path | KnowledgeSpace,
        *,
        name: str | None = None,
        default_k: int = 5,
        with_context: bool = False,
    ) -> KnowledgeConnector:
        """Load a knowledge space from ``source`` and wrap it.

        ``source`` may be an already-loaded :class:`KnowledgeSpace`, a path to a ``.indx``
        archive, or an output directory containing one (the same inputs ``indx inspect`` /
        ``indx query`` accept). ``name`` defaults to the archive's file stem (or the
        directory name), and to ``"indx"`` for an in-memory space.
        """
        from indx.core.knowledge_space import KnowledgeSpace

        if isinstance(source, KnowledgeSpace):
            space = source
            label = name or "indx"
        else:
            # Reuse the CLI loader so the connector accepts every on-disk shape the CLI does
            # (a .indx file, a directory holding one, or a jsonl output directory).
            from indx.cli._render import load_space

            path = Path(source)
            space = load_space(path)
            label = name or (path.stem if path.is_file() else path.name or "indx")

        return cls(space, name=label, default_k=default_k, with_context=with_context)

    @property
    def space(self) -> KnowledgeSpace:
        """The wrapped :class:`KnowledgeSpace` (read access for advanced callers)."""
        return self._space

    # ------------------------------------------------------------------ operations

    def search(
        self,
        query: str,
        k: int | None = None,
        doc_type: str | None = None,
        *,
        with_context: bool | None = None,
    ) -> SearchResults:
        """Semantic search over the space; the backbone of the ``indx_search`` tool.

        Routes through :meth:`KnowledgeSpace.search` (CLI ⇄ SDK parity), then flattens each hit
        into a JSON-primitive :class:`~indx.agent.schema.Hit`. When ``doc_type`` is given,
        results are filtered to that detected type, over-fetching first so a full ``k`` can
        still come back.

        Args:
            query: The search text.
            k: Maximum number of hits; a falsy value (``None`` or ``0``) means
                :attr:`default_k`.
            doc_type: When set, keep only hits whose detected type equals it (hits with no
                detected type compare as ``"unknown"``).
            with_context: Per-call override of :attr:`with_context`; ``None`` keeps the
                connector-level setting.
        """
        k = k or self.default_k
        want_context = self.with_context if with_context is None else with_context

        # Over-fetch 5x when filtering so the post-filter list can still reach ``k``.
        raw = self._space.search(query, k=k * 5 if doc_type else k)

        hits: list[Hit] = []
        for hit in raw:
            doc = self._space.document(hit.chunk.doc_id)
            # Prefer the hit's own source metadata; fall back to the parent document.
            hit_type = (hit.source.type if hit.source else None) or (doc.doc_type if doc else None)
            if doc_type and (hit_type or "unknown") != doc_type:
                continue
            hits.append(
                Hit(
                    chunk_id=hit.chunk.id,
                    document_id=hit.chunk.doc_id,
                    score=hit.score,
                    text=hit.chunk.text,
                    source=(hit.source.path if hit.source else (doc.path if doc else None)),
                    folder=(hit.source.folder if hit.source else (doc.folder if doc else "")),
                    doc_type=hit_type,
                    topics=list(doc.topics) if doc else [],
                    tags=list(doc.tags) if doc else [],
                    context=[c.text for c in hit.neighbors] if want_context else [],
                )
            )
            if len(hits) >= k:
                break

        return SearchResults(query=query, count=len(hits), hits=hits)

    def overview(self, sample: int = 10) -> SpaceOverview:
        """Summarize the space; the backbone of the ``indx_overview`` tool.

        ``sample`` caps how many document cards are included; negative values are treated
        as zero.
        """
        stats = self._space.stats
        cards = [self._card(doc) for doc in self._space.documents()[: max(sample, 0)]]
        return SpaceOverview(
            name=self.name,
            documents=stats.documents,
            chunks=stats.chunks,
            relations=stats.relations,
            embeddings=stats.embeddings,
            embedding_model=self._space.manifest.embedding_model,
            embedding_dim=stats.embed_dim,
            types=dict(stats.types),
            sample_documents=cards,
        )

    def get_document(self, path_or_id: str) -> DocumentDetail | None:
        """Fetch one document's full text + metadata; the backbone of ``indx_get_document``.

        Resolves ``path_or_id`` against document ids first, then exact paths, then a path
        suffix match (so ``remote-work.md`` finds ``people/remote-work.md``). Returns ``None``
        when nothing matches or when ``path_or_id`` is empty/blank.
        """
        doc = self._resolve_document(path_or_id)
        if doc is None:
            return None

        # Reassemble the text in reading order.
        chunks = sorted(self._space.chunks_for(doc.id), key=lambda c: c.position)
        card = self._card(doc)
        return DocumentDetail(
            **card.model_dump(),
            chunk_count=len(chunks),
            text="\n\n".join(c.text for c in chunks),
        )

    def _resolve_document(self, path_or_id: str) -> Any | None:
        """Find the document ``path_or_id`` refers to, or ``None``.

        Lookup order: document id, exact path, suffix on a path-component boundary, and
        finally a raw string-suffix match.
        """
        # An empty string is a suffix of every path, so without this guard a blank
        # argument would silently resolve to the first document in the space.
        if not path_or_id or not path_or_id.strip():
            return None

        doc = self._space.document(path_or_id)
        if doc is not None:
            return doc

        docs = self._space.documents()
        exact = next((d for d in docs if d.path == path_or_id), None)
        if exact is not None:
            return exact

        # Prefer matches that end on a whole path component, so "guide.md" resolves to
        # "engineering/guide.md" rather than an earlier "handbook/style-guide.md".
        wanted = path_or_id.replace("\\", "/").lstrip("/")
        if wanted:
            boundary = "/" + wanted
            for candidate in docs:
                normalized = candidate.path.replace("\\", "/")
                if normalized == wanted or normalized.endswith(boundary):
                    return candidate

        # Last resort: the loose raw-suffix match, kept for backward compatibility.
        return next((d for d in docs if d.path.endswith(path_or_id)), None)

    @staticmethod
    def _card(doc: Any) -> DocumentCard:
        """Project a document onto the JSON-primitive :class:`DocumentCard` fields."""
        return DocumentCard(
            id=doc.id,
            path=doc.path,
            doc_type=doc.doc_type,
            folder=doc.folder,
            topics=list(doc.topics),
            tags=list(doc.tags),
            summary=doc.summary,
        )

    # ------------------------------------------------------------------ raw specs

    def tools(self) -> list[ToolDef]:
        """The canonical, framework-agnostic tool definitions for this space."""
        return list(TOOLS)

    def openai_schema(self) -> list[dict[str, Any]]:
        """Tool specs in OpenAI Chat Completions / Responses ``tools=[...]`` shape."""
        return [{"type": "function", "function": t.model_dump()} for t in self.tools()]

    def anthropic_schema(self) -> list[dict[str, Any]]:
        """Tool specs in Anthropic Messages API ``tools=[...]`` shape."""
        return [
            {"name": t.name, "description": t.description, "input_schema": t.parameters}
            for t in self.tools()
        ]

    def call(self, name: str, arguments: dict[str, Any] | None = None) -> dict[str, Any]:
        """Dispatch a tool call by ``name`` and return a JSON-able result.

        This is the single execution path every adapter and the MCP server funnel through, so
        a tool behaves identically regardless of which framework invoked it. Unknown names
        raise :class:`ValueError`; a required argument missing from ``arguments`` raises
        :class:`KeyError`. A document lookup that finds nothing returns an ``{"error": ...}``
        payload rather than raising.
        """
        args = arguments or {}
        if name == SEARCH_TOOL.name:
            return self.search(
                query=args["query"],
                k=args.get("k"),
                doc_type=args.get("doc_type"),
            ).model_dump(mode="json")
        if name == OVERVIEW_TOOL.name:
            # Treat an explicit ``None`` like an absent key so ``max(sample, 0)`` in
            # :meth:`overview` never sees ``None``.
            sample = args.get("sample")
            return self.overview(sample=10 if sample is None else sample).model_dump(mode="json")
        if name == GET_DOCUMENT_TOOL.name:
            detail = self.get_document(args["path_or_id"])
            if detail is None:
                return {"error": f"no document matching {args['path_or_id']!r}"}
            return detail.model_dump(mode="json")
        raise ValueError(f"unknown tool {name!r}; known tools: {[t.name for t in TOOLS]}")

    # ------------------------------------------------------------------ adapters

    def langchain(self) -> list[Any]:
        """Return LangChain ``StructuredTool``s for this space (needs ``indx[langchain]``)."""
        from indx.agent.langchain import to_langchain_tools

        return to_langchain_tools(self)

    def langchain_retriever(self, k: int | None = None) -> Any:
        """Return a LangChain ``BaseRetriever`` over this space (needs ``indx[langchain]``)."""
        from indx.agent.langchain import to_langchain_retriever

        return to_langchain_retriever(self, k=k or self.default_k)

    def openai(self) -> list[Any]:
        """Return OpenAI Agents SDK ``function_tool``s (needs ``indx[openai-agents]``)."""
        from indx.agent.openai_agents import to_openai_agent_tools

        return to_openai_agent_tools(self)

    def pydantic_ai(self) -> list[Any]:
        """Return Pydantic AI ``Tool``s for this space (needs ``indx[pydantic-ai]``)."""
        from indx.agent.pydantic_ai import to_pydantic_ai_tools

        return to_pydantic_ai_tools(self)

    def claude(self, *, name: str | None = None) -> Any:
        """Return an in-process Claude Agent SDK MCP server (needs ``indx[claude-agent]``)."""
        from indx.agent.claude_agent import to_claude_mcp_server

        return to_claude_mcp_server(self, name=name or self.name)

    def mcp(self, *, name: str | None = None) -> Any:
        """Return a ``FastMCP`` server exposing this space (needs ``indx[mcp]``)."""
        from indx.agent.mcp import build_mcp_server

        return build_mcp_server(self, name=name or self.name)

    def serve(self, *, transport: str = "stdio", name: str | None = None) -> None:
        """Run an MCP server over ``transport`` until interrupted (needs ``indx[mcp]``).

        This is what ``indx mcp <archive>`` calls: it turns the knowledge space into a live
        MCP endpoint that Claude Desktop, Mastra, Cursor, or any MCP client can connect to.
        """
        self.mcp(name=name).run(transport=transport)
293
+
294
+
295
def connect(
    source: str | Path | KnowledgeSpace,
    *,
    name: str | None = None,
    default_k: int = 5,
    with_context: bool = False,
) -> KnowledgeConnector:
    """Open ``source`` as a :class:`KnowledgeConnector` — e.g. ``connect("space.indx")``.

    The headline entry point of :mod:`indx.agent`. It forwards every argument unchanged to
    :meth:`KnowledgeConnector.open` and returns the resulting connector.
    """
    connector = KnowledgeConnector.open(
        source,
        name=name,
        default_k=default_k,
        with_context=with_context,
    )
    return connector
indx/agent/langchain.py ADDED
@@ -0,0 +1,106 @@
1
+ """LangChain adapter: expose a knowledge space as tools and a retriever.
2
+
3
+ Two integration points, both built on :class:`~indx.agent.connector.KnowledgeConnector`:
4
+
5
+ * :func:`to_langchain_tools` — ``StructuredTool``s an agent can call (search / overview /
6
+ get_document), the agentic path.
7
+ * :func:`to_langchain_retriever` — a ``BaseRetriever`` that returns LangChain ``Document``s,
8
+ the classic RAG path (drop it into any retrieval chain).
9
+
10
+ ``langchain-core`` is the optional ``langchain`` extra, imported lazily and gated by
11
+ :func:`~indx.utils.lazy.require_extra`; importing this module is always safe.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import TYPE_CHECKING, Any
17
+
18
+ from indx.agent.connector import KnowledgeConnector
19
+ from indx.agent.schema import GET_DOCUMENT_TOOL, OVERVIEW_TOOL, SEARCH_TOOL
20
+ from indx.utils.lazy import require_extra
21
+
22
+ if TYPE_CHECKING: # pragma: no cover - typing only
23
+ from langchain_core.callbacks import ( # type: ignore[import-not-found] # optional extra: langchain
24
+ CallbackManagerForRetrieverRun,
25
+ )
26
+ from langchain_core.documents import ( # type: ignore[import-not-found] # optional extra: langchain
27
+ Document,
28
+ )
29
+ from langchain_core.tools import ( # type: ignore[import-not-found] # optional extra: langchain
30
+ StructuredTool,
31
+ )
32
+
33
+
34
def to_langchain_tools(connector: KnowledgeConnector) -> list[StructuredTool]:
    """Build LangChain ``StructuredTool``s (search / overview / get_document) for the space.

    Every tool dispatches through :meth:`KnowledgeConnector.call`, and takes its name and
    description from the canonical tool definitions in :mod:`indx.agent.schema`.

    Args:
        connector: The knowledge-space handle the tools operate on.

    Returns:
        Three ``StructuredTool`` instances, in the order search, overview, get_document.
    """
    require_extra("agent connector", "langchain", "langchain", "langchain_core")
    from langchain_core.tools import StructuredTool  # optional extra: langchain

    # ``k`` defaults to ``None`` (not a literal 5) so an omitted ``k`` resolves to
    # ``connector.default_k`` inside ``KnowledgeConnector.search`` instead of overriding it.
    def _search(query: str, k: int | None = None, doc_type: str | None = None) -> dict[str, Any]:
        return connector.call(SEARCH_TOOL.name, {"query": query, "k": k, "doc_type": doc_type})

    def _overview(sample: int = 10) -> dict[str, Any]:
        return connector.call(OVERVIEW_TOOL.name, {"sample": sample})

    def _get_document(path_or_id: str) -> dict[str, Any]:
        return connector.call(GET_DOCUMENT_TOOL.name, {"path_or_id": path_or_id})

    return [
        StructuredTool.from_function(
            func=_search, name=SEARCH_TOOL.name, description=SEARCH_TOOL.description
        ),
        StructuredTool.from_function(
            func=_overview, name=OVERVIEW_TOOL.name, description=OVERVIEW_TOOL.description
        ),
        StructuredTool.from_function(
            func=_get_document,
            name=GET_DOCUMENT_TOOL.name,
            description=GET_DOCUMENT_TOOL.description,
        ),
    ]
61
+
62
+
63
def to_langchain_retriever(connector: KnowledgeConnector, *, k: int = 5) -> Any:
    """Wrap the space in a LangChain ``BaseRetriever`` that yields ``Document``s.

    For each search hit the retriever emits one ``Document`` whose ``id`` is the chunk id,
    whose ``page_content`` is the chunk text, and whose ``metadata`` holds the hit's
    provenance: ``doc_id``, ``score``, ``source``, ``folder``, ``doc_type``, ``topics`` and
    ``tags``.

    Args:
        connector: The knowledge-space handle to search.
        k: Number of hits requested per query.
    """
    require_extra("agent connector", "langchain", "langchain", "langchain_core")
    from langchain_core.documents import Document  # optional extra: langchain
    from langchain_core.retrievers import (  # type: ignore[import-not-found]  # optional extra: langchain
        BaseRetriever,
    )

    class IndxRetriever(BaseRetriever):  # type: ignore[misc]  # BaseRetriever is Any without the extra
        """Retrieve indx knowledge-space chunks as LangChain ``Document``s."""

        connector: Any
        k: int = 5

        def _get_relevant_documents(
            self,
            query: str,
            *,
            run_manager: CallbackManagerForRetrieverRun | None = None,
        ) -> list[Document]:
            found = self.connector.search(query, k=self.k)
            documents: list[Document] = []
            for hit in found.hits:
                provenance = {
                    "doc_id": hit.document_id,
                    "score": hit.score,
                    "source": hit.source,
                    "folder": hit.folder,
                    "doc_type": hit.doc_type,
                    "topics": hit.topics,
                    "tags": hit.tags,
                }
                documents.append(
                    Document(id=hit.chunk_id, page_content=hit.text, metadata=provenance)
                )
            return documents

    return IndxRetriever(connector=connector, k=k)
indx/agent/mcp.py ADDED
@@ -0,0 +1,72 @@
1
+ """MCP server: serve a knowledge space over the Model Context Protocol.
2
+
3
+ MCP is the universal connector — Claude Desktop, Cursor, Mastra (TypeScript), and any other
4
+ MCP client speak it, so one ``indx mcp <archive>`` command plugs a knowledge space into all
5
+ of them, no Python glue on the client side. :func:`build_mcp_server` builds a ``FastMCP``
6
+ server exposing the canonical search / overview / get_document tools;
7
+ :meth:`KnowledgeConnector.serve` runs it.
8
+
9
+ FastMCP does the heavy lifting: it derives each tool's JSON schema from the handler's typed
10
+ signature and owns the transport loop. We prefer the standalone, batteries-included
11
+ ``fastmcp`` package (v2) and fall back to the ``FastMCP`` bundled in the official ``mcp`` SDK
12
+ (v1) — either satisfies the ``indx[mcp]`` extra. Imports are lazy, so importing this module is
13
+ always safe on a core-only install.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any
19
+
20
+ from indx.agent.connector import KnowledgeConnector
21
+ from indx.errors import MissingExtraError
22
+
23
+
24
def _load_fastmcp() -> Any:
    """Locate and return a ``FastMCP`` class.

    The standalone ``fastmcp`` package is tried first; if it is not importable, the
    ``FastMCP`` bundled at ``mcp.server.fastmcp`` is used instead.

    Raises:
        MissingExtraError: Neither module could be imported. The original
            ``ModuleNotFoundError`` is suppressed (``from None``) so the caller only sees
            the install hint for the ``mcp`` extra.
    """
    try:
        from fastmcp import FastMCP  # type: ignore[import-not-found]  # optional extra: mcp
    except ModuleNotFoundError:
        pass
    else:
        return FastMCP

    try:
        from mcp.server.fastmcp import (  # type: ignore[import-not-found]  # optional extra: mcp
            FastMCP,
        )
    except ModuleNotFoundError:
        raise MissingExtraError(slot="agent connector", name="mcp", extra="mcp") from None
    return FastMCP
45
+
46
+
47
def build_mcp_server(connector: KnowledgeConnector, *, name: str | None = None) -> Any:
    """Build a ``FastMCP`` server exposing the space's tools (search/overview/get_document).

    Tools are registered with ``add_tool`` (rather than the ``@server.tool`` decorator) so the
    handler functions keep their static types — mypy stays strict over this module.

    Args:
        connector: The knowledge-space handle the tools operate on.
        name: Server name; falls back to ``connector.name`` when unset or empty.

    Returns:
        The configured server instance (not yet running).
    """
    fast_mcp = _load_fastmcp()
    server = fast_mcp(name or connector.name)

    # ``k`` defaults to ``None`` (not a literal 5) so an omitted ``k`` resolves to
    # ``connector.default_k`` inside ``KnowledgeConnector.search`` instead of overriding it.
    def indx_search(
        query: str, k: int | None = None, doc_type: str | None = None
    ) -> dict[str, Any]:
        """Semantic search over the indx knowledge space."""
        return connector.search(query, k=k, doc_type=doc_type).model_dump(mode="json")

    def indx_overview(sample: int = 10) -> dict[str, Any]:
        """Describe the knowledge space: counts, types, sample documents."""
        return connector.overview(sample=sample).model_dump(mode="json")

    def indx_get_document(path_or_id: str) -> dict[str, Any]:
        """Fetch one document's full text and metadata by path or id."""
        detail = connector.get_document(path_or_id)
        if detail is None:
            # Same payload ``KnowledgeConnector.call`` returns for a failed lookup, so the
            # tool reports misses identically whichever adapter invoked it.
            return {"error": f"no document matching {path_or_id!r}"}
        return detail.model_dump(mode="json")

    server.add_tool(indx_search)
    server.add_tool(indx_overview)
    server.add_tool(indx_get_document)
    return server