model-tree-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ """
2
+ model-tree MCP server.
3
+
4
+ Exposes two things to the client (Claude Code, etc.):
5
+
6
+ - tool `search_models`: queries the public /api/search endpoint and returns the closest
7
+ models to a described situation, each with its metadata (including `stat_fit`, the
8
+ statistical-fit profile used to match a model to a dataset).
9
+ - prompt `analyze_dataset`: orchestrates a dataset-grounded recommendation. The reasoning
10
+ (investigation + EDA) runs in the AGENT, on the user's tokens; the raw data never leaves
11
+ the machine. The package itself reads no data and holds no secrets.
12
+
13
+ Client config:
14
+ "model-tree": { "command": "uvx", "args": ["model-tree-mcp"] }
15
+
16
+ The endpoint URL can be overridden with the MODEL_TREE_API env var.
17
+ """
18
+
19
+ import os
20
+ import httpx
21
+ from mcp.server.fastmcp import FastMCP
22
+
23
# URL of the search endpoint that `search_models` posts to. It is resolved
# once, at import time: the MODEL_TREE_API environment variable wins when it
# is set, otherwise the public default below is used.
ENDPOINT = os.getenv("MODEL_TREE_API", "https://model-tree.vercel.app/api/search")
27
+
28
# Shared FastMCP server instance, named "model-tree". The tool and the prompt
# defined in this module register themselves on it through its decorators
# (`@mcp.tool()` / `@mcp.prompt()`), and `main()` starts it with `mcp.run()`.
mcp = FastMCP("model-tree")
29
+
30
+
31
@mcp.tool()
def search_models(situation: str, top_k: int = 8) -> list[dict]:
    """Search the taxonomy for the predictive models closest to a situation.

    Pass an ENGLISH description enriched with technical vocabulary (task, target
    distribution, n/p regime, feature types, desired loss/metric), not the user's
    raw words: this widens recall.

    Each returned model carries its fields (diff_siblings, strengths, weaknesses,
    recommended_for, not_recommended_for, keywords) and `stat_fit` (statistical-fit
    profile: target type/distribution, data regime, feature types, assumptions,
    supported loss, contraindications). Use `stat_fit` and the desired loss to judge
    how well each candidate fits the dataset.

    Args:
        situation: technical English description of the problem/data/constraints.
        top_k: how many candidates to return (default 8; pick 3-4 final ones).
    """
    # NOTE(review): the docstring wording above is intentionally left untouched;
    # FastMCP presumably publishes it to the client as the tool description, so
    # editing it would change what the agent sees -- confirm before rewording.
    #
    # Returns: the list stored under "modelos" in the endpoint's JSON reply, or
    #   an empty list when that key is missing or null.
    # Raises: httpx.HTTPStatusError on a 4xx/5xx reply (via raise_for_status);
    #   ValueError when the reply body is JSON but not an object.

    # The wire format uses Portuguese keys ("situacao", "topK", "modelos");
    # they are part of the endpoint contract and must not be renamed.
    resp = httpx.post(
        ENDPOINT,
        json={"situacao": situation, "topK": top_k},
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    resp.raise_for_status()

    payload = resp.json()
    if not isinstance(payload, dict):
        # Previously this surfaced as an opaque AttributeError on `.get`;
        # a misconfigured MODEL_TREE_API is the most likely cause.
        raise ValueError(
            "Unexpected response from the model-tree endpoint: expected a JSON "
            f"object, got {type(payload).__name__}"
        )

    # `"modelos": null` used to leak None to the caller despite the declared
    # list return type; normalise both the missing and the null case to [].
    return payload.get("modelos") or []
56
+
57
+
58
@mcp.prompt()
def analyze_dataset(data_path: str = "") -> str:
    """Recommend predictive models by analyzing a local dataset.

    Investigates the problem, runs an EDA (deep or shallow), searches the taxonomy,
    and recommends 3-4 models with trade-offs, grounded in the dataset's statistics.
    """
    # This function performs no I/O and never opens `data_path`; it only
    # interpolates the location into the instruction text returned below.
    # An empty `data_path` falls back to a generic description of the location.
    if data_path:
        location = data_path
    else:
        location = "the data file/directory indicated by the user"

    # The prompt text is runtime output and is kept verbatim.
    prompt = f"""You are a senior data-science tutor helping choose predictive models for a real dataset. The data is at: {location}.

Work conversationally, like a tutor — never dump everything at once. Follow these phases:

PHASE 1 — INVESTIGATE (one decisive question at a time, wait for the answer):
- Which column is the TARGET (or is this unsupervised)?
- What LOSS / METRIC matters to the user (e.g. RMSE, MAE, quantile/pinball, log-loss, AUC, CRPS, business cost)? This is a tie-breaker later.
- DEEP or SHALLOW analysis? Deep = I read the raw data and profile it now. Shallow = I read the user's prior EDA artifacts (notebooks, profiling reports) and infer from them.
Do not ask all three as a list; ask the most decisive one first and adapt.

PHASE 2 — EDA (you do this with your own tools; the raw data NEVER leaves the machine):
- DEEP: read the data at the path, then profile: target type (continuous / count / binary / multiclass / ordinal / proportion / time-to-event / time-series) and its empirical distribution (e.g. looks Poisson, heavy-tailed, bimodal); n rows and n features; feature types (numeric / categorical incl. high-cardinality / text / image / temporal); missingness; class balance; relevant moments (mean/median/variance) only where they inform the choice; and which features could be DERIVED (dates → seasonality/lags, text → embeddings).
- SHALLOW: locate and read the user's existing EDA outputs and extract the same profile from them; state what you could not determine.
- Summarize the profile back to the user briefly before recommending.

PHASE 3 — SEARCH:
- Call the tool `search_models` ONCE with a concise ENGLISH query enriched from the profile (task + target distribution + n/p regime + feature types + desired loss), not the user's raw words.

PHASE 4 — RECOMMEND 3-4 models with trade-offs:
- Ground every recommendation ONLY in the returned candidates and their fields, especially each candidate's `stat_fit` (target/distribution, data regime, feature types, assumptions, supported `loss`, contraindications). Do not invent models.
- Match against the profile AND the user's loss: a candidate that does not support the desired loss is a weaker fit even if otherwise suitable (e.g. quantile loss → quantile regression or gradient boosting with a quantile objective; calibrated uncertainty / CRPS → probabilistic models like NGBoost).
- The 3-4 models MUST be meaningfully DISTINCT options that span the decision space, not variants of the same approach (e.g. for count data with excess zeros: a Negative Binomial baseline, a Zero-Inflated model, and a Hurdle model — NOT ZIP and ZINB, which are the same family).
- Lead with the best fit for the task at the current state of the art; keep valid classics (linear/logistic regression, random forest, ARIMA) as first-class when they fit; flag when a candidate is contraindicated for this dataset (e.g. overdispersed counts → negative binomial over Poisson; tiny n → avoid heavy deep models).
- For each: one line on WHY it fits this profile + its key trade-off. End by offering to go deeper on any of them.
- Be honest: if the dataset is ill-posed or no candidate truly fits, say so and explain what would be needed.

LANGUAGE: reply in the language the user writes in; default to English. Keep proper names of models, libraries and metrics in their conventional (English) form."""
    return prompt
93
+
94
+
95
def main() -> None:
    """Start the MCP server by calling ``mcp.run()`` on the module-level instance.

    This is the target of the ``model-tree-mcp`` console script declared in the
    package's entry points. ``run()`` is called with no arguments, so the
    server uses FastMCP's default transport (the README's development notes
    describe it as stdio).
    """
    mcp.run()
97
+
98
+
99
# Start the server when this file is executed directly as a script; importing
# the module (as the console-script entry point does) does not trigger it.
if __name__ == "__main__":
    main()
@@ -0,0 +1,61 @@
1
+ Metadata-Version: 2.4
2
+ Name: model-tree-mcp
3
+ Version: 0.1.0
4
+ Summary: MCP server que recomenda modelos preditivos da árvore model-tree.
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: httpx>=0.27
7
+ Requires-Dist: mcp>=1.2.0
8
+ Description-Content-Type: text/markdown
9
+
10
+ # model-tree-mcp
11
+
12
+ Servidor MCP que recomenda modelos preditivos (ML, Deep Learning, Estatística
13
+ Clássica) a partir da descrição de uma situação em linguagem natural.
14
+
15
+ A tool consulta um endpoint hospedado que faz a busca vetorial na árvore de
16
+ 400+ modelos curados.
17
+
18
+ ## Uso (Claude Code / Claude Desktop)
19
+
20
+ Adicione ao seu config de MCP:
21
+
22
+ ```json
23
+ {
24
+ "mcpServers": {
25
+ "model-tree": {
26
+ "command": "uvx",
27
+ "args": ["model-tree-mcp"]
28
+ }
29
+ }
30
+ }
31
+ ```
32
+
33
+ O `uvx` baixa e roda o pacote num ambiente isolado, sem instalação manual.
34
+
35
+ ### Apontar para outro endpoint
36
+
37
+ Por padrão a tool chama o endpoint público oficial. Para usar outro (ex.: um
38
+ deploy próprio), defina a env var `MODEL_TREE_API`:
39
+
40
+ ```json
41
+ "env": { "MODEL_TREE_API": "https://seu-deploy.vercel.app/api/search" }
42
+ ```
43
+
44
+ ## Tool e prompt
45
+
46
+ - **Tool `search_models(situation: str, top_k: int = 8)`** — devolve os modelos mais
47
+ próximos da situação descrita, cada um com seus campos (diff_siblings, strengths,
48
+ weaknesses, recommended_for, not_recommended_for, keywords) e o `stat_fit` (perfil de
49
+ encaixe estatístico: tipo/distribuição do target, regime n/p, tipos de feature,
50
+ suposições, loss suportada, contraindicações).
51
+ - **Prompt `analyze_dataset(data_path)`** — orquestra a recomendação a partir de um
52
+ dataset local: o agente investiga (target, loss), faz a EDA (profunda no dado cru ou
53
+ rasa numa EDA prévia, com os tokens do usuário) e recomenda 3-4 modelos com tradeoffs.
54
+ Os dados crus nunca saem da máquina.
55
+
56
+ ## Desenvolvimento
57
+
58
+ ```bash
59
+ uv run model-tree-mcp # roda o server localmente (stdio)
60
+ uv build # gera o pacote distribuível
61
+ ```
@@ -0,0 +1,5 @@
1
+ model_tree_mcp/__init__.py,sha256=-3hxToGdlUwmayeO_W6kBJBPf39swSCu6czCbVpSdSs,5865
2
+ model_tree_mcp-0.1.0.dist-info/METADATA,sha256=X4Pk2EcJNZmlph07zwAj4oVgYhWr4M3wyjJ6UuMZhxM,1927
3
+ model_tree_mcp-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
4
+ model_tree_mcp-0.1.0.dist-info/entry_points.txt,sha256=MPqSI46BXJwJ1SkVhzbJ9EXxFFc1f6zsaW2zxKHgWgs,55
5
+ model_tree_mcp-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ model-tree-mcp = model_tree_mcp:main