scmcp-shared 0.4.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/.github/workflows/test.yml +9 -3
- scmcp_shared-0.6.0/.pre-commit-config.yaml +29 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/PKG-INFO +6 -2
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/pyproject.toml +8 -1
- scmcp_shared-0.6.0/src/scmcp_shared/__init__.py +1 -0
- scmcp_shared-0.6.0/src/scmcp_shared/agent.py +47 -0
- scmcp_shared-0.6.0/src/scmcp_shared/backend.py +44 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/cli.py +75 -46
- scmcp_shared-0.6.0/src/scmcp_shared/kb.py +139 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/logging_config.py +6 -8
- scmcp_shared-0.6.0/src/scmcp_shared/mcp_base.py +184 -0
- scmcp_shared-0.6.0/src/scmcp_shared/schema/io.py +155 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/schema/pl.py +386 -490
- scmcp_shared-0.6.0/src/scmcp_shared/schema/pp.py +900 -0
- scmcp_shared-0.6.0/src/scmcp_shared/schema/preset/__init__.py +15 -0
- {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/io.py +40 -50
- scmcp_shared-0.6.0/src/scmcp_shared/schema/preset/pl.py +843 -0
- {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/pp.py +227 -262
- {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/tl.py +368 -403
- {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/util.py +57 -72
- scmcp_shared-0.6.0/src/scmcp_shared/schema/tl.py +900 -0
- scmcp_shared-0.6.0/src/scmcp_shared/schema/util.py +123 -0
- scmcp_shared-0.6.0/src/scmcp_shared/server/__init__.py +8 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/server/auto.py +15 -11
- scmcp_shared-0.6.0/src/scmcp_shared/server/code.py +3 -0
- scmcp_shared-0.6.0/src/scmcp_shared/server/preset/__init__.py +14 -0
- {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/io.py +26 -22
- {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/pl.py +162 -78
- {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/pp.py +123 -65
- {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/tl.py +142 -79
- {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/util.py +123 -66
- scmcp_shared-0.6.0/src/scmcp_shared/server/rag.py +13 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/util.py +109 -38
- scmcp_shared-0.6.0/tests/conftest.py +29 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/test_io.py +8 -6
- scmcp_shared-0.6.0/tests/test_pp.py +108 -0
- scmcp_shared-0.6.0/tests/test_select_tool.py +16 -0
- scmcp_shared-0.4.0/src/scmcp_shared/__init__.py +0 -3
- scmcp_shared-0.4.0/src/scmcp_shared/agent.py +0 -30
- scmcp_shared-0.4.0/src/scmcp_shared/server/__init__.py +0 -13
- scmcp_shared-0.4.0/src/scmcp_shared/server/base.py +0 -148
- scmcp_shared-0.4.0/tests/conftest.py +0 -31
- scmcp_shared-0.4.0/tests/test_pp.py +0 -119
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/.github/release.yml +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/.github/workflows/publish.yml +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/LICENSE +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/README.md +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/schema/__init__.py +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/schema/tool.py +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/data/hg19/barcodes.tsv +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/data/hg19/genes.tsv +0 -0
- {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/data/hg19/matrix.mtx +0 -0
@@ -2,13 +2,20 @@ name: test package
|
|
2
2
|
|
3
3
|
on:
|
4
4
|
push:
|
5
|
-
branches: [
|
5
|
+
branches: ["main"]
|
6
6
|
pull_request:
|
7
|
-
branches: [
|
7
|
+
branches: ["main"]
|
8
8
|
|
9
9
|
jobs:
|
10
10
|
test:
|
11
11
|
runs-on: ubuntu-latest
|
12
|
+
env:
|
13
|
+
MODEL: ${{ vars.MODEL }}
|
14
|
+
BASE_URL: ${{ vars.BASE_URL }}
|
15
|
+
API_KEY: ${{ secrets.API_KEY }}
|
16
|
+
EMBEDDER_MODEL: ${{ vars.EMBEDDER_MODEL }}
|
17
|
+
EMBEDDER_BASE_URL: ${{ vars.EMBEDDER_BASE_URL }}
|
18
|
+
EMBEDDER_API_KEY: ${{ secrets.EMBEDDER_API_KEY }}
|
12
19
|
strategy:
|
13
20
|
matrix:
|
14
21
|
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
@@ -31,4 +38,3 @@ jobs:
|
|
31
38
|
|
32
39
|
- name: Run pytest
|
33
40
|
run: uv run --no-sync pytest
|
34
|
-
|
@@ -0,0 +1,29 @@
|
|
1
|
+
fail_fast: true
|
2
|
+
|
3
|
+
repos:
|
4
|
+
- repo: https://github.com/abravalheri/validate-pyproject
|
5
|
+
rev: v0.23
|
6
|
+
hooks:
|
7
|
+
- id: validate-pyproject
|
8
|
+
|
9
|
+
- repo: https://github.com/pre-commit/mirrors-prettier
|
10
|
+
rev: v3.1.0
|
11
|
+
hooks:
|
12
|
+
- id: prettier
|
13
|
+
types_or: [yaml, json5]
|
14
|
+
|
15
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
16
|
+
# Ruff version.
|
17
|
+
rev: v0.12.1
|
18
|
+
hooks:
|
19
|
+
# Run the linter.
|
20
|
+
- id: ruff-check
|
21
|
+
args: [--fix]
|
22
|
+
# Run the formatter.
|
23
|
+
- id: ruff-format
|
24
|
+
|
25
|
+
# - repo: https://github.com/northisup/pyright-pretty
|
26
|
+
# rev: v0.1.0
|
27
|
+
# hooks:
|
28
|
+
# - id: pyright-pretty
|
29
|
+
# files: ^src/|^tests/
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: scmcp_shared
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: A shared function library for scmcphub
|
5
5
|
Project-URL: Homepage, http://scmcphub.org/
|
6
6
|
Project-URL: Repository, https://github.com/scmcphub/scmcp-shared
|
@@ -37,12 +37,16 @@ License: BSD 3-Clause License
|
|
37
37
|
License-File: LICENSE
|
38
38
|
Keywords: AI,agent,bioinformatics,llm,mcp,model context protocol,scRNA-seq,single cell
|
39
39
|
Requires-Python: >=3.10
|
40
|
+
Requires-Dist: abcoder
|
41
|
+
Requires-Dist: agno
|
40
42
|
Requires-Dist: fastmcp>=2.7.0
|
41
43
|
Requires-Dist: igraph
|
42
|
-
Requires-Dist:
|
44
|
+
Requires-Dist: lancedb
|
43
45
|
Requires-Dist: leidenalg
|
44
46
|
Requires-Dist: mcp>=1.8.0
|
45
47
|
Requires-Dist: nest-asyncio
|
48
|
+
Requires-Dist: openai
|
49
|
+
Requires-Dist: requests
|
46
50
|
Requires-Dist: scanpy
|
47
51
|
Description-Content-Type: text/markdown
|
48
52
|
|
@@ -26,8 +26,12 @@ dependencies = [
|
|
26
26
|
"leidenalg",
|
27
27
|
"mcp>=1.8.0",
|
28
28
|
"fastmcp>=2.7.0",
|
29
|
+
"openai",
|
30
|
+
"lancedb",
|
31
|
+
"agno",
|
29
32
|
"nest_asyncio",
|
30
|
-
"
|
33
|
+
"abcoder",
|
34
|
+
"requests"
|
31
35
|
]
|
32
36
|
|
33
37
|
[build-system]
|
@@ -53,3 +57,6 @@ Documentation = "https://docs.scmcphub.org/"
|
|
53
57
|
[tool.pytest.ini_options]
|
54
58
|
asyncio_mode = "strict"
|
55
59
|
asyncio_default_fixture_loop_scope = "function"
|
60
|
+
|
61
|
+
[tool.ruff]
|
62
|
+
lint.ignore = ["F403", "F405"]
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.6.0"
|
@@ -0,0 +1,47 @@
|
|
1
|
+
from .schema.tool import ToolList
|
2
|
+
import os
|
3
|
+
|
4
|
+
|
5
|
+
from agno.agent import Agent
|
6
|
+
from agno.models.openai import OpenAILike
|
7
|
+
from scmcp_shared.kb import load_kb
|
8
|
+
|
9
|
+
# Chat model shared by every agent in this module. Its settings come from the
# MODEL, BASE_URL and API_KEY environment variables, read once at import time.
model = OpenAILike(
    id=os.environ.get("MODEL"),
    base_url=os.environ.get("BASE_URL"),
    api_key=os.environ.get("API_KEY"),
)
|
14
|
+
|
15
|
+
|
16
|
+
def rag_agent(task, software=None):
    """Ask a knowledge-base-backed agent for a code example that solves ``task``.

    Args:
        task: Task description; it is embedded verbatim between ``<task>`` tags
            in the prompt.
        software: Forwarded to ``load_kb`` to choose the knowledge base.

    Returns:
        The ``content`` attribute of the agent's run response.
    """
    retriever = Agent(
        model=model,
        knowledge=load_kb(software=software),
        show_tool_calls=True,
        search_knowledge=True,
    )
    # The prompt (written in Chinese) tells the agent to query the knowledge
    # base and return one code example wrapped in <code_example> tags.
    prompt = f"""
<task>
{task}
</task>
查询知识库,给出一个用于解决任务的代码示例。返回结果格式为:
<code_example>
[code_example]
</code_example>
"""
    return retriever.run(prompt).content
|
35
|
+
|
36
|
+
|
37
|
+
def select_tool(query):
    """Have the model pick the tools most relevant to ``query``.

    The agent is created with ``response_model=ToolList`` and JSON mode enabled.

    Args:
        query: Prompt handed to the agent's ``run`` method.

    Returns:
        The ``content`` attribute of the agent's run response.
    """
    system_prompt = """
you are a bioinformatician, you are given a task and a list of tools, you need to select the most directly relevant tools to use to solve the task
"""
    selector = Agent(
        model=model,
        response_model=ToolList,
        use_json_mode=True,
        instructions=system_prompt,
    )
    return selector.run(query).content
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from collections.abc import Iterable
|
2
|
+
from abcoder.backend import NotebookManager
|
3
|
+
|
4
|
+
__all__ = ["AdataManager", "NotebookManager"]
|
5
|
+
|
6
|
+
|
7
|
+
class AdataManager:
    """In-memory registry of data objects, keyed by data type and sample id.

    Objects live in ``adata_dic[adtype][sampleid]``. The class never inspects
    the stored objects (presumably AnnData instances -- confirm with callers).

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, add_adtypes=None):
        """Create the registry with the built-in data types.

        Args:
            add_adtypes: Optional extra data type(s) to register: a single
                name as ``str`` or an iterable of names.
        """
        # One sub-dictionary per data type, each mapping sampleid -> object.
        self.adata_dic = {"exp": {}, "activity": {}, "cnv": {}, "splicing": {}}
        if isinstance(add_adtypes, str):
            # Checked before Iterable: a str is iterable too, and must be
            # registered as one name rather than character by character.
            self.adata_dic[add_adtypes] = {}
        elif isinstance(add_adtypes, Iterable):
            self.adata_dic.update({adtype: {} for adtype in add_adtypes})
        # Sample id used whenever a caller does not name one explicitly.
        self.active_id = None
        self.metadata = {}
        # Backward-compatible alias for the original, misspelled attribute
        # name; both names refer to the same dictionary.
        self.metadatWa = self.metadata
        self.cr_kernel = {}
        self.cr_estimator = {}

    def get_adata(self, sampleid=None, adtype="exp", adinfo=None):
        """Return the stored object for a sample and data type.

        Args:
            sampleid: Sample id; falls back to ``active_id`` when falsy.
            adtype: Data type key inside ``adata_dic``.
            adinfo: Optional object exposing ``model_dump()``. When given, its
                ``sampleid`` and ``adtype`` entries override the arguments.

        Returns:
            The stored object, or ``None`` when ``active_id`` is ``None``
            (this holds even if ``sampleid`` was passed explicitly).

        Raises:
            KeyError: If ``adtype`` or the resolved sample id is not registered.
        """
        if adinfo is not None:
            kwargs = adinfo.model_dump()
            sampleid = kwargs.get("sampleid", None)
            adtype = kwargs.get("adtype", "exp")
        if self.active_id is None:
            return None
        sampleid = sampleid or self.active_id
        try:
            return self.adata_dic[adtype][sampleid]
        except KeyError as e:
            # Chain the original error so the failing key stays visible in
            # the traceback. Any other exception propagates unchanged.
            raise KeyError(
                f"Key {e} not found in adata_dic[{adtype}]. "
                "Please check the sampleid or adtype."
            ) from e

    def set_adata(self, adata, sampleid=None, sdtype="exp", adinfo=None):
        """Store ``adata`` under a data type and sample id.

        Args:
            adata: Object to store.
            sampleid: Sample id; falls back to ``active_id`` when falsy.
            sdtype: Data type key (named ``adtype`` in ``get_adata``). A new
                sub-dictionary is created if the type is not registered yet.
            adinfo: Optional object exposing ``model_dump()``. When given, its
                ``sampleid`` and ``adtype`` entries override the arguments.
        """
        if adinfo is not None:
            kwargs = adinfo.model_dump()
            sampleid = kwargs.get("sampleid", None)
            sdtype = kwargs.get("adtype", "exp")
        # NOTE: if both sampleid and active_id are None, the object is stored
        # under the key None.
        sampleid = sampleid or self.active_id
        if sdtype not in self.adata_dic:
            self.adata_dic[sdtype] = {}
        self.adata_dic[sdtype][sampleid] = adata
|
@@ -1,51 +1,69 @@
|
|
1
1
|
import argparse
|
2
|
-
from typing import Optional,
|
3
|
-
from enum import Enum
|
2
|
+
from typing import Optional, Dict, Callable
|
4
3
|
from .util import add_figure_route, set_env
|
5
4
|
import os
|
6
5
|
|
7
6
|
|
8
7
|
class MCPCLI:
|
9
8
|
"""Base class for CLI applications with support for dynamic modules and parameters."""
|
10
|
-
|
9
|
+
|
11
10
|
def __init__(self, name: str, help_text: str, mcp=None, manager=None):
|
12
11
|
self.name = name
|
13
12
|
self.mcp = mcp
|
14
13
|
self.manager = manager
|
15
|
-
self.parser = argparse.ArgumentParser(
|
16
|
-
description=help_text,
|
17
|
-
prog=name
|
18
|
-
)
|
14
|
+
self.parser = argparse.ArgumentParser(description=help_text, prog=name)
|
19
15
|
self.subcommands: Dict[str, tuple[argparse.ArgumentParser, Callable]] = {}
|
20
16
|
self._setup_commands()
|
21
|
-
|
17
|
+
|
22
18
|
def _setup_commands(self):
|
23
19
|
"""Setup the main commands for the CLI."""
|
24
|
-
subparsers = self.parser.add_subparsers(
|
25
|
-
|
20
|
+
subparsers = self.parser.add_subparsers(
|
21
|
+
dest="command", help="Available commands"
|
22
|
+
)
|
23
|
+
run_parser = subparsers.add_parser(
|
24
|
+
"run", help="Start the server with the specified configuration"
|
25
|
+
)
|
26
26
|
self._setup_run_command(run_parser)
|
27
|
-
self.subcommands[
|
28
|
-
|
27
|
+
self.subcommands["run"] = (run_parser, self._run_command)
|
28
|
+
|
29
29
|
def _setup_run_command(self, parser: argparse.ArgumentParser):
|
30
30
|
"""Setup run command arguments."""
|
31
|
-
parser.add_argument(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
parser.add_argument(
|
39
|
-
|
40
|
-
|
41
|
-
|
31
|
+
parser.add_argument(
|
32
|
+
"-t",
|
33
|
+
"--transport",
|
34
|
+
default="stdio",
|
35
|
+
choices=["stdio", "shttp", "sse"],
|
36
|
+
help="specify transport type",
|
37
|
+
)
|
38
|
+
parser.add_argument(
|
39
|
+
"-p", "--port", type=int, default=8000, help="transport port"
|
40
|
+
)
|
41
|
+
parser.add_argument("--host", default="127.0.0.1", help="transport host")
|
42
|
+
parser.add_argument("-f", "--forward", help="forward request to another server")
|
43
|
+
parser.add_argument(
|
44
|
+
"-wd", "--working-dir", default=".", help="working directory"
|
45
|
+
)
|
46
|
+
parser.add_argument(
|
47
|
+
"--run-mode", choices=["tool", "code"], default="code", help="run mode"
|
48
|
+
)
|
49
|
+
parser.add_argument(
|
50
|
+
"--tool-mode",
|
51
|
+
choices=["auto", "normal"],
|
52
|
+
default="normal",
|
53
|
+
help="tool selection mode",
|
54
|
+
)
|
55
|
+
parser.add_argument("--log-file", help="log file path, use stdout if None")
|
56
|
+
|
57
|
+
def add_command(
|
58
|
+
self, name: str, help_text: str, handler: Callable
|
59
|
+
) -> argparse.ArgumentParser:
|
42
60
|
"""add new subcommand
|
43
|
-
|
61
|
+
|
44
62
|
Args:
|
45
63
|
name: subcommand name
|
46
64
|
help_text: help text
|
47
65
|
handler: handler function
|
48
|
-
|
66
|
+
|
49
67
|
Returns:
|
50
68
|
ArgumentParser: parser for the subcommand
|
51
69
|
"""
|
@@ -53,24 +71,24 @@ class MCPCLI:
|
|
53
71
|
parser = subparsers.add_parser(name, help=help_text)
|
54
72
|
self.subcommands[name] = (parser, handler)
|
55
73
|
return parser
|
56
|
-
|
74
|
+
|
57
75
|
def get_command_parser(self, name: str) -> Optional[argparse.ArgumentParser]:
|
58
76
|
"""get the parser for the subcommand
|
59
|
-
|
77
|
+
|
60
78
|
Args:
|
61
79
|
name: subcommand name
|
62
|
-
|
80
|
+
|
63
81
|
Returns:
|
64
82
|
ArgumentParser: parser for the subcommand, return None if the subcommand does not exist
|
65
83
|
"""
|
66
84
|
if name in self.subcommands:
|
67
85
|
return self.subcommands[name][0]
|
68
86
|
return None
|
69
|
-
|
87
|
+
|
70
88
|
def _run_command(self, args):
|
71
89
|
"""Start the server with the specified configuration."""
|
72
90
|
os.chdir(args.working_dir)
|
73
|
-
if hasattr(args,
|
91
|
+
if hasattr(args, "module"):
|
74
92
|
if "all" in args.module:
|
75
93
|
modules = None
|
76
94
|
elif isinstance(args.module, list) and bool(args.module):
|
@@ -78,16 +96,31 @@ class MCPCLI:
|
|
78
96
|
else:
|
79
97
|
modules = None
|
80
98
|
if self.manager is not None:
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
99
|
+
from .backend import NotebookManager, AdataManager
|
100
|
+
|
101
|
+
if args.run_mode == "code":
|
102
|
+
backend = NotebookManager
|
103
|
+
self.mcp = self.manager(
|
104
|
+
self.name, include_tags=["nb", "rag"], backend=backend
|
105
|
+
).mcp
|
88
106
|
else:
|
89
|
-
|
90
|
-
|
107
|
+
backend = AdataManager
|
108
|
+
self.mcp = self.manager(
|
109
|
+
self.name, include_modules=modules, backend=backend
|
110
|
+
).mcp
|
111
|
+
all_tools = self.mcp._tool_manager._tools
|
112
|
+
auto_tools = {
|
113
|
+
tool: all_tools[tool]
|
114
|
+
for tool in all_tools
|
115
|
+
if "auto" in all_tools[tool].tags
|
116
|
+
}
|
117
|
+
if args.tool_mode == "auto":
|
118
|
+
all_tools = self.mcp._tool_manager._tools
|
119
|
+
self.mcp._tool_manager._all_tools = all_tools
|
120
|
+
self.mcp._tool_manager._tools = auto_tools
|
121
|
+
else:
|
122
|
+
for name in auto_tools:
|
123
|
+
self.mcp._tool_manager.remove_tool(name)
|
91
124
|
elif self.mcp is not None:
|
92
125
|
pass
|
93
126
|
else:
|
@@ -98,19 +131,15 @@ class MCPCLI:
|
|
98
131
|
def run_mcp(self, log_file, forward, transport, host, port):
|
99
132
|
set_env(log_file, forward, transport, host, port)
|
100
133
|
from .logging_config import setup_logger
|
134
|
+
|
101
135
|
setup_logger(log_file)
|
102
136
|
if transport == "stdio":
|
103
137
|
self.mcp.run()
|
104
138
|
elif transport in ["sse", "shttp"]:
|
105
139
|
transport = "streamable-http" if transport == "shttp" else transport
|
106
140
|
add_figure_route(self.mcp)
|
107
|
-
self.mcp.run(
|
108
|
-
|
109
|
-
host=host,
|
110
|
-
port=port,
|
111
|
-
log_level="info"
|
112
|
-
)
|
113
|
-
|
141
|
+
self.mcp.run(transport=transport, host=host, port=port, log_level="info")
|
142
|
+
|
114
143
|
def run(self):
|
115
144
|
"""Run the CLI application."""
|
116
145
|
args = self.parser.parse_args()
|
@@ -0,0 +1,139 @@
|
|
1
|
+
from agno.document.chunking.agentic import AgenticChunking
|
2
|
+
from agno.embedder.openai import OpenAIEmbedder
|
3
|
+
from agno.models.deepseek import DeepSeek
|
4
|
+
from agno.vectordb.lancedb import LanceDb
|
5
|
+
from agno.knowledge.agent import AgentKnowledge
|
6
|
+
import importlib.resources
|
7
|
+
import os
|
8
|
+
import requests
|
9
|
+
import zipfile
|
10
|
+
import tempfile
|
11
|
+
import shutil
|
12
|
+
from pathlib import Path
|
13
|
+
import logging
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
# Embedder and chat-model settings, read from the environment at import time.
embedder_id = os.environ.get("EMBEDDER_MODEL")
embedder_api_key = os.environ.get("EMBEDDER_API_KEY")
embedder_base_url = os.environ.get("EMBEDDER_BASE_URL")
model_id = os.environ.get("MODEL")
model_api_key = os.environ.get("API_KEY")
model_base_url = os.environ.get("BASE_URL")

# Configuration: the vector-database folder name inside the package and the
# URL of the zipped database.
config = dict(
    local_dir="vector_db",
    huggingface_url="https://huggingface.co/datasets/huangshing/scmcp_vector_db/resolve/main/vector_db.zip",
)
|
29
|
+
|
30
|
+
|
31
|
+
def download_vector_db(source="huggingface"):
    """Download the vector database archive and unpack it inside the package.

    Nothing is downloaded when ``scmcp.lance`` already exists in the target
    directory.

    Args:
        source: Download source. Only ``"huggingface"`` is supported.

    Returns:
        str: Path of the local vector-database directory.

    Raises:
        RuntimeError: If the HTTP request fails, or if ``source`` is
            unsupported or the archive cannot be written or extracted.
    """
    # importlib.resources.path() returns a context manager rather than a
    # path, so it cannot be handed to Path(); files() yields the package
    # directory directly.
    package_dir = Path(str(importlib.resources.files("scmcp_shared")))
    local_dir = package_dir / config["local_dir"]
    local_dir.mkdir(exist_ok=True)

    # Skip the download when the database is already present.
    if (local_dir / "scmcp.lance").exists():
        logger.info("Vector database already exists locally")
        return str(local_dir)

    logger.info(f"Downloading vector database from {source}...")

    # Download into a temporary directory so the zip file is removed
    # automatically once it has been extracted.
    with tempfile.TemporaryDirectory() as temp_dir:
        zip_path = Path(temp_dir) / "vector_db.zip"

        try:
            if source == "huggingface":
                url = config["huggingface_url"]
            else:
                raise ValueError(f"Unsupported source: {source}")

            logger.info(f"Downloading from: {url}")
            # (connect, read) timeouts keep a stalled server from hanging the
            # caller forever; the with-block releases the connection.
            with requests.get(url, stream=True, timeout=(10, 60)) as response:
                response.raise_for_status()
                with open(zip_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

            logger.info("Extracting downloaded archive...")
            _extract_archive(zip_path, local_dir)

            logger.info(f"Vector database downloaded and extracted to: {local_dir}")
            return str(local_dir)

        except requests.RequestException as e:
            raise RuntimeError(f"Failed to download vector database: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Failed to process vector database: {e}") from e
|
82
|
+
|
83
|
+
|
84
|
+
def _extract_archive(archive_path, extract_dir):
|
85
|
+
"""解压归档文件"""
|
86
|
+
with zipfile.ZipFile(archive_path, "r") as zip_ref:
|
87
|
+
zip_ref.extractall(extract_dir)
|
88
|
+
|
89
|
+
# 如果解压后只有一个子目录,移动内容到目标目录
|
90
|
+
extracted_items = list(Path(extract_dir).iterdir())
|
91
|
+
if len(extracted_items) == 1 and extracted_items[0].is_dir():
|
92
|
+
subdir = extracted_items[0]
|
93
|
+
for item in subdir.iterdir():
|
94
|
+
shutil.move(str(item), str(extract_dir / item.name))
|
95
|
+
subdir.rmdir()
|
96
|
+
|
97
|
+
|
98
|
+
def load_kb(software=None, auto_download=True, download_source="huggingface"):
    """Build the knowledge base backed by the packaged vector database.

    Args:
        software: Software name; used as the LanceDb table name.
        auto_download: Whether to download the vector database when it is not
            present in the package.
        download_source: Source passed to ``download_vector_db``.

    Returns:
        The ``AgentKnowledge`` instance wired to the vector database.

    Raises:
        FileNotFoundError: If the vector database is missing and
            ``auto_download`` is false.
    """
    # importlib.resources.path() is lazy: it returns a context manager and
    # does not raise FileNotFoundError when called, so existence is checked
    # explicitly on the real directory instead.
    vector_db_dir = Path(str(importlib.resources.files("scmcp_shared"))) / "vector_db"
    # An empty directory (e.g. left by an interrupted download) counts as
    # missing.
    if vector_db_dir.is_dir() and any(vector_db_dir.iterdir()):
        vector_db_path = str(vector_db_dir)
    elif auto_download:
        logger.info("Vector database not found in package, attempting download...")
        vector_db_path = download_vector_db(download_source)
    else:
        raise FileNotFoundError(
            "Vector database not found. Set auto_download=True to download automatically, "
            "or manually place the vector database in the scmcp_shared package."
        )

    vector_db = LanceDb(
        table_name=software,
        uri=vector_db_path,
        embedder=OpenAIEmbedder(
            id=embedder_id,
            base_url=embedder_base_url,
            api_key=embedder_api_key,
        ),
    )
    model = DeepSeek(
        id=model_id,
        base_url=model_base_url,
        api_key=model_api_key,
    )
    knowledge_base = AgentKnowledge(
        chunking_strategy=AgenticChunking(model=model),
        vector_db=vector_db,
    )

    return knowledge_base
|
@@ -1,18 +1,17 @@
|
|
1
1
|
import logging
|
2
2
|
import sys
|
3
|
-
|
3
|
+
|
4
4
|
from .util import get_env
|
5
5
|
|
6
|
-
def setup_logger(name="sc-mcp-server", log_file=None):
|
7
6
|
|
7
|
+
def setup_logger(name="sc-mcp-server", log_file=None):
|
8
8
|
logger = logging.getLogger(name)
|
9
9
|
logger.setLevel(logging.INFO)
|
10
10
|
if logger.handlers:
|
11
11
|
return logger
|
12
|
-
|
12
|
+
|
13
13
|
formatter = logging.Formatter(
|
14
|
-
|
15
|
-
'%Y-%m-%d %H:%M:%S'
|
14
|
+
"%(asctime)s - %(name)s - %(levelname)s - %(message)s", "%Y-%m-%d %H:%M:%S"
|
16
15
|
)
|
17
16
|
if log_file is None:
|
18
17
|
log_file = get_env("LOG_FILE")
|
@@ -20,12 +19,11 @@ def setup_logger(name="sc-mcp-server", log_file=None):
|
|
20
19
|
log_handler = logging.FileHandler(log_file)
|
21
20
|
log_handler.setFormatter(formatter)
|
22
21
|
logger.addHandler(log_handler)
|
23
|
-
|
22
|
+
|
24
23
|
logger.info(f"logging output: {log_file}")
|
25
24
|
else:
|
26
25
|
log_handler = logging.StreamHandler(sys.stdout)
|
27
26
|
log_handler.setFormatter(formatter)
|
28
27
|
logger.addHandler(log_handler)
|
29
|
-
logger.info(
|
28
|
+
logger.info("loggin file output: stdout")
|
30
29
|
return logger
|
31
|
-
|