scmcp-shared 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/.github/workflows/test.yml +9 -3
  2. scmcp_shared-0.6.0/.pre-commit-config.yaml +29 -0
  3. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/PKG-INFO +6 -2
  4. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/pyproject.toml +8 -1
  5. scmcp_shared-0.6.0/src/scmcp_shared/__init__.py +1 -0
  6. scmcp_shared-0.6.0/src/scmcp_shared/agent.py +47 -0
  7. scmcp_shared-0.6.0/src/scmcp_shared/backend.py +44 -0
  8. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/cli.py +75 -46
  9. scmcp_shared-0.6.0/src/scmcp_shared/kb.py +139 -0
  10. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/logging_config.py +6 -8
  11. scmcp_shared-0.6.0/src/scmcp_shared/mcp_base.py +184 -0
  12. scmcp_shared-0.6.0/src/scmcp_shared/schema/io.py +155 -0
  13. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/schema/pl.py +386 -490
  14. scmcp_shared-0.6.0/src/scmcp_shared/schema/pp.py +900 -0
  15. scmcp_shared-0.6.0/src/scmcp_shared/schema/preset/__init__.py +15 -0
  16. {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/io.py +40 -50
  17. scmcp_shared-0.6.0/src/scmcp_shared/schema/preset/pl.py +843 -0
  18. {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/pp.py +227 -262
  19. {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/tl.py +368 -403
  20. {scmcp_shared-0.4.0/src/scmcp_shared/schema → scmcp_shared-0.6.0/src/scmcp_shared/schema/preset}/util.py +57 -72
  21. scmcp_shared-0.6.0/src/scmcp_shared/schema/tl.py +900 -0
  22. scmcp_shared-0.6.0/src/scmcp_shared/schema/util.py +123 -0
  23. scmcp_shared-0.6.0/src/scmcp_shared/server/__init__.py +8 -0
  24. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/server/auto.py +15 -11
  25. scmcp_shared-0.6.0/src/scmcp_shared/server/code.py +3 -0
  26. scmcp_shared-0.6.0/src/scmcp_shared/server/preset/__init__.py +14 -0
  27. {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/io.py +26 -22
  28. {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/pl.py +162 -78
  29. {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/pp.py +123 -65
  30. {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/tl.py +142 -79
  31. {scmcp_shared-0.4.0/src/scmcp_shared/server → scmcp_shared-0.6.0/src/scmcp_shared/server/preset}/util.py +123 -66
  32. scmcp_shared-0.6.0/src/scmcp_shared/server/rag.py +13 -0
  33. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/util.py +109 -38
  34. scmcp_shared-0.6.0/tests/conftest.py +29 -0
  35. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/test_io.py +8 -6
  36. scmcp_shared-0.6.0/tests/test_pp.py +108 -0
  37. scmcp_shared-0.6.0/tests/test_select_tool.py +16 -0
  38. scmcp_shared-0.4.0/src/scmcp_shared/__init__.py +0 -3
  39. scmcp_shared-0.4.0/src/scmcp_shared/agent.py +0 -30
  40. scmcp_shared-0.4.0/src/scmcp_shared/server/__init__.py +0 -13
  41. scmcp_shared-0.4.0/src/scmcp_shared/server/base.py +0 -148
  42. scmcp_shared-0.4.0/tests/conftest.py +0 -31
  43. scmcp_shared-0.4.0/tests/test_pp.py +0 -119
  44. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/.github/release.yml +0 -0
  45. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/.github/workflows/publish.yml +0 -0
  46. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/LICENSE +0 -0
  47. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/README.md +0 -0
  48. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/schema/__init__.py +0 -0
  49. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/src/scmcp_shared/schema/tool.py +0 -0
  50. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/data/hg19/barcodes.tsv +0 -0
  51. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/data/hg19/genes.tsv +0 -0
  52. {scmcp_shared-0.4.0 → scmcp_shared-0.6.0}/tests/data/hg19/matrix.mtx +0 -0
@@ -2,13 +2,20 @@ name: test package
2
2
 
3
3
  on:
4
4
  push:
5
- branches: [ "main" ]
5
+ branches: ["main"]
6
6
  pull_request:
7
- branches: [ "main" ]
7
+ branches: ["main"]
8
8
 
9
9
  jobs:
10
10
  test:
11
11
  runs-on: ubuntu-latest
12
+ env:
13
+ MODEL: ${{ vars.MODEL }}
14
+ BASE_URL: ${{ vars.BASE_URL }}
15
+ API_KEY: ${{ secrets.API_KEY }}
16
+ EMBEDDER_MODEL: ${{ vars.EMBEDDER_MODEL }}
17
+ EMBEDDER_BASE_URL: ${{ vars.EMBEDDER_BASE_URL }}
18
+ EMBEDDER_API_KEY: ${{ secrets.EMBEDDER_API_KEY }}
12
19
  strategy:
13
20
  matrix:
14
21
  python-version: ["3.10", "3.11", "3.12", "3.13"]
@@ -31,4 +38,3 @@ jobs:
31
38
 
32
39
  - name: Run pytest
33
40
  run: uv run --no-sync pytest
34
-
@@ -0,0 +1,29 @@
1
+ fail_fast: true
2
+
3
+ repos:
4
+ - repo: https://github.com/abravalheri/validate-pyproject
5
+ rev: v0.23
6
+ hooks:
7
+ - id: validate-pyproject
8
+
9
+ - repo: https://github.com/pre-commit/mirrors-prettier
10
+ rev: v3.1.0
11
+ hooks:
12
+ - id: prettier
13
+ types_or: [yaml, json5]
14
+
15
+ - repo: https://github.com/astral-sh/ruff-pre-commit
16
+ # Ruff version.
17
+ rev: v0.12.1
18
+ hooks:
19
+ # Run the linter.
20
+ - id: ruff-check
21
+ args: [--fix]
22
+ # Run the formatter.
23
+ - id: ruff-format
24
+
25
+ # - repo: https://github.com/northisup/pyright-pretty
26
+ # rev: v0.1.0
27
+ # hooks:
28
+ # - id: pyright-pretty
29
+ # files: ^src/|^tests/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scmcp_shared
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: A shared function library for scmcphub
5
5
  Project-URL: Homepage, http://scmcphub.org/
6
6
  Project-URL: Repository, https://github.com/scmcphub/scmcp-shared
@@ -37,12 +37,16 @@ License: BSD 3-Clause License
37
37
  License-File: LICENSE
38
38
  Keywords: AI,agent,bioinformatics,llm,mcp,model context protocol,scRNA-seq,single cell
39
39
  Requires-Python: >=3.10
40
+ Requires-Dist: abcoder
41
+ Requires-Dist: agno
40
42
  Requires-Dist: fastmcp>=2.7.0
41
43
  Requires-Dist: igraph
42
- Requires-Dist: instructor>=1.8.3
44
+ Requires-Dist: lancedb
43
45
  Requires-Dist: leidenalg
44
46
  Requires-Dist: mcp>=1.8.0
45
47
  Requires-Dist: nest-asyncio
48
+ Requires-Dist: openai
49
+ Requires-Dist: requests
46
50
  Requires-Dist: scanpy
47
51
  Description-Content-Type: text/markdown
48
52
 
@@ -26,8 +26,12 @@ dependencies = [
26
26
  "leidenalg",
27
27
  "mcp>=1.8.0",
28
28
  "fastmcp>=2.7.0",
29
+ "openai",
30
+ "lancedb",
31
+ "agno",
29
32
  "nest_asyncio",
30
- "instructor>=1.8.3",
33
+ "abcoder",
34
+ "requests"
31
35
  ]
32
36
 
33
37
  [build-system]
@@ -53,3 +57,6 @@ Documentation = "https://docs.scmcphub.org/"
53
57
  [tool.pytest.ini_options]
54
58
  asyncio_mode = "strict"
55
59
  asyncio_default_fixture_loop_scope = "function"
60
+
61
+ [tool.ruff]
62
+ lint.ignore = ["F403", "F405"]
@@ -0,0 +1 @@
1
+ __version__ = "0.6.0"
@@ -0,0 +1,47 @@
1
+ from .schema.tool import ToolList
2
+ import os
3
+
4
+
5
+ from agno.agent import Agent
6
+ from agno.models.openai import OpenAILike
7
+ from scmcp_shared.kb import load_kb
8
+
9
# Chat model shared by every agent built in this module. The model id,
# endpoint and credentials come from the MODEL / BASE_URL / API_KEY
# environment variables, read once when the module is imported.
model = OpenAILike(
    api_key=os.environ.get("API_KEY"),
    base_url=os.environ.get("BASE_URL"),
    id=os.environ.get("MODEL"),
)
14
+
15
+
16
def rag_agent(task, software=None):
    """Ask a knowledge-base-backed agent for a code example that solves ``task``.

    Args:
        task: Task description; interpolated into the ``<task>`` section of
            the prompt.
        software: Forwarded to ``load_kb`` to choose which knowledge base
            to load.

    Returns:
        The ``content`` attribute of the agent's response.
    """
    kb = load_kb(software=software)
    retrieval_agent = Agent(
        model=model,
        knowledge=kb,
        show_tool_calls=True,
        search_knowledge=True,
    )
    # The prompt (in Chinese) asks the agent to query the knowledge base and
    # reply with a code example wrapped in <code_example> tags.
    prompt = f"""
<task>
{task}
</task>
查询知识库,给出一个用于解决任务的代码示例。返回结果格式为:
<code_example>
[code_example]
</code_example>
"""
    return retrieval_agent.run(prompt).content
35
+
36
+
37
def select_tool(query):
    """Ask the shared model which tools are most relevant to ``query``.

    The agent is configured with ``ToolList`` as its response model and with
    JSON mode enabled.

    Args:
        query: Text sent to the agent; per the instructions it should hold
            the task and the list of candidate tools.

    Returns:
        The ``content`` attribute of the agent's response (presumably a
        ``ToolList`` instance -- confirm against the agno documentation).
    """
    selector_instructions = """
you are a bioinformatician, you are given a task and a list of tools, you need to select the most directly relevant tools to use to solve the task
"""
    selector = Agent(
        model=model,
        response_model=ToolList,
        use_json_mode=True,
        instructions=selector_instructions,
    )
    return selector.run(query).content
@@ -0,0 +1,44 @@
1
+ from collections.abc import Iterable
2
+ from abcoder.backend import NotebookManager
3
+
4
+ __all__ = ["AdataManager", "NotebookManager"]
5
+
6
+
7
class AdataManager:
    """In-memory registry of data objects keyed by data type and sample id.

    Attributes:
        adata_dic: Maps a data-type name ("exp", "activity", "cnv",
            "splicing", plus any extras) to a ``{sampleid: adata}`` dict.
        active_id: Sample id used when a caller does not supply one. It
            starts as ``None`` and is never assigned by this class; callers
            set it.
        metadata: Empty dict created at construction; not used by this class.
        metadatWa: Misspelled legacy name kept as an alias of ``metadata``
            (same dict object) so existing readers keep working.
        cr_kernel, cr_estimator: Empty dicts created at construction; not
            used by this class (presumably caches owned by callers -- confirm).
    """

    def __init__(self, add_adtypes=None):
        """Create the registry with the default data types.

        Args:
            add_adtypes: Optional extra data-type name (``str``) or iterable
                of names; each gets its own empty sample dict.
        """
        self.adata_dic = {"exp": {}, "activity": {}, "cnv": {}, "splicing": {}}
        # A bare string is iterable too, so it must be checked first to avoid
        # registering one data type per character.
        if isinstance(add_adtypes, str):
            self.adata_dic[add_adtypes] = {}
        elif isinstance(add_adtypes, Iterable):
            self.adata_dic.update({adtype: {} for adtype in add_adtypes})
        self.active_id = None
        self.metadata = {}
        # Backward-compatible alias for the original misspelled attribute.
        self.metadatWa = self.metadata
        self.cr_kernel = {}
        self.cr_estimator = {}

    def get_adata(self, sampleid=None, adtype="exp", adinfo=None):
        """Return the stored object for a sample and data type.

        Args:
            sampleid: Sample id to look up; falls back to ``active_id`` when
                falsy.
            adtype: Data-type bucket to look in.
            adinfo: Optional object exposing ``model_dump()``. When given,
                its "sampleid" and "adtype" entries override the two
                arguments above.

        Returns:
            The stored object, or ``None`` when ``active_id`` has not been
            set yet (even if ``sampleid`` was passed explicitly).

        Raises:
            KeyError: If the data type or sample id is not registered.
        """
        if adinfo is not None:
            kwargs = adinfo.model_dump()
            sampleid = kwargs.get("sampleid", None)
            adtype = kwargs.get("adtype", "exp")
        if self.active_id is None:
            return None
        sampleid = sampleid or self.active_id
        try:
            return self.adata_dic[adtype][sampleid]
        except KeyError as e:
            # Chain the original error so the failing key stays in the
            # traceback. Other exception types now propagate unchanged
            # instead of being rewrapped as a bare Exception.
            raise KeyError(
                f"Key {e} not found in adata_dic[{adtype}].Please check the sampleid or adtype."
            ) from e

    def set_adata(self, adata, sampleid=None, sdtype="exp", adinfo=None):
        """Store ``adata`` under a data type and sample id.

        Args:
            adata: Object to store.
            sampleid: Sample id to store under; falls back to ``active_id``
                when falsy.
            sdtype: Data-type bucket; created on demand if missing.
            adinfo: Optional object exposing ``model_dump()``. When given,
                its "sampleid" and "adtype" entries override ``sampleid``
                and ``sdtype``.
        """
        if adinfo is not None:
            kwargs = adinfo.model_dump()
            sampleid = kwargs.get("sampleid", None)
            sdtype = kwargs.get("adtype", "exp")
        sampleid = sampleid or self.active_id
        # NOTE(review): when both sampleid and active_id are unset the object
        # is stored under the key None -- confirm whether callers rely on it.
        if sdtype not in self.adata_dic:
            self.adata_dic[sdtype] = {}
        self.adata_dic[sdtype][sampleid] = adata
@@ -1,51 +1,69 @@
1
1
  import argparse
2
- from typing import Optional, Union, Type, Dict, Callable
3
- from enum import Enum
2
+ from typing import Optional, Dict, Callable
4
3
  from .util import add_figure_route, set_env
5
4
  import os
6
5
 
7
6
 
8
7
  class MCPCLI:
9
8
  """Base class for CLI applications with support for dynamic modules and parameters."""
10
-
9
+
11
10
  def __init__(self, name: str, help_text: str, mcp=None, manager=None):
12
11
  self.name = name
13
12
  self.mcp = mcp
14
13
  self.manager = manager
15
- self.parser = argparse.ArgumentParser(
16
- description=help_text,
17
- prog=name
18
- )
14
+ self.parser = argparse.ArgumentParser(description=help_text, prog=name)
19
15
  self.subcommands: Dict[str, tuple[argparse.ArgumentParser, Callable]] = {}
20
16
  self._setup_commands()
21
-
17
+
22
18
  def _setup_commands(self):
23
19
  """Setup the main commands for the CLI."""
24
- subparsers = self.parser.add_subparsers(dest='command', help='Available commands')
25
- run_parser = subparsers.add_parser('run', help='Start the server with the specified configuration')
20
+ subparsers = self.parser.add_subparsers(
21
+ dest="command", help="Available commands"
22
+ )
23
+ run_parser = subparsers.add_parser(
24
+ "run", help="Start the server with the specified configuration"
25
+ )
26
26
  self._setup_run_command(run_parser)
27
- self.subcommands['run'] = (run_parser, self._run_command)
28
-
27
+ self.subcommands["run"] = (run_parser, self._run_command)
28
+
29
29
  def _setup_run_command(self, parser: argparse.ArgumentParser):
30
30
  """Setup run command arguments."""
31
- parser.add_argument('-t', '--transport', default="stdio",
32
- choices=["stdio", "shttp", "sse"],
33
- help='specify transport type')
34
- parser.add_argument('-p', '--port', type=int, default=8000, help='transport port')
35
- parser.add_argument('--host', default='127.0.0.1', help='transport host')
36
- parser.add_argument('-f', '--forward', help='forward request to another server')
37
- parser.add_argument('-wd', '--working-dir', default=".", help='working directory')
38
- parser.add_argument('--tool-mode', choices=["auto", "normal"], default="normal", help='tool selection mode')
39
- parser.add_argument('--log-file', help='log file path, use stdout if None')
40
-
41
- def add_command(self, name: str, help_text: str, handler: Callable) -> argparse.ArgumentParser:
31
+ parser.add_argument(
32
+ "-t",
33
+ "--transport",
34
+ default="stdio",
35
+ choices=["stdio", "shttp", "sse"],
36
+ help="specify transport type",
37
+ )
38
+ parser.add_argument(
39
+ "-p", "--port", type=int, default=8000, help="transport port"
40
+ )
41
+ parser.add_argument("--host", default="127.0.0.1", help="transport host")
42
+ parser.add_argument("-f", "--forward", help="forward request to another server")
43
+ parser.add_argument(
44
+ "-wd", "--working-dir", default=".", help="working directory"
45
+ )
46
+ parser.add_argument(
47
+ "--run-mode", choices=["tool", "code"], default="code", help="run mode"
48
+ )
49
+ parser.add_argument(
50
+ "--tool-mode",
51
+ choices=["auto", "normal"],
52
+ default="normal",
53
+ help="tool selection mode",
54
+ )
55
+ parser.add_argument("--log-file", help="log file path, use stdout if None")
56
+
57
+ def add_command(
58
+ self, name: str, help_text: str, handler: Callable
59
+ ) -> argparse.ArgumentParser:
42
60
  """add new subcommand
43
-
61
+
44
62
  Args:
45
63
  name: subcommand name
46
64
  help_text: help text
47
65
  handler: handler function
48
-
66
+
49
67
  Returns:
50
68
  ArgumentParser: parser for the subcommand
51
69
  """
@@ -53,24 +71,24 @@ class MCPCLI:
53
71
  parser = subparsers.add_parser(name, help=help_text)
54
72
  self.subcommands[name] = (parser, handler)
55
73
  return parser
56
-
74
+
57
75
  def get_command_parser(self, name: str) -> Optional[argparse.ArgumentParser]:
58
76
  """get the parser for the subcommand
59
-
77
+
60
78
  Args:
61
79
  name: subcommand name
62
-
80
+
63
81
  Returns:
64
82
  ArgumentParser: parser for the subcommand, return None if the subcommand does not exist
65
83
  """
66
84
  if name in self.subcommands:
67
85
  return self.subcommands[name][0]
68
86
  return None
69
-
87
+
70
88
  def _run_command(self, args):
71
89
  """Start the server with the specified configuration."""
72
90
  os.chdir(args.working_dir)
73
- if hasattr(args, 'module'):
91
+ if hasattr(args, "module"):
74
92
  if "all" in args.module:
75
93
  modules = None
76
94
  elif isinstance(args.module, list) and bool(args.module):
@@ -78,16 +96,31 @@ class MCPCLI:
78
96
  else:
79
97
  modules = None
80
98
  if self.manager is not None:
81
- self.mcp = self.manager(self.name, include_modules=modules).mcp
82
- all_tools = self.mcp._tool_manager._tools
83
- auto_tools = {tool: all_tools[tool] for tool in all_tools if all_tools[tool].name in ["search_tool", "run_tool"]}
84
- if args.tool_mode == "auto":
85
- all_tools = self.mcp._tool_manager._tools
86
- self.mcp._tool_manager._all_tools = all_tools
87
- self.mcp._tool_manager._tools = auto_tools
99
+ from .backend import NotebookManager, AdataManager
100
+
101
+ if args.run_mode == "code":
102
+ backend = NotebookManager
103
+ self.mcp = self.manager(
104
+ self.name, include_tags=["nb", "rag"], backend=backend
105
+ ).mcp
88
106
  else:
89
- for name in auto_tools:
90
- self.mcp._tool_manager.remove_tool(name)
107
+ backend = AdataManager
108
+ self.mcp = self.manager(
109
+ self.name, include_modules=modules, backend=backend
110
+ ).mcp
111
+ all_tools = self.mcp._tool_manager._tools
112
+ auto_tools = {
113
+ tool: all_tools[tool]
114
+ for tool in all_tools
115
+ if "auto" in all_tools[tool].tags
116
+ }
117
+ if args.tool_mode == "auto":
118
+ all_tools = self.mcp._tool_manager._tools
119
+ self.mcp._tool_manager._all_tools = all_tools
120
+ self.mcp._tool_manager._tools = auto_tools
121
+ else:
122
+ for name in auto_tools:
123
+ self.mcp._tool_manager.remove_tool(name)
91
124
  elif self.mcp is not None:
92
125
  pass
93
126
  else:
@@ -98,19 +131,15 @@ class MCPCLI:
98
131
  def run_mcp(self, log_file, forward, transport, host, port):
99
132
  set_env(log_file, forward, transport, host, port)
100
133
  from .logging_config import setup_logger
134
+
101
135
  setup_logger(log_file)
102
136
  if transport == "stdio":
103
137
  self.mcp.run()
104
138
  elif transport in ["sse", "shttp"]:
105
139
  transport = "streamable-http" if transport == "shttp" else transport
106
140
  add_figure_route(self.mcp)
107
- self.mcp.run(
108
- transport=transport,
109
- host=host,
110
- port=port,
111
- log_level="info"
112
- )
113
-
141
+ self.mcp.run(transport=transport, host=host, port=port, log_level="info")
142
+
114
143
  def run(self):
115
144
  """Run the CLI application."""
116
145
  args = self.parser.parse_args()
@@ -0,0 +1,139 @@
1
+ from agno.document.chunking.agentic import AgenticChunking
2
+ from agno.embedder.openai import OpenAIEmbedder
3
+ from agno.models.deepseek import DeepSeek
4
+ from agno.vectordb.lancedb import LanceDb
5
+ from agno.knowledge.agent import AgentKnowledge
6
+ import importlib.resources
7
+ import os
8
+ import requests
9
+ import zipfile
10
+ import tempfile
11
+ import shutil
12
+ from pathlib import Path
13
+ import logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
# Embedder settings, read from the environment once at import time.
embedder_id = os.environ.get("EMBEDDER_MODEL")
embedder_api_key = os.environ.get("EMBEDDER_API_KEY")
embedder_base_url = os.environ.get("EMBEDDER_BASE_URL")

# Chat-model settings, read from the environment once at import time.
model_id = os.environ.get("MODEL")
model_api_key = os.environ.get("API_KEY")
model_base_url = os.environ.get("BASE_URL")

# Configuration: sub-directory (inside the installed package) that holds the
# vector database, and the URL of the prebuilt archive.
config = dict(
    local_dir="vector_db",
    huggingface_url="https://huggingface.co/datasets/huangshing/scmcp_vector_db/resolve/main/vector_db.zip",
)
29
+
30
+
31
def download_vector_db(source="huggingface"):
    """Download the prebuilt vector database and unpack it into the package.

    The archive is unpacked into ``<scmcp_shared package dir>/vector_db``.
    Nothing is downloaded if ``scmcp.lance`` already exists there.

    Args:
        source: Download source. Only "huggingface" is supported.

    Returns:
        str: Path of the local vector-database directory.

    Raises:
        RuntimeError: If the download fails, the archive cannot be unpacked,
            or ``source`` is not supported.
    """
    # importlib.resources.path() returns a context manager, not a path, so
    # Path(...) on it raises TypeError. files() gives the package location
    # directly.
    package_dir = Path(str(importlib.resources.files("scmcp_shared")))
    local_dir = package_dir / config["local_dir"]
    local_dir.mkdir(exist_ok=True)

    # Skip the download when the database is already present.
    if (local_dir / "scmcp.lance").exists():
        logger.info("Vector database already exists locally")
        return str(local_dir)

    logger.info(f"Downloading vector database from {source}...")

    # Download into a temporary directory so the zip is removed afterwards.
    with tempfile.TemporaryDirectory() as temp_dir:
        zip_path = Path(temp_dir) / "vector_db.zip"

        try:
            if source == "huggingface":
                url = config["huggingface_url"]
            else:
                raise ValueError(f"Unsupported source: {source}")

            logger.info(f"Downloading from: {url}")
            # (connect, read) timeouts keep a stalled server from hanging the
            # caller forever; the with-block releases the connection.
            with requests.get(url, stream=True, timeout=(10, 300)) as response:
                response.raise_for_status()
                with open(zip_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            logger.info("Extracting downloaded archive...")
            _extract_archive(zip_path, local_dir)

            logger.info(f"Vector database downloaded and extracted to: {local_dir}")
            return str(local_dir)

        except requests.RequestException as e:
            raise RuntimeError(f"Failed to download vector database: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Failed to process vector database: {e}") from e
82
+
83
+
84
def _extract_archive(archive_path, extract_dir):
    """Unpack a zip archive into ``extract_dir``.

    If ``extract_dir`` then holds exactly one entry and it is a directory,
    that directory's contents are moved up into ``extract_dir`` and the
    emptied directory is removed.

    Args:
        archive_path: Path of the zip file.
        extract_dir: Destination directory, as ``str`` or ``Path``.
    """
    # Normalise once so the "/" join below also works for a str argument.
    target_dir = Path(extract_dir)

    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        zip_ref.extractall(target_dir)

    extracted_items = list(target_dir.iterdir())
    if len(extracted_items) == 1 and extracted_items[0].is_dir():
        subdir = extracted_items[0]
        # Snapshot the listing before moving entries out of the directory.
        for item in list(subdir.iterdir()):
            shutil.move(str(item), str(target_dir / item.name))
        subdir.rmdir()
96
+
97
+
98
def load_kb(software=None, auto_download=True, download_source="huggingface"):
    """Build the agent knowledge base on top of the local vector database.

    Args:
        software: Software name; used as the LanceDB table name.
        auto_download: Whether to download the vector database when it is
            not present in the installed package.
        download_source: Source passed to ``download_vector_db``.

    Returns:
        AgentKnowledge: Knowledge base wired to the LanceDB store.

    Raises:
        FileNotFoundError: If the vector database is missing and
            ``auto_download`` is False.
    """
    # importlib.resources.path() only returns a lazy context manager, so it
    # never raised FileNotFoundError and was not usable as a URI. Resolve the
    # directory with files() and test for it explicitly.
    vector_db_path = Path(str(importlib.resources.files("scmcp_shared"))) / "vector_db"
    # An empty directory counts as missing, so a database is still fetched
    # when only the bare folder exists.
    has_local_db = vector_db_path.is_dir() and any(vector_db_path.iterdir())
    if not has_local_db:
        if auto_download:
            logger.info("Vector database not found in package, attempting download...")
            vector_db_path = download_vector_db(download_source)
        else:
            raise FileNotFoundError(
                "Vector database not found. Set auto_download=True to download automatically, "
                "or manually place the vector database in the scmcp_shared package."
            )

    vector_db = LanceDb(
        table_name=software,
        uri=str(vector_db_path),
        embedder=OpenAIEmbedder(
            id=embedder_id,
            base_url=embedder_base_url,
            api_key=embedder_api_key,
        ),
    )
    # Model used by the agentic chunking strategy.
    model = DeepSeek(
        id=model_id,
        base_url=model_base_url,
        api_key=model_api_key,
    )
    knowledge_base = AgentKnowledge(
        chunking_strategy=AgenticChunking(model=model),
        vector_db=vector_db,
    )

    return knowledge_base
@@ -1,18 +1,17 @@
1
1
  import logging
2
2
  import sys
3
- import os
3
+
4
4
  from .util import get_env
5
5
 
6
- def setup_logger(name="sc-mcp-server", log_file=None):
7
6
 
7
+ def setup_logger(name="sc-mcp-server", log_file=None):
8
8
  logger = logging.getLogger(name)
9
9
  logger.setLevel(logging.INFO)
10
10
  if logger.handlers:
11
11
  return logger
12
-
12
+
13
13
  formatter = logging.Formatter(
14
- '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
15
- '%Y-%m-%d %H:%M:%S'
14
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s", "%Y-%m-%d %H:%M:%S"
16
15
  )
17
16
  if log_file is None:
18
17
  log_file = get_env("LOG_FILE")
@@ -20,12 +19,11 @@ def setup_logger(name="sc-mcp-server", log_file=None):
20
19
  log_handler = logging.FileHandler(log_file)
21
20
  log_handler.setFormatter(formatter)
22
21
  logger.addHandler(log_handler)
23
-
22
+
24
23
  logger.info(f"logging output: {log_file}")
25
24
  else:
26
25
  log_handler = logging.StreamHandler(sys.stdout)
27
26
  log_handler.setFormatter(formatter)
28
27
  logger.addHandler(log_handler)
29
- logger.info(f"loggin file output: stdout")
28
+ logger.info("loggin file output: stdout")
30
29
  return logger
31
-