PyPI - aigroup-stata-mcp - Versions diffs - 1.0.3__py3-none-any.whl - Mend

aigroup-stata-mcp 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

aigroup_stata_mcp-1.0.3.dist-info/METADATA +345 -0
aigroup_stata_mcp-1.0.3.dist-info/RECORD +38 -0
aigroup_stata_mcp-1.0.3.dist-info/WHEEL +4 -0
aigroup_stata_mcp-1.0.3.dist-info/entry_points.txt +5 -0
aigroup_stata_mcp-1.0.3.dist-info/licenses/LICENSE +21 -0
stata_mcp/__init__.py +18 -0
stata_mcp/cli/__init__.py +8 -0
stata_mcp/cli/_cli.py +95 -0
stata_mcp/core/__init__.py +14 -0
stata_mcp/core/data_info/__init__.py +11 -0
stata_mcp/core/data_info/_base.py +288 -0
stata_mcp/core/data_info/csv.py +123 -0
stata_mcp/core/data_info/dta.py +70 -0
stata_mcp/core/stata/__init__.py +13 -0
stata_mcp/core/stata/stata_controller/__init__.py +9 -0
stata_mcp/core/stata/stata_controller/controller.py +208 -0
stata_mcp/core/stata/stata_do/__init__.py +9 -0
stata_mcp/core/stata/stata_do/do.py +177 -0
stata_mcp/core/stata/stata_finder/__init__.py +9 -0
stata_mcp/core/stata/stata_finder/base.py +294 -0
stata_mcp/core/stata/stata_finder/finder.py +193 -0
stata_mcp/core/stata/stata_finder/linux.py +43 -0
stata_mcp/core/stata/stata_finder/macos.py +88 -0
stata_mcp/core/stata/stata_finder/windows.py +191 -0
stata_mcp/server/__init__.py +8 -0
stata_mcp/server/main.py +153 -0
stata_mcp/server/prompts/__init__.py +8 -0
stata_mcp/server/prompts/core_prompts.py +122 -0
stata_mcp/server/tools/__init__.py +10 -0
stata_mcp/server/tools/core_tools.py +59 -0
stata_mcp/server/tools/file_tools.py +163 -0
stata_mcp/server/tools/stata_tools.py +221 -0
stata_mcp/utils/Installer/__init__.py +7 -0
stata_mcp/utils/Installer/installer.py +85 -0
stata_mcp/utils/Prompt/__init__.py +74 -0
stata_mcp/utils/Prompt/string.py +91 -0
stata_mcp/utils/__init__.py +23 -0
stata_mcp/utils/usable.py +244 -0

stata_mcp/core/stata/stata_controller/controller.py ADDED Viewed

@@ -0,0 +1,208 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import time
+import pexpect
+class StataController:
+    """Stata控制器类，用于管理Stata会话和执行命令"""
+    def __init__(self, stata_cli: str = None, timeout: int = 30):
+        """
+        Initialize the Stata controller.
+        Args:
+            stata_cli (str): Path to the Stata command-line executable.
+            timeout (int): Timeout for command execution (in seconds).
+        """
+        self.stata_cli_path = stata_cli
+        self.child = None
+        self.timeout = timeout
+        self.start()
+    @property
+    def STATA_CLI(self):
+        """获取Stata CLI路径"""
+        return self.stata_cli_path
+    def _expect_prompt(self, timeout=None):
+        """
+        Wait for the Stata prompt, indicating command completion.
+        Args:
+            timeout (int, optional): Timeout for this wait; if not provided, use default.
+        Returns:
+            int: The index returned by pexpect.expect.
+        """
+        if timeout is None:
+            timeout = self.timeout
+        # Use a set of patterns to match various prompt scenarios
+        patterns = [
+            r"\r\n\. ",  # Standard prompt
+            r"\r\n: ",  # Continuation prompt
+            r"\r\n--more--",  # More content prompt
+            r"r\(\d+\);",  # Error prompt
+            pexpect.TIMEOUT,  # Timeout
+            pexpect.EOF,  # End of program
+        ]
+        index = self.child.expect(patterns, timeout=timeout)
+        # Handle matched patterns
+        if index == 2:  # --more-- prompt; send space to continue
+            self.child.send(" ")
+            return self._expect_prompt(timeout)  # Recurse until actual prompt
+        elif index == 3:  # Error prompt
+            # Continue waiting until standard prompt appears
+            try:
+                self.child.expect(r"\r\n\. ", timeout=5)
+            except pexpect.TIMEOUT:
+                pass  # Ignore timeout and return error index
+            return index
+        elif index == 4:  # Timeout
+            # Try sending a newline to trigger the prompt
+            self.child.sendline("")
+            try:
+                return self.child.expect(
+                    [r"\r\n\. ", pexpect.TIMEOUT], timeout=5)
+            except pexpect.TIMEOUT:
+                return index
+        return index
+    def run(self, command, timeout=None):
+        """
+        Execute a Stata command and wait for completion.
+        Args:
+            command (str): The Stata command to execute.
+            timeout (int, optional): Timeout for this command.
+        Returns:
+            str: The output of the command execution.
+        Raises:
+            RuntimeError: If the command times out or other errors occur.
+        """
+        if timeout is None:
+            timeout = self.timeout
+        # Send the command
+        self.child.sendline(command)
+        # Wait for the command to complete
+        result = self._expect_prompt(timeout)
+        # Capture the output
+        output = self.child.before.strip()
+        # Check for errors
+        if result == 3:  # Error prompt index
+            error_match = re.search(r"r\((\d+)\);", output)
+            if error_match:
+                error_code = error_match.group(1)
+                raise RuntimeError(f"Stata error r({error_code}): {output}")
+        elif result == 4:  # Timeout
+            raise RuntimeError(f"Command timed out (> {timeout}s): {command}")
+        elif result == 5:  # EOF
+            raise RuntimeError(
+                f"Stata session terminated unexpectedly: {output}")
+        return output
+    def run_with_retry(self, command, max_retries=3, timeout=None):
+        """
+        Execute a command with a retry mechanism.
+        Args:
+            command (str): The Stata command to execute.
+            max_retries (int): Maximum number of retry attempts.
+            timeout (int, optional): Timeout for this command.
+        Returns:
+            str: The output of the command execution.
+        Raises:
+            RuntimeError: If all retry attempts fail.
+        """
+        retries = 0
+        last_error = None
+        while retries < max_retries:
+            try:
+                return self.run(command, timeout)
+            except RuntimeError as e:
+                last_error = e
+                retries += 1
+                # If it's a timeout error and we can retry, restart the session
+                if "timed out" in str(e) and retries < max_retries:
+                    self.restart()
+                time.sleep(1)  # Brief pause before retry
+        # All retries failed
+        raise RuntimeError(
+            f"Command failed after {max_retries} attempts: {last_error}")
+    def start(self):
+        """
+        Start the Stata session.
+        """
+        self.child = pexpect.spawn(
+            self.STATA_CLI, encoding="utf-8", timeout=self.timeout
+        )
+        self._expect_prompt()
+    def restart(self):
+        """
+        Restart the Stata session.
+        """
+        self.close()
+        self.start()
+    def close(self):
+        """
+        Close the Stata session.
+        """
+        if self.child and not self.child.closed:
+            try:
+                self.child.sendline("exit, clear")
+                self.child.expect(pexpect.EOF, timeout=5)
+            except Exception as e:
+                print(
+                    f"Warning: Could not close Stata session with error: {e}")
+            finally:
+                self.child.close()
+if __name__ == "__main__":
+    url = "https://pub-b55c5837ee41480ba0f902096dd9725d.r2.dev/01_OLS.dta"
+    stata_cli = "stata-mp"
+    var_list = []  # e.g., ["weight", "height"]
+    var_str = " ".join(var_list) if var_list else ""
+    # Use a longer timeout for the session
+    temp_stata_session = StataController(stata_cli=stata_cli, timeout=60)
+    try:
+        # Execute command with retry
+        use_data = temp_stata_session.run_with_retry(f"use {url}, clear")
+        if "not found" in use_data or "server reported server error" in use_data:
+            print("Stata data not found. Please check the path.")
+        else:
+            # For commands that may require more time, specify a longer timeout
+            summarize = temp_stata_session.run(
+                f"summarize {var_str}", timeout=120)
+            describe = temp_stata_session.run(f"describe {var_str}")
+            result = {"summarize": summarize, "describe": describe}
+            print(result.get("summarize"))
+    except Exception as e:
+        print(f"Error: {e}")
+    finally:
+        # Ensure the session is properly closed
+        temp_stata_session.close()

stata_mcp/core/stata/stata_do/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+from .do import StataDo
+__all__ = [
+    "StataDo"
+]

stata_mcp/core/stata/stata_do/do.py ADDED Viewed

@@ -0,0 +1,177 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import logging
+import os
+import subprocess
+from ....utils import get_nowtime
+class StataDo:
+    """Stata do文件执行器，用于执行Stata脚本并管理日志"""
+    def __init__(self,
+                 stata_cli: str,
+                 log_file_path: str,
+                 dofile_base_path: str,
+                 sys_os: str = None):
+        """
+        Initialize Stata executor
+        Args:
+            stata_cli: Path to Stata command line tool
+            log_file_path: Path for storing log files
+            dofile_base_path: Base path for do files
+            sys_os: Operating system type
+        """
+        self.stata_cli = stata_cli
+        self.log_file_path = log_file_path
+        self.dofile_base_path = dofile_base_path
+        if sys_os:
+            self.sys_os = sys_os
+        else:
+            from ....utils import get_os
+            self.sys_os = get_os()
+    def set_cli(self, cli_path):
+        """设置Stata CLI路径"""
+        self.stata_cli = cli_path
+    @property
+    def STATA_CLI(self):
+        """获取Stata CLI路径"""
+        return self.stata_cli
+    def execute_dofile(self,
+                       dofile_path: str,
+                       log_file_name: str = None,
+                       is_replace: bool = True) -> str:
+        """
+        Execute Stata do file and return log file path
+        Args:
+            dofile_path (str): Path to do file
+            log_file_name (str, optional): File name of log
+            is_replace (bool): Whether replace the log file if exists before. Default is True
+        Returns:
+            str: Path to generated log file
+        Raises:
+            ValueError: Unsupported operating system
+            RuntimeError: Stata execution error
+        """
+        nowtime = get_nowtime()
+        log_name = log_file_name or nowtime
+        log_file = os.path.join(self.log_file_path, f"{log_name}.log")
+        if self.sys_os == "Darwin" or self.sys_os == "Linux":
+            self._execute_unix_like(dofile_path, log_file, is_replace)
+        elif self.sys_os == "Windows":
+            self._execute_windows(dofile_path, log_file, nowtime, is_replace)
+        else:
+            raise ValueError(f"Unsupported operating system: {self.sys_os}")
+        return log_file
+    def _execute_unix_like(self, dofile_path: str, log_file: str, is_replace: bool = True):
+        """
+        Execute Stata on macOS/Linux systems
+        Args:
+            dofile_path: Path to do file
+            log_file: Path to log file
+            is_replace: Whether replace the log file if exists.
+        Raises:
+            RuntimeError: Stata execution error
+        """
+        proc = subprocess.Popen(
+            [self.STATA_CLI],  # Launch the Stata CLI
+            stdin=subprocess.PIPE,  # Prepare to send commands
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            shell=True,  # Required when the path contains spaces
+        )
+        # Execute commands sequentially in Stata
+        replace_clause = ", replace" if is_replace else ""
+        commands = f"""
+        log using "{log_file}"{replace_clause}
+        do "{dofile_path}"
+        log close
+        exit, STATA
+        """
+        stdout, stderr = proc.communicate(
+            input=commands
+        )  # Send commands and wait for completion
+        if proc.returncode != 0:
+            logging.error(f"Stata execution failed: {stderr}")
+            raise RuntimeError(f"Something went wrong: {stderr}")
+        else:
+            logging.info(
+                f"Stata execution completed successfully. Log file: {log_file}")
+    def _execute_windows(self, dofile_path: str, log_file: str, nowtime: str, is_replace: bool = True):
+        """
+        Execute Stata on Windows systems
+        Args:
+            dofile_path: Path to do file
+            log_file: Path to log file
+            nowtime: Timestamp for generating temporary file names
+        """
+        # Windows approach - use the /e /q flags for clean batch processing
+        # Create a temporary batch file
+        batch_file = os.path.join(self.dofile_base_path, f"{nowtime}_batch.do")
+        replace_clause = ", replace" if is_replace else ""
+        try:
+            with open(batch_file, "w", encoding="utf-8") as f:
+                f.write(f'log using "{log_file}"{replace_clause}\n')
+                f.write(f'do "{dofile_path}"\n')
+                f.write("log close\n")
+                f.write("exit, STATA\n")
+            # Run Stata on Windows using /e /q for clean batch processing
+            # /e: batch mode (execute and exit)
+            # /q: quiet mode (no startup messages)
+            cmd = f'"{self.STATA_CLI}" /e /q do "{batch_file}"'
+            result = subprocess.run(
+                cmd, shell=True, capture_output=True, text=True)
+            if result.returncode != 0:
+                logging.error(
+                    f"Stata execution failed on Windows: {result.stderr}")
+                raise RuntimeError(
+                    f"Windows Stata execution failed: {result.stderr}")
+            else:
+                logging.info(
+                    f"Stata execution completed successfully on Windows. Log file: {log_file}")
+        except Exception as e:
+            logging.error(f"Error during Windows Stata execution: {str(e)}")
+            raise
+        finally:
+            # Clean up temporary batch file
+            if os.path.exists(batch_file):
+                try:
+                    os.remove(batch_file)
+                    logging.debug(
+                        f"Temporary batch file removed: {batch_file}")
+                except Exception as e:
+                    logging.warning(
+                        f"Failed to remove temporary batch file "
+                        f"{batch_file}: {str(e)}")
+    @staticmethod
+    def read_log(log_file_path, mode="r", encoding="utf-8") -> str:
+        """读取Stata日志文件内容"""
+        with open(log_file_path, mode, encoding=encoding) as file:
+            log_content = file.read()
+        return log_content

stata_mcp/core/stata/stata_finder/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+from .finder import StataFinder
+__all__ = [
+    "StataFinder",
+]

stata_mcp/core/stata/stata_finder/base.py ADDED Viewed

@@ -0,0 +1,294 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import os
+import re
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Union
+@dataclass
+class StataEditionConfig:
+    """
+    StataEditionConfig class for comparing Stata versions with sorting support.
+    Attributes:
+        edition (str): Edition type (mp > se > be > ic > default)
+        version (Union[int, float]): Version number (e.g., 18, 19.5). Default 99 if not found, indicating current default version
+        path (str): Full path to Stata executable
+    Comparison Rules:
+        1. First compare edition priority: mp > se > be > ic > default (edition type always has priority)
+        2. Then compare numeric version: higher > lower (only for same edition type)
+        3. Support float versions like 19.5 > 19
+        4. Version 99 is used when version info is not available (default edition gets highest version within its type)
+    Example:
+        >>> p1 = StataEditionConfig("mp", 18, "/usr/local/bin/stata-mp")
+        >>> p2 = StataEditionConfig.from_path("/usr/local/bin/stata")  # Auto-detect as default with version 99
+        >>> p1 > p2  # True (mp edition has higher priority than default, regardless of version numbers)
+    """
+    edition: str
+    version: Union[int, float]
+    path: str
+    # Edition priority mapping
+    _EDITION_PRIORITY = {
+        "mp": 5,
+        "se": 4,
+        "be": 3,
+        "ic": 2,
+        "default": 1,
+        "unknown": 0,
+    }
+    def __post_init__(self):
+        """Validation and processing after initialization."""
+        # Normalize edition type to lowercase
+        self.edition = self.edition.lower()
+        # If edition type is not in priority mapping, mark as unknown
+        if self.edition not in self._EDITION_PRIORITY:
+            self.edition = "unknown"
+    @classmethod
+    def from_path(cls, path: str) -> 'StataEditionConfig':
+        """
+        Create StataEditionConfig from path, automatically extracting edition and version.
+        Args:
+            path: Full path to Stata executable
+        Returns:
+            StataEditionConfig with auto-detected edition and version
+        """
+        import os
+        filename = os.path.basename(path).lower()
+        full_path_lower = path.lower()
+        # Extract edition
+        edition = "default"
+        edition_patterns = [
+            (r'stata-mp', 'mp'),
+            (r'satamp', 'mp'),
+            (r'stata-se', 'se'),
+            (r'statase', 'se'),
+            (r'sata-be', 'be'),
+            (r'satabe', 'be'),
+            (r'sata-ic', 'ic'),
+            (r'sataic', 'ic'),
+        ]
+        for pattern, ed in edition_patterns:
+            if re.search(pattern, filename):
+                edition = ed
+                break
+        # Extract version (default to 99 if not found, indicating current default version)
+        version = 99
+        # Try to extract version from directory name first
+        dir_version_match = re.search(r'stata(\d+(?:\.\d+)?)', full_path_lower)
+        if dir_version_match:
+            try:
+                version = float(dir_version_match.group(1))
+            except ValueError:
+                pass
+        # Try to extract version from filename
+        file_version_patterns = [
+            r'stata-[a-z]+-(\d+(?:\.\d+)?)',  # stata-mp-17.5
+            r'stata(\d+(?:\.\d+)?)',          # stata17.5
+        ]
+        for pattern in file_version_patterns:
+            file_version_match = re.search(pattern, filename)
+            if file_version_match:
+                try:
+                    file_version = float(file_version_match.group(1))
+                    # Only use reasonable version numbers (1-30)
+                    if 1 <= file_version <= 30:
+                        version = max(version, file_version)
+                        break
+                except ValueError:
+                    continue
+        return cls(edition=edition, version=version, path=path)
+    @property
+    def edition_priority(self) -> int:
+        """Get the priority value of the edition type."""
+        return self._EDITION_PRIORITY[self.edition]
+    def __lt__(self, other) -> bool:
+        """Less than comparison for sorting."""
+        if not isinstance(other, StataEditionConfig):
+            return NotImplemented
+        # First compare edition priority
+        if self.edition_priority != other.edition_priority:
+            return self.edition_priority < other.edition_priority
+        # Same edition, compare version number
+        return self.version < other.version
+    def __le__(self, other) -> bool:
+        """Less than or equal comparison."""
+        return self < other or self == other
+    def __gt__(self, other) -> bool:
+        """Greater than comparison."""
+        if not isinstance(other, StataEditionConfig):
+            return NotImplemented
+        # First compare edition priority
+        if self.edition_priority != other.edition_priority:
+            return self.edition_priority > other.edition_priority
+        # Same edition, compare version number
+        return self.version > other.version
+    def __ge__(self, other) -> bool:
+        """Greater than or equal comparison."""
+        return self > other or self == other
+    def __eq__(self, other) -> bool:
+        """Equality comparison."""
+        if not isinstance(other, StataEditionConfig):
+            return NotImplemented
+        return (self.edition_priority == other.edition_priority and
+                self.version == other.version)
+    def __str__(self) -> str:
+        """String representation - returns the path."""
+        return self.path
+    def __repr__(self) -> str:
+        """Detailed string representation - returns the path."""
+        return self.path
+    def __int__(self) -> int:
+        """Integer conversion - returns the version number."""
+        return int(self.version)
+    def __float__(self) -> float:
+        """Float conversion - returns the version number."""
+        return float(self.version)
+    @property
+    def stata_cli_path(self) -> str:
+        """Get the Stata CLI path."""
+        return self.path
+class FinderBase(ABC):
+    stata_cli: str = None
+    def __init__(self, stata_cli: str = None):
+        # If there is any setting, use the input and environment first
+        self.stata_cli = stata_cli or os.getenv("STATA_CLI") or os.getenv("stata_cli")
+    def find_stata(self) -> str | None:
+        if self.stata_cli:
+            return self.stata_cli
+        return self.finder()
+    @abstractmethod
+    def finder(self) -> str:
+        """
+        Find the Stata executable on the current platform.
+        This method must be implemented by each platform-specific finder class
+        to locate Stata installations using platform-appropriate search strategies.
+        find_stata() calls it only when no path was supplied through the
+        constructor argument or the STATA_CLI / stata_cli environment variables.
+        Returns:
+            str: The full path to the Stata executable
+        Raises:
+            FileNotFoundError: If no Stata installation is found
+        Note:
+            This is an abstract method and must be implemented by concrete finder classes
+            such as FinderMacOS, FinderWindows, or FinderLinux.
+            Each platform should implement appropriate search strategies for finding
+            Stata installations in their typical locations (e.g., /Applications for macOS,
+            Program Files for Windows, system PATH for Linux).
+        """
+        ...
+    @abstractmethod
+    def find_path_base(self) -> Dict[str, List[str]]:
+        """
+        Return the candidate search directories, grouped by category.
+        find_from_bin() reads the "bin" entry of the returned mapping and
+        treats each of its items as a directory to scan for Stata executables.
+        Returns:
+            Dict[str, List[str]]: Category name mapped to a list of directory paths.
+        Note:
+            NOTE(review): only the "bin" key is used by the code visible here;
+            confirm any other keys against the platform-specific subclasses.
+        """
+        ...
+    @staticmethod
+    def priority() -> Dict[str, List[str]]:
+        """简要描述函数功能"""
+        name_priority = {
+            "mp": ["stata-mp"],
+            "se": ["stata-se"],
+            "be": ["stata-be"],
+            "default": ["stata"],
+        }
+        return name_priority
+    @staticmethod
+    def _is_executable(p: Path) -> bool:
+        """简要描述函数功能"""
+        try:
+            return p.is_file() and os.access(p, os.X_OK)
+        except OSError:
+            return False
+    def find_from_bin(self,
+                      *,
+                      priority: Optional[Iterable[str]] = None) -> List[StataEditionConfig]:
+        """
+        Find all available Stata executables in bin directories.
+        Args:
+            priority: Edition priority order (default: ["mp", "se", "be", "default"])
+        Returns:
+            List of all executable Stata paths found in bin directories,
+            ordered by priority. Returns empty list if no executables found.
+        """
+        pr = list(priority) if priority else ["mp", "se", "be", "default"]
+        name_priority = self.priority()
+        bins = self.find_path_base().get("bin")
+        if not bins:
+            return []
+        # Build ordered list of executable names by priority
+        ordered_names: List[str] = []
+        for key in pr:
+            ordered_names.extend(name_priority.get(key, []))
+        found_executables: List[StataEditionConfig] = []
+        # Search for executables in all bin directories
+        for b in bins:
+            base = Path(b)
+            # Check weather the bin directory exists
+            if not base.exists():
+                continue
+            for name in ordered_names:
+                p = base / name
+                if self._is_executable(p):
+                    # Convert path to StataEditionConfig
+                    config = StataEditionConfig.from_path(str(p))
+                    found_executables.append(config)
+        return found_executables