mmcqdata 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .DS_Store
5
+ .mypy_cache/
6
+ .pytest_cache/
7
+ .venv/
8
+ build/
9
+ dist/
@@ -0,0 +1,138 @@
1
+ Metadata-Version: 2.4
2
+ Name: mmcqdata
3
+ Version: 0.1.0
4
+ Summary: Python SDK for querying StarRocks data.
5
+ Author: MMCQ
6
+ Keywords: data,sdk,sql,starrocks
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Database
14
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
15
+ Requires-Python: >=3.10
16
+ Requires-Dist: pymysql
17
+ Provides-Extra: pandas
18
+ Requires-Dist: pandas; extra == 'pandas'
19
+ Provides-Extra: release
20
+ Requires-Dist: build; extra == 'release'
21
+ Requires-Dist: twine; extra == 'release'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # mmcqdata
25
+
26
+ `mmcqdata` 是一个面向 StarRocks 查询场景的轻量 Python SDK,提供连接管理、SQL 查询和可选的 `pandas` 输出能力。
27
+
28
+ ## 安装
29
+
30
+ 安装基础版本:
31
+
32
+ ```bash
33
+ pip install mmcqdata
34
+ ```
35
+
36
+ 如果需要直接返回 `pandas.DataFrame`:
37
+
38
+ ```bash
39
+ pip install "mmcqdata[pandas]"
40
+ ```
41
+
42
+ ## 快速开始
43
+
44
+ ```python
45
+ from mmcqdata import StarRocksClient, StarRocksConfig
46
+
47
+ config = StarRocksConfig(
48
+ host="starrocks.example.com",
49
+ port=9030,
50
+ user="readonly_user",
51
+ password="secret",
52
+ database="analytics",
53
+ )
54
+
55
+ with StarRocksClient(config) as client:
56
+ rows = client.query(
57
+ "SELECT trade_date, symbol, close FROM daily_bar WHERE symbol = %s LIMIT 5",
58
+ ("600519.SH",),
59
+ )
60
+ print(rows)
61
+ ```
62
+
63
+ ## 环境变量初始化
64
+
65
+ 也可以通过环境变量创建客户端:
66
+
67
+ ```bash
68
+ export MMCQDATA_HOST="starrocks.example.com"
69
+ export MMCQDATA_PORT="9030"
70
+ export MMCQDATA_USER="readonly_user"
71
+ export MMCQDATA_PASSWORD="secret"
72
+ export MMCQDATA_DATABASE="analytics"
73
+ ```
74
+
75
+ ```python
76
+ from mmcqdata import StarRocksClient
77
+
78
+ with StarRocksClient.from_env() as client:
79
+ row = client.query_one("SELECT 1 AS ok")
80
+ print(row)
81
+ ```
82
+
83
+ 默认读取以下环境变量(`MMCQDATA_HOST` 与 `MMCQDATA_USER` 必填,其余可选):
84
+
85
+ - `MMCQDATA_HOST`
86
+ - `MMCQDATA_PORT`
87
+ - `MMCQDATA_USER`
88
+ - `MMCQDATA_PASSWORD`
89
+ - `MMCQDATA_DATABASE`
90
+ - `MMCQDATA_CONNECT_TIMEOUT`
91
+ - `MMCQDATA_READ_TIMEOUT`
92
+ - `MMCQDATA_WRITE_TIMEOUT`
93
+
94
+ ## 可用接口
95
+
96
+ - `StarRocksClient.query()`:返回多行字典结果。
97
+ - `StarRocksClient.query_one()`:返回单行字典结果;没有结果行时返回 `None`。
98
+ - `StarRocksClient.execute()`:执行 SQL 并返回影响行数。
99
+ - `StarRocksClient.query_dataframe()`:返回 `pandas.DataFrame`。
100
+ - `StarRocksClient.ping()`:检查连接是否可用。
101
+
102
+ ## 发布到 PyPI
103
+
104
+ 仓库里已经提供了发布脚本 `scripts/publish.sh`。该脚本会:
105
+
106
+ 1. 安装发布需要的工具。
107
+ 2. 构建源码包和 wheel。
108
+ 3. 执行 `twine check`。
109
+ 4. 使用环境变量中的 `PYPI_TOKEN` 上传到 PyPI。
110
+
111
+ 使用方式:
112
+
113
+ ```bash
114
+ cd /path/to/mmcqdata  # 替换为你本地的仓库根目录
115
+ export PYPI_TOKEN="pypi-xxxxxxxxxxxxxxxx"
116
+ ./scripts/publish.sh
117
+ ```
118
+
119
+ 也可以执行:
120
+
121
+ ```bash
122
+ bash scripts/publish.sh
123
+ ```
124
+
125
+ 不建议使用(脚本依赖 bash 专有语法,用 `sh` 启动时只能靠脚本开头的自动切换回退到 bash):
126
+
127
+ ```bash
128
+ sh scripts/publish.sh
129
+ ```
130
+
131
+ 如果只想先试传到 TestPyPI,可以额外设置:
132
+
133
+ ```bash
134
+ export PYPI_REPOSITORY_URL="https://test.pypi.org/legacy/"
135
+ ./scripts/publish.sh
136
+ ```
137
+
138
+ 注意:不要把真实 token 写进仓库文件,直接放在终端环境变量里即可。
@@ -0,0 +1,115 @@
1
+ # mmcqdata
2
+
3
+ `mmcqdata` 是一个面向 StarRocks 查询场景的轻量 Python SDK,提供连接管理、SQL 查询和可选的 `pandas` 输出能力。
4
+
5
+ ## 安装
6
+
7
+ 安装基础版本:
8
+
9
+ ```bash
10
+ pip install mmcqdata
11
+ ```
12
+
13
+ 如果需要直接返回 `pandas.DataFrame`:
14
+
15
+ ```bash
16
+ pip install "mmcqdata[pandas]"
17
+ ```
18
+
19
+ ## 快速开始
20
+
21
+ ```python
22
+ from mmcqdata import StarRocksClient, StarRocksConfig
23
+
24
+ config = StarRocksConfig(
25
+ host="starrocks.example.com",
26
+ port=9030,
27
+ user="readonly_user",
28
+ password="secret",
29
+ database="analytics",
30
+ )
31
+
32
+ with StarRocksClient(config) as client:
33
+ rows = client.query(
34
+ "SELECT trade_date, symbol, close FROM daily_bar WHERE symbol = %s LIMIT 5",
35
+ ("600519.SH",),
36
+ )
37
+ print(rows)
38
+ ```
39
+
40
+ ## 环境变量初始化
41
+
42
+ 也可以通过环境变量创建客户端:
43
+
44
+ ```bash
45
+ export MMCQDATA_HOST="starrocks.example.com"
46
+ export MMCQDATA_PORT="9030"
47
+ export MMCQDATA_USER="readonly_user"
48
+ export MMCQDATA_PASSWORD="secret"
49
+ export MMCQDATA_DATABASE="analytics"
50
+ ```
51
+
52
+ ```python
53
+ from mmcqdata import StarRocksClient
54
+
55
+ with StarRocksClient.from_env() as client:
56
+ row = client.query_one("SELECT 1 AS ok")
57
+ print(row)
58
+ ```
59
+
60
+ 默认读取以下环境变量(`MMCQDATA_HOST` 与 `MMCQDATA_USER` 必填,其余可选):
61
+
62
+ - `MMCQDATA_HOST`
63
+ - `MMCQDATA_PORT`
64
+ - `MMCQDATA_USER`
65
+ - `MMCQDATA_PASSWORD`
66
+ - `MMCQDATA_DATABASE`
67
+ - `MMCQDATA_CONNECT_TIMEOUT`
68
+ - `MMCQDATA_READ_TIMEOUT`
69
+ - `MMCQDATA_WRITE_TIMEOUT`
70
+
71
+ ## 可用接口
72
+
73
+ - `StarRocksClient.query()`:返回多行字典结果。
74
+ - `StarRocksClient.query_one()`:返回单行字典结果;没有结果行时返回 `None`。
75
+ - `StarRocksClient.execute()`:执行 SQL 并返回影响行数。
76
+ - `StarRocksClient.query_dataframe()`:返回 `pandas.DataFrame`。
77
+ - `StarRocksClient.ping()`:检查连接是否可用。
78
+
79
+ ## 发布到 PyPI
80
+
81
+ 仓库里已经提供了发布脚本 `scripts/publish.sh`。该脚本会:
82
+
83
+ 1. 安装发布需要的工具。
84
+ 2. 构建源码包和 wheel。
85
+ 3. 执行 `twine check`。
86
+ 4. 使用环境变量中的 `PYPI_TOKEN` 上传到 PyPI。
87
+
88
+ 使用方式:
89
+
90
+ ```bash
91
+ cd /path/to/mmcqdata  # 替换为你本地的仓库根目录
92
+ export PYPI_TOKEN="pypi-xxxxxxxxxxxxxxxx"
93
+ ./scripts/publish.sh
94
+ ```
95
+
96
+ 也可以执行:
97
+
98
+ ```bash
99
+ bash scripts/publish.sh
100
+ ```
101
+
102
+ 不建议使用(脚本依赖 bash 专有语法,用 `sh` 启动时只能靠脚本开头的自动切换回退到 bash):
103
+
104
+ ```bash
105
+ sh scripts/publish.sh
106
+ ```
107
+
108
+ 如果只想先试传到 TestPyPI,可以额外设置:
109
+
110
+ ```bash
111
+ export PYPI_REPOSITORY_URL="https://test.pypi.org/legacy/"
112
+ ./scripts/publish.sh
113
+ ```
114
+
115
+ 注意:不要把真实 token 写进仓库文件,直接放在终端环境变量里即可。
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mmcqdata"
7
+ version = "0.1.0"
8
+ description = "Python SDK for querying StarRocks data."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ authors = [
12
+ { name = "MMCQ" }
13
+ ]
14
+ dependencies = [
15
+ "pymysql",
16
+ ]
17
+ keywords = ["starrocks", "sql", "sdk", "data"]
18
+ classifiers = [
19
+ "Development Status :: 3 - Alpha",
20
+ "Intended Audience :: Developers",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Database",
26
+ "Topic :: Software Development :: Libraries :: Python Modules",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ pandas = [
31
+ "pandas",
32
+ ]
33
+ release = [
34
+ "build",
35
+ "twine",
36
+ ]
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/mmcqdata"]
@@ -0,0 +1,30 @@
1
#!/usr/bin/env bash
# Build the source distribution and wheel, validate them with `twine check`,
# and upload them with twine using the API token in PYPI_TOKEN.
# Set PYPI_REPOSITORY_URL to upload to a different index (e.g. TestPyPI).

# Re-run under bash when started by another shell (e.g. `sh publish.sh`);
# everything below relies on bash-only syntax.
if [ -z "${BASH_VERSION:-}" ]; then
  exec bash "$0" "$@"
fi

set -euo pipefail

# Work from the repository root (the parent of this script's directory),
# regardless of where the caller invoked the script from.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
repo_root="$(cd "${script_dir}/.." && pwd)"
cd "$repo_root"

# Refuse to continue without a token.
[[ -n "${PYPI_TOKEN:-}" ]] || {
  echo "Missing PYPI_TOKEN. Example: export PYPI_TOKEN='pypi-xxxxxxxx'" >&2
  exit 1
}

python3 -m pip install --upgrade build twine
# Remove stale artifacts so only freshly built files are checked and uploaded.
rm -rf build dist src/mmcqdata.egg-info
python3 -m build
python3 -m twine check dist/*

# Assemble the upload command once; the array is never empty, which keeps
# "${upload_cmd[@]}" safe under `set -u` on older bash releases.
upload_cmd=(python3 -m twine upload)
if [[ -n "${PYPI_REPOSITORY_URL:-}" ]]; then
  upload_cmd+=(--repository-url "$PYPI_REPOSITORY_URL")
fi

# Credentials are passed through the environment of this single command.
TWINE_USERNAME="__token__" \
TWINE_PASSWORD="$PYPI_TOKEN" \
  "${upload_cmd[@]}" dist/*
@@ -0,0 +1,13 @@
1
+ from .client import StarRocksClient
2
+ from .config import StarRocksConfig
3
+ from .exceptions import ConfigurationError, MMCQDataError, QueryError
4
+
5
+ __all__ = [
6
+ "ConfigurationError",
7
+ "MMCQDataError",
8
+ "QueryError",
9
+ "StarRocksClient",
10
+ "StarRocksConfig",
11
+ ]
12
+
13
+ __version__ = "0.1.0"
@@ -0,0 +1,129 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping, Sequence
4
+ from contextlib import contextmanager
5
+ from typing import Any, Iterator, TypeAlias
6
+
7
+ import pymysql
8
+ from pymysql.cursors import DictCursor
9
+
10
+ from .config import StarRocksConfig
11
+ from .exceptions import QueryError
12
+
13
+ QueryParams: TypeAlias = Sequence[Any] | Mapping[str, Any]
14
+
15
+
16
+ class StarRocksClient:
17
+ def __init__(self, config: StarRocksConfig, *, connect: bool = True) -> None:
18
+ self.config = config
19
+ self._connection: pymysql.connections.Connection | None = None
20
+
21
+ if connect:
22
+ self.open()
23
+
24
+ @classmethod
25
+ def from_env(
26
+ cls,
27
+ prefix: str = "MMCQDATA_",
28
+ *,
29
+ connect: bool = True,
30
+ ) -> "StarRocksClient":
31
+ return cls(StarRocksConfig.from_env(prefix=prefix), connect=connect)
32
+
33
+ def open(self) -> None:
34
+ if self._connection is not None and self._connection.open:
35
+ return
36
+
37
+ try:
38
+ self._connection = pymysql.connect(
39
+ **self.config.to_pymysql_kwargs(),
40
+ cursorclass=DictCursor,
41
+ )
42
+ except pymysql.MySQLError as exc:
43
+ raise QueryError("Failed to connect to StarRocks.") from exc
44
+
45
+ def close(self) -> None:
46
+ if self._connection is not None:
47
+ self._connection.close()
48
+ self._connection = None
49
+
50
+ def ping(self, reconnect: bool = True) -> None:
51
+ if self._connection is None:
52
+ self.open()
53
+
54
+ if self._connection is None:
55
+ raise QueryError("Connection was not initialized.")
56
+
57
+ try:
58
+ self._connection.ping(reconnect=reconnect)
59
+ except pymysql.MySQLError as exc:
60
+ raise QueryError("Failed to ping StarRocks.") from exc
61
+
62
+ @contextmanager
63
+ def cursor(self) -> Iterator[Any]:
64
+ if self._connection is None or not self._connection.open:
65
+ self.open()
66
+
67
+ if self._connection is None:
68
+ raise QueryError("Connection was not initialized.")
69
+
70
+ cursor = self._connection.cursor()
71
+ try:
72
+ yield cursor
73
+ except pymysql.MySQLError as exc:
74
+ raise QueryError("SQL execution failed.") from exc
75
+ finally:
76
+ cursor.close()
77
+
78
+ def query(
79
+ self,
80
+ sql: str,
81
+ params: QueryParams | None = None,
82
+ ) -> list[dict[str, Any]]:
83
+ with self.cursor() as cursor:
84
+ cursor.execute(sql, params)
85
+ rows = cursor.fetchall()
86
+
87
+ return [dict(row) for row in rows]
88
+
89
+ def query_one(
90
+ self,
91
+ sql: str,
92
+ params: QueryParams | None = None,
93
+ ) -> dict[str, Any] | None:
94
+ with self.cursor() as cursor:
95
+ cursor.execute(sql, params)
96
+ row = cursor.fetchone()
97
+
98
+ return dict(row) if row is not None else None
99
+
100
+ def execute(
101
+ self,
102
+ sql: str,
103
+ params: QueryParams | None = None,
104
+ ) -> int:
105
+ with self.cursor() as cursor:
106
+ affected_rows = cursor.execute(sql, params)
107
+
108
+ return int(affected_rows)
109
+
110
+ def query_dataframe(
111
+ self,
112
+ sql: str,
113
+ params: QueryParams | None = None,
114
+ ) -> Any:
115
+ try:
116
+ import pandas as pd
117
+ except ImportError as exc:
118
+ raise QueryError(
119
+ "pandas is not installed. Please run: pip install 'mmcqdata[pandas]'"
120
+ ) from exc
121
+
122
+ return pd.DataFrame(self.query(sql, params))
123
+
124
+ def __enter__(self) -> "StarRocksClient":
125
+ self.open()
126
+ return self
127
+
128
+ def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
129
+ self.close()
@@ -0,0 +1,93 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import os
5
+ from typing import Any
6
+
7
+ from .exceptions import ConfigurationError
8
+
9
+
10
+ def _parse_optional_int(value: str | None, *, field_name: str) -> int | None:
11
+ if value in (None, ""):
12
+ return None
13
+
14
+ try:
15
+ return int(value)
16
+ except ValueError as exc:
17
+ raise ConfigurationError(f"{field_name} must be an integer.") from exc
18
+
19
+
20
+ @dataclass(slots=True)
21
+ class StarRocksConfig:
22
+ host: str
23
+ user: str
24
+ password: str = ""
25
+ port: int = 9030
26
+ database: str | None = None
27
+ charset: str = "utf8mb4"
28
+ connect_timeout: int = 10
29
+ read_timeout: int | None = None
30
+ write_timeout: int | None = None
31
+ autocommit: bool = True
32
+
33
+ @classmethod
34
+ def from_env(cls, prefix: str = "MMCQDATA_") -> "StarRocksConfig":
35
+ host = os.getenv(f"{prefix}HOST")
36
+ user = os.getenv(f"{prefix}USER")
37
+
38
+ if not host:
39
+ raise ConfigurationError(f"Missing environment variable: {prefix}HOST")
40
+ if not user:
41
+ raise ConfigurationError(f"Missing environment variable: {prefix}USER")
42
+
43
+ port_text = os.getenv(f"{prefix}PORT", "9030")
44
+ connect_timeout_text = os.getenv(f"{prefix}CONNECT_TIMEOUT", "10")
45
+
46
+ try:
47
+ port = int(port_text)
48
+ except ValueError as exc:
49
+ raise ConfigurationError(f"{prefix}PORT must be an integer.") from exc
50
+
51
+ try:
52
+ connect_timeout = int(connect_timeout_text)
53
+ except ValueError as exc:
54
+ raise ConfigurationError(
55
+ f"{prefix}CONNECT_TIMEOUT must be an integer."
56
+ ) from exc
57
+
58
+ return cls(
59
+ host=host,
60
+ port=port,
61
+ user=user,
62
+ password=os.getenv(f"{prefix}PASSWORD", ""),
63
+ database=os.getenv(f"{prefix}DATABASE") or None,
64
+ connect_timeout=connect_timeout,
65
+ read_timeout=_parse_optional_int(
66
+ os.getenv(f"{prefix}READ_TIMEOUT"),
67
+ field_name=f"{prefix}READ_TIMEOUT",
68
+ ),
69
+ write_timeout=_parse_optional_int(
70
+ os.getenv(f"{prefix}WRITE_TIMEOUT"),
71
+ field_name=f"{prefix}WRITE_TIMEOUT",
72
+ ),
73
+ )
74
+
75
+ def to_pymysql_kwargs(self) -> dict[str, Any]:
76
+ kwargs: dict[str, Any] = {
77
+ "host": self.host,
78
+ "port": self.port,
79
+ "user": self.user,
80
+ "password": self.password,
81
+ "charset": self.charset,
82
+ "connect_timeout": self.connect_timeout,
83
+ "autocommit": self.autocommit,
84
+ }
85
+
86
+ if self.database:
87
+ kwargs["database"] = self.database
88
+ if self.read_timeout is not None:
89
+ kwargs["read_timeout"] = self.read_timeout
90
+ if self.write_timeout is not None:
91
+ kwargs["write_timeout"] = self.write_timeout
92
+
93
+ return kwargs
@@ -0,0 +1,10 @@
1
class MMCQDataError(Exception):
    """Common base class for the errors defined by the mmcqdata package.

    Catching this type handles every package-defined error with one clause.
    """


class ConfigurationError(MMCQDataError):
    """Signals missing or malformed client configuration values."""


class QueryError(MMCQDataError):
    """Signals a failure while connecting to or querying StarRocks."""