PyPI - panda-data - Versions diffs - 0.0.3__tar.gz → 0.0.4__tar.gz - Mend

panda-data 0.0.3 (tar.gz) → 0.0.4 (tar.gz)

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (28) hide show

{panda_data-0.0.3 → panda_data-0.0.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: panda_data
-Version: 0.0.3
+Version: 0.0.4
 Summary: PandaAI DataQuant
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown

panda_data-0.0.4/panda_data/test.py ADDED Viewed

@@ -0,0 +1,30 @@
+import panda_data
+import time
+if __name__ == "__main__":
+    # 1. pip install panda_data
+    # 2. 账号为：86+官网注册的手机号（eg:8617777777777）,密码与官网同步
+    # 3. 接口文档见官方知识库
+    # panda_data.init_token(username="", password="")
+    # panda_data.init_token(username="super_data_user", password="panda@2026^_^", base_url="http://192.168.1.3:8180")
+    panda_data.init_token(username="kk111", password="lijingyu.", base_url="http://127.0.0.1:8180")
+    # 开始计时
+    start_time = time.time()
+    result = panda_data.get_market_min_data(
+        symbol="000001.SZ",
+        start_date="20250101",
+        end_date="20250131",
+        symbol_type="stock",
+        fields=["symbol", "date", "num_trades", "amount", "volume"],
+        frequency="1m",
+        time_zone=("10:00", "11:00")
+    )
+    print(result)
+    end_time = time.time()
+    print(f"耗时:{end_time - start_time}")
+    print(result)

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/transport/http.py RENAMED Viewed

@@ -11,7 +11,7 @@ import socket
 import threading
 import time
 from dataclasses import dataclass
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 from urllib.parse import urljoin, urlparse
 from urllib.request import (
     Request,
@@ -94,6 +94,7 @@ class HTTPClientConfig:
     proxy_username: Optional[str] = None
     proxy_password: Optional[str] = None
     use_gzip: bool = False  # 是否使用 gzip 压缩请求体
+    local_mode: bool = False  # 本地模式：为 True 时不读 token 文件，不鉴权
 class HTTPClient:
@@ -236,7 +237,7 @@ class HTTPClient:
                         if not self._establish_monitor_connection():
                             consecutive_failures += 1
                             if consecutive_failures >= max_failures:
-                                self._delete_token_file()
+                                # self._delete_token_file()
                                 break
                         else:
                             consecutive_failures = 0
@@ -254,7 +255,7 @@ class HTTPClient:
                         # 连接断开
                         consecutive_failures += 1
                         if consecutive_failures >= max_failures:
-                            self._delete_token_file()
+                            # self._delete_token_file()
                             # 清理连接
                             try:
                                 self._monitor_connection.close()
@@ -266,7 +267,7 @@ class HTTPClient:
                         # 其他异常，也视为连接问题
                         consecutive_failures += 1
                         if consecutive_failures >= max_failures:
-                            self._delete_token_file()
+                            # self._delete_token_file()
                             try:
                                 self._monitor_connection.close()
                             except Exception:
@@ -277,7 +278,7 @@ class HTTPClient:
             except Exception:
                 consecutive_failures += 1
                 if consecutive_failures >= max_failures:
-                    self._delete_token_file()
+                    # self._delete_token_file()
                     break
     def start_connection_monitoring(self) -> None:
@@ -350,8 +351,8 @@ class HTTPClient:
         if accept_encodings:
             headers["Accept-Encoding"] = ", ".join(accept_encodings)
-        # 添加从文件读取的token到Authorization header（登录接口除外）
-        if endpoint and not endpoint.__contains__("login"):
+        # 添加从文件读取的token到Authorization header（本地模式下跳过鉴权）
+        if not self._config.local_mode and endpoint and not endpoint.__contains__("login"):
             user_file_path = self._get_token_file_path()
             if not user_file_path:
                 raise ServiceError("无法确定token文件路径，请重新登录！")
@@ -450,9 +451,7 @@ class HTTPClient:
         if not content_encoding:
             return content
-        decompress_start = time.time()
         content_encoding_lower = content_encoding.lower()
-        compressed_size = len(content)
         try:
             if content_encoding_lower == "gzip":
@@ -499,55 +498,188 @@ class HTTPClient:
         except Exception as e:
             raise
+    def _stream_to_temp_file(self, response, content_encoding: str, tmp_file_path: str) -> int:
+        """
+        将 HTTP 响应流式解压写入临时文件，避免全量数据同时驻留内存。
+        大内存机器受益于更快的 I/O 吞吐；小内存机器（如 8GB）
+        因不将全量解压数据加载到内存而避免 OOM。
+        Returns:
+            写入文件的总字节数
+        """
+        _CHUNK = 512 * 1024
+        total_written = 0
+        encoding = content_encoding.lower() if content_encoding else ""
+        with open(tmp_file_path, 'wb') as out:
+            if encoding in ("zstd", "z-standard"):
+                if not HAS_ZSTD:
+                    raise ServiceError(
+                        "Response is compressed with zstd, but zstandard library is not installed. "
+                        "Please install it with: pip install zstandard"
+                    )
+                dctx = zstd.ZstdDecompressor()
+                reader = dctx.stream_reader(response, read_size=_CHUNK)
+                try:
+                    while True:
+                        chunk = reader.read(_CHUNK)
+                        if not chunk:
+                            break
+                        out.write(chunk)
+                        total_written += len(chunk)
+                finally:
+                    reader.close()
+            elif encoding == "gzip":
+                import zlib
+                decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
+                while True:
+                    compressed_chunk = response.read(_CHUNK)
+                    if not compressed_chunk:
+                        break
+                    decompressed = decompressor.decompress(compressed_chunk)
+                    if decompressed:
+                        out.write(decompressed)
+                        total_written += len(decompressed)
+                remaining = decompressor.flush()
+                if remaining:
+                    out.write(remaining)
+                    total_written += len(remaining)
+            else:
+                while True:
+                    chunk = response.read(_CHUNK)
+                    if not chunk:
+                        break
+                    out.write(chunk)
+                    total_written += len(chunk)
+        return total_written
+    @staticmethod
+    def _parquet_col_ci(columns: List[str], name: str) -> Optional[str]:
+        """按不区分大小写匹配列名，返回 Parquet/DuckDB 中的实际列名。"""
+        nl = name.lower()
+        for c in columns:
+            if c.lower() == nl:
+                return c
+        return None
+    def _infer_parquet_columns(self, conn, tmp_file_path: str) -> List[str]:
+        """读取 Parquet 列名；LIMIT 1 失败时用 pyarrow / DuckDB DESCRIBE 兜底，避免无 ORDER BY 时乱序。"""
+        try:
+            sample_result = conn.execute(
+                f"SELECT * FROM read_parquet('{tmp_file_path}') LIMIT 1"
+            )
+            return list(sample_result.df().columns)
+        except Exception:
+            pass
+        try:
+            import pyarrow.parquet as pq
+            return list(pq.read_schema(tmp_file_path).names)
+        except Exception:
+            pass
+        try:
+            desc = conn.execute(
+                f"DESCRIBE SELECT * FROM read_parquet('{tmp_file_path}')"
+            ).df()
+            return desc.iloc[:, 0].astype(str).tolist()
+        except Exception:
+            pass
+        return []
     def _parse_response(self, response, endpoint: str = "") -> Dict[str, Any]:
         """解析响应，支持标准JSON、流式响应（NDJSON格式）和Parquet格式"""
-        content = response.read()
         content_encoding = response.headers.get("Content-Encoding", "")
-        content = self._decompress_content(content, content_encoding)
-        # 获取 Content-Type 头
         content_type = response.headers.get("Content-Type", "")
-        # 检测是否是 Parquet 格式（通过 Content-Type、endpoint 或文件内容魔数）
-        # Parquet 文件以 "PAR1" 开头（前4个字节）
+        # 已知返回 Parquet 格式的接口关键词
+        _parquet_ep_kws = (
+            "getmultimarketmindata", "getfuturetickdata", "getfinancialstatementdata",
+            "getstockmarkethkdata", "getstockmarkethkmindata",
+            "getstockmarketusdata", "getstockmarketusmindata",
+        )
+        endpoint_lower = endpoint.lower()
+        is_known_parquet_ep = (
+                "parquet" in content_type.lower()
+                or "application/x-parquet" in content_type.lower()
+                or any(kw in endpoint_lower for kw in _parquet_ep_kws)
+        )
+        # 对于已知 Parquet 接口，流式写入临时文件以避免全量数据驻留内存
+        _streamed_tmp_path = None
+        _streamed_size_mb = 0.0
+        if is_known_parquet_ep:
+            import tempfile
+            import os
+            _tmp_fd, _streamed_tmp_path = tempfile.mkstemp(suffix='.parquet')
+            os.close(_tmp_fd)
+            try:
+                _total_written = self._stream_to_temp_file(
+                    response, content_encoding, _streamed_tmp_path
+                )
+            except Exception as _e:
+                try:
+                    os.remove(_streamed_tmp_path)
+                except Exception:
+                    pass
+                raise ServiceError(f"流式写入临时文件失败: {_e}") from _e
+            # 验证文件确实是 Parquet 格式（检查 "PAR1" 魔数）
+            _is_valid_parquet = False
+            if _total_written >= 4:
+                with open(_streamed_tmp_path, 'rb') as _f:
+                    _is_valid_parquet = _f.read(4) == b"PAR1"
+            if _is_valid_parquet:
+                _streamed_size_mb = _total_written / (1024 * 1024)
+                content = None
+            else:
+                # 非 Parquet（可能是 JSON 错误响应），回退到内存解析
+                with open(_streamed_tmp_path, 'rb') as _f:
+                    content = _f.read()
+                try:
+                    os.remove(_streamed_tmp_path)
+                except Exception:
+                    pass
+                _streamed_tmp_path = None
+        else:
+            content = response.read()
+            content = self._decompress_content(content, content_encoding)
+        # 非流式路径：通过文件魔数检测是否为 Parquet
         is_parquet_by_content = False
-        if len(content) >= 4:
-            # 检查文件魔数 "PAR1"
+        if content is not None and len(content) >= 4:
             is_parquet_by_content = content[:4] == b"PAR1"
-        is_parquet = (
-                "parquet" in content_type.lower() or
-                "application/x-parquet" in content_type.lower() or
-                "getmultimarketmindata" in endpoint.lower() or
-                "getfuturetickdata" in endpoint.lower() or
-                "getfinancialstatementdata" in endpoint.lower() or
-                "getfinancialstatementdailydata" in endpoint.lower() or
-                is_parquet_by_content
-        )
+        is_parquet = (_streamed_tmp_path is not None) or is_parquet_by_content
         # 如果是 Parquet 格式，使用 DuckDB 读取
         if is_parquet:
             if not HAS_DUCKDB or duckdb is None:
+                if _streamed_tmp_path:
+                    try:
+                        os.remove(_streamed_tmp_path)
+                    except Exception:
+                        pass
                 raise ServiceError(
                     "响应是 Parquet 格式，但 DuckDB 库未安装。"
                     "请安装: pip install duckdb"
                 )
-            parquet_start = time.time()
             global _active_parquet_reads
             try:
-                # 将字节内容写入临时文件（DuckDB 需要文件路径）
                 import tempfile
-                with tempfile.NamedTemporaryFile(delete=False, suffix='.parquet') as tmp_file:
-                    tmp_file.write(content)
-                    tmp_file_path = tmp_file.name
-                file_size_mb = len(content) / 1024 / 1024
+                if _streamed_tmp_path is not None:
+                    tmp_file_path = _streamed_tmp_path
+                    file_size_mb = _streamed_size_mb
+                else:
+                    with tempfile.NamedTemporaryFile(delete=False, suffix='.parquet') as tmp_file:
+                        tmp_file.write(content)
+                        tmp_file_path = tmp_file.name
+                    file_size_mb = len(content) / 1024 / 1024
+                    del content
-                # 获取信号量，限制最大并发HTTP请求数
                 _parquet_read_semaphore.acquire()
-                max_concurrent = _get_max_concurrent_parquet_reads()
                 with _parquet_read_lock:
                     _active_parquet_reads += 1
                     current_concurrency = _active_parquet_reads
@@ -556,7 +688,6 @@ class HTTPClient:
                     # 使用 DuckDB 读取 Parquet 文件
                     conn = duckdb.connect()
                     try:
-                        import os
                         try:
                             import psutil
                             HAS_PSUTIL = True
@@ -575,109 +706,69 @@ class HTTPClient:
                             available_memory_gb = 4.0
                             total_memory_gb = 8.0
+                        cpu_count = os.cpu_count() or 4
                         effective_concurrency = max(current_concurrency, 1)
-                        # 更保守的内存分配：使用更小的百分比，并保留安全边距
-                        # 当可用内存较低时，使用更小的百分比
-                        if available_memory_gb < 4:
-                            memory_percentage = 0.5  # 只使用50%的可用内存
-                        elif available_memory_gb < 8:
-                            memory_percentage = 0.6  # 使用60%的可用内存
+                        # Parquet 展开后通常是文件大小的 5~15 倍，排序还需要额外临时空间
+                        estimated_expanded_gb = file_size_mb * 10 / 1024
+                        estimated_need_gb = max(estimated_expanded_gb * 2.5, 1.0)
+                        # 根据总内存渐进式分配 DuckDB 内存预算：
+                        #   ≤8GB  → 20%（配合 temp_directory 溢写磁盘防 OOM）
+                        #   8~16GB → 20%~40% 线性过渡（平衡速度与安全）
+                        #   >16GB → 积极分配（充分利用内存提升速度）
+                        if total_memory_gb <= 8:
+                            per_conn_budget_gb = total_memory_gb * 0.20 / effective_concurrency
+                        elif total_memory_gb <= 16:
+                            _ratio = 0.20 + (total_memory_gb - 8) / 8 * 0.20
+                            per_conn_budget_gb = total_memory_gb * _ratio / effective_concurrency
                         else:
-                            memory_percentage = 0.7  # 使用70%的可用内存
+                            per_conn_budget_gb = max(
+                                total_memory_gb * 0.5,
+                                available_memory_gb * 0.9,
+                            ) / effective_concurrency
+                        cap_ratio = float(os.environ.get("PANDA_DATA_DUCKDB_MEMORY_CAP_RATIO", "0.65"))
+                        per_conn_budget_gb = min(
+                            per_conn_budget_gb,
+                            total_memory_gb * cap_ratio / effective_concurrency,
+                        )
+                        memory_limit_gb = min(estimated_need_gb, per_conn_budget_gb)
+                        memory_limit_gb = max(memory_limit_gb, 0.5)
+                        env_mem = os.environ.get("PANDA_DATA_DUCKDB_MEMORY_GB", "").strip()
+                        if env_mem:
+                            try:
+                                memory_limit_gb = max(0.5, float(env_mem))
+                            except ValueError:
+                                pass
-                        per_thread_memory_gb = (available_memory_gb / effective_concurrency) * memory_percentage
-                        max_per_thread_memory_gb = 6.0
-                        cpu_count = os.cpu_count() or 4
-                        max_threads_by_memory = max(1, int(available_memory_gb / 8))
-                        max_possible_threads = min(cpu_count, max_threads_by_memory)
-                        # 根据可用内存调整线程数和内存限制
-                        if file_size_mb > 1000:
-                            base_threads = 2
-                            if available_memory_gb > 32:
-                                max_threads_for_file = min(max_possible_threads, 4)
-                            elif available_memory_gb > 8:
-                                max_threads_for_file = min(max_possible_threads, 2)
-                            else:
-                                max_threads_for_file = 1  # 内存不足时只使用1个线程
-                            num_threads = max(1, min(cpu_count, max_threads_for_file))
-                            # 对于大文件，使用更保守的内存限制
-                            memory_limit_gb = min(per_thread_memory_gb, 1.5, max_per_thread_memory_gb)
-                            memory_limit = f"{max(memory_limit_gb, 0.5):.1f}GB"
-                        elif file_size_mb > 500:
-                            base_threads = 4
-                            if available_memory_gb > 32:
-                                max_threads_for_file = min(max_possible_threads, 8)
-                            elif available_memory_gb > 16:
-                                max_threads_for_file = min(max_possible_threads, 6)
-                            elif available_memory_gb > 8:
-                                max_threads_for_file = min(max_possible_threads, 4)
-                            else:
-                                max_threads_for_file = 2  # 内存不足时减少线程
-                            num_threads = max(1, min(cpu_count, max_threads_for_file))
-                            memory_limit_gb = min(per_thread_memory_gb, 2.0, max_per_thread_memory_gb)
-                            memory_limit = f"{max(memory_limit_gb, 0.5):.1f}GB"
-                        elif file_size_mb > 200:
-                            base_threads = 8
-                            if available_memory_gb > 64:
-                                max_threads_for_file = min(max_possible_threads, 16)
-                            elif available_memory_gb > 32:
-                                max_threads_for_file = min(max_possible_threads, 12)
-                            elif available_memory_gb > 8:
-                                max_threads_for_file = min(max_possible_threads, 8)
-                            else:
-                                max_threads_for_file = 4  # 内存不足时减少线程
-                            num_threads = max(1, min(cpu_count, max_threads_for_file))
-                            memory_limit_gb = min(per_thread_memory_gb, 2.0, max_per_thread_memory_gb)
-                            memory_limit = f"{max(memory_limit_gb, 0.5):.1f}GB"
+                        memory_limit = f"{memory_limit_gb:.1f}GB"
+                        # 线程：内存过小时少开线程；避免 available=4 时 int(4/4)=1 单核跑满
+                        env_thr = os.environ.get("PANDA_DATA_DUCKDB_THREADS", "").strip()
+                        if env_thr:
+                            try:
+                                num_threads = max(1, min(cpu_count, int(env_thr)))
+                            except ValueError:
+                                num_threads = min(
+                                    cpu_count,
+                                    max(2, min(16, int(memory_limit_gb // 1.2))),
+                                )
                         else:
-                            # 小文件（<=200MB）：使用更少的线程和内存
-                            if available_memory_gb > 128:
-                                max_threads_for_file = min(max_possible_threads, cpu_count)
-                                base_threads = 16
-                            elif available_memory_gb > 64:
-                                max_threads_for_file = min(max_possible_threads, min(cpu_count, 24))
-                                base_threads = 12
-                            elif available_memory_gb > 32:
-                                max_threads_for_file = min(max_possible_threads, 16)
-                                base_threads = 8
-                            elif available_memory_gb > 8:
-                                max_threads_for_file = min(max_possible_threads, 8)
-                                base_threads = 4
-                            else:
-                                # 低内存系统：但考虑到数据量可能很大（行数多），使用4个线程
-                                # 如果内存真的紧张，会在后续的内存限制中体现
-                                max_threads_for_file = min(max_possible_threads, 4)
-                                base_threads = 4
-                            num_threads = max(1, min(cpu_count, max_threads_for_file))
-                            # 小文件不需要太多内存，限制在1GB以内
-                            memory_limit_gb = min(per_thread_memory_gb, 1.0, max_per_thread_memory_gb)
-                            memory_limit = f"{max(memory_limit_gb, 0.5):.1f}GB"
-                        # 根据文件大小进一步调整内存限制（小文件不需要太多内存）
-                        if file_size_mb <= 100:
-                            # 对于小于100MB的文件，限制内存使用在512MB-1GB之间
-                            memory_limit_gb = min(memory_limit_gb, 1.0)
-                        elif file_size_mb <= 200:
-                            # 对于100-200MB的文件，限制在1.5GB以内
-                            memory_limit_gb = min(memory_limit_gb, 1.5)
-                        # 确保内存限制不超过可用内存的90%（保留安全边距）
-                        max_safe_memory_gb = available_memory_gb * 0.9
-                        memory_limit_gb = min(memory_limit_gb, max_safe_memory_gb)
-                        memory_limit = f"{max(memory_limit_gb, 0.5):.1f}GB"
-                        num_threads = max(num_threads, 1)
+                            num_threads = min(
+                                cpu_count,
+                                max(2, min(16, int(memory_limit_gb // 1.2))),
+                            )
+                        num_threads = max(1, num_threads)
                         conn.execute(f"SET threads TO {num_threads}")
                         conn.execute(f"SET memory_limit='{memory_limit}'")
-                        if file_size_mb <= 500:
-                            conn.execute("SET enable_object_cache TO true")
-                        else:
-                            conn.execute("SET enable_object_cache TO false")
+                        conn.execute("SET enable_object_cache TO true")
                         conn.execute("SET enable_progress_bar TO false")
                         conn.execute("SET preserve_insertion_order TO false")
+                        _td = tempfile.gettempdir()
+                        conn.execute(f"SET temp_directory='{_td}'")
                     except Exception as config_error:
-                        # 回退配置：使用更保守的设置以避免内存不足
                         num_threads = 1
                         memory_limit = "512MB"
                         try:
@@ -686,93 +777,95 @@ class HTTPClient:
                             conn.execute("SET enable_object_cache TO false")
                             conn.execute("SET enable_progress_bar TO false")
                             conn.execute("SET preserve_insertion_order TO false")
+                            try:
+                                _td = tempfile.gettempdir()
+                                conn.execute(f"SET temp_directory='{_td}'")
+                            except Exception:
+                                pass
                         except Exception:
                             pass
-                    # 读取列信息
-                    available_columns = []
-                    try:
-                        sample_result = conn.execute(f"SELECT * FROM read_parquet('{tmp_file_path}') LIMIT 1")
-                        sample_df = sample_result.df()
-                        available_columns = list(sample_df.columns)
-                    except Exception:
-                        pass
+                    # 读取列信息（LIMIT 1 失败时用 pyarrow schema 兜底，避免无 ORDER BY 时保持 Parquet 物理顺序）
+                    available_columns = self._infer_parquet_columns(conn, tmp_file_path)
                     # 构建查询语句
-                    # 判断 endpoint 类型并设置排序和列选择逻辑
+                    # 判断 endpoint 类型并设置排序和列选择逻辑（列名大小写不敏感，ORDER BY 使用实际列名）
                     if "getfuturetickdata" in endpoint.lower():
                         # 特殊处理：按照 symbol ASC, timestamp ASC 排序，并将 symbol, date, timestamp 放在前三列
-                        has_symbol = 'symbol' in available_columns
-                        has_timestamp = 'timestamp' in available_columns
-                        has_date = 'date' in available_columns
+                        sym_col = self._parquet_col_ci(available_columns, "symbol")
+                        ts_col = self._parquet_col_ci(available_columns, "timestamp")
+                        date_col = self._parquet_col_ci(available_columns, "date")
                         order_by_clause = ""
-                        if has_symbol and has_timestamp:
-                            order_by_clause = " ORDER BY symbol ASC, timestamp ASC"
-                        elif has_symbol:
-                            order_by_clause = " ORDER BY symbol ASC"
-                        elif has_timestamp:
-                            order_by_clause = " ORDER BY timestamp ASC"
-                        if has_symbol and has_date and has_timestamp:
-                            other_columns = [col for col in available_columns if
-                                             col not in ['symbol', 'date', 'timestamp']]
-                            select_columns = ['symbol', 'date', 'timestamp'] + other_columns
+                        if sym_col and ts_col:
+                            order_by_clause = f' ORDER BY "{sym_col}" ASC, "{ts_col}" ASC'
+                        elif sym_col:
+                            order_by_clause = f' ORDER BY "{sym_col}" ASC'
+                        elif ts_col:
+                            order_by_clause = f' ORDER BY "{ts_col}" ASC'
+                        if sym_col and date_col and ts_col:
+                            other_columns = [
+                                col
+                                for col in available_columns
+                                if col not in (sym_col, date_col, ts_col)
+                            ]
+                            select_columns = [sym_col, date_col, ts_col] + other_columns
                             select_clause = ", ".join([f'"{col}"' for col in select_columns])
-                        elif has_symbol and has_date:
-                            other_columns = [col for col in available_columns if col not in ['symbol', 'date']]
-                            select_columns = ['symbol', 'date'] + other_columns
+                        elif sym_col and date_col:
+                            other_columns = [
+                                col for col in available_columns if col not in (sym_col, date_col)
+                            ]
+                            select_columns = [sym_col, date_col] + other_columns
                             select_clause = ", ".join([f'"{col}"' for col in select_columns])
-                        elif has_symbol:
-                            other_columns = [col for col in available_columns if col != 'symbol']
-                            select_columns = ['symbol'] + other_columns
+                        elif sym_col:
+                            other_columns = [col for col in available_columns if col != sym_col]
+                            select_columns = [sym_col] + other_columns
                             select_clause = ", ".join([f'"{col}"' for col in select_columns])
                         else:
                             select_clause = "*"
                     else:
-                        # 默认处理：保持原来的逻辑
-                        has_symbol = 'symbol' in available_columns
-                        has_date = 'date' in available_columns
-                        has_minute = 'minute' in available_columns
+                        # 默认：日频/分钟等行情——优先 symbol，再 date / minute 降序
+                        sym_col = self._parquet_col_ci(available_columns, "symbol")
+                        date_col = self._parquet_col_ci(available_columns, "date")
+                        min_col = self._parquet_col_ci(available_columns, "minute")
                         order_by_clause = ""
-                        if has_symbol and has_date and has_minute:
-                            order_by_clause = " ORDER BY symbol ASC, date DESC, minute DESC"
-                        elif has_symbol and has_date:
-                            order_by_clause = " ORDER BY symbol ASC, date DESC"
-                        elif has_symbol:
-                            order_by_clause = " ORDER BY symbol ASC"
-                        elif has_date and has_minute:
-                            order_by_clause = " ORDER BY date DESC, minute DESC"
-                        elif has_date:
-                            order_by_clause = " ORDER BY date DESC"
-                        elif has_minute:
-                            order_by_clause = " ORDER BY minute DESC"
-                        if has_symbol and has_date:
-                            other_columns = [col for col in available_columns if col not in ['symbol', 'date']]
-                            select_columns = ['symbol', 'date'] + other_columns
+                        if sym_col and date_col and min_col:
+                            order_by_clause = (
+                                f' ORDER BY "{sym_col}" ASC, "{date_col}" DESC, "{min_col}" DESC'
+                            )
+                        elif sym_col and date_col:
+                            order_by_clause = f' ORDER BY "{sym_col}" ASC, "{date_col}" DESC'
+                        elif sym_col:
+                            order_by_clause = f' ORDER BY "{sym_col}" ASC'
+                        elif date_col and min_col:
+                            order_by_clause = f' ORDER BY "{date_col}" DESC, "{min_col}" DESC'
+                        elif date_col:
+                            order_by_clause = f' ORDER BY "{date_col}" DESC'
+                        elif min_col:
+                            order_by_clause = f' ORDER BY "{min_col}" DESC'
+                        if sym_col and date_col:
+                            other_columns = [
+                                col for col in available_columns if col not in (sym_col, date_col)
+                            ]
+                            select_columns = [sym_col, date_col] + other_columns
                             select_clause = ", ".join([f'"{col}"' for col in select_columns])
-                        elif has_symbol:
-                            other_columns = [col for col in available_columns if col != 'symbol']
-                            select_columns = ['symbol'] + other_columns
+                        elif sym_col:
+                            other_columns = [col for col in available_columns if col != sym_col]
+                            select_columns = [sym_col] + other_columns
                             select_clause = ", ".join([f'"{col}"' for col in select_columns])
-                        elif has_date:
-                            other_columns = [col for col in available_columns if col != 'date']
-                            select_columns = ['date'] + other_columns
+                        elif date_col:
+                            other_columns = [col for col in available_columns if col != date_col]
+                            select_columns = [date_col] + other_columns
                             select_clause = ", ".join([f'"{col}"' for col in select_columns])
                         else:
                             select_clause = "*"
                     # 执行主查询
                     query = f"SELECT {select_clause} FROM read_parquet('{tmp_file_path}'){order_by_clause}"
-                    result = conn.execute(query)
-                    # 转换为DataFrame（优先使用Arrow格式优化性能）
-                    use_arrow = False
-                    df = None
-                    # 检查是否可以使用Arrow格式
                     try:
                         import pyarrow as pa
                         HAS_PYARROW = True
@@ -781,47 +874,22 @@ class HTTPClient:
                         pa = None
                     if HAS_PYARROW:
-                        # 方法1: 尝试使用Arrow格式（通常比直接df()快2-5倍）
+                        result = conn.execute(query)
                         try:
                             arrow_result = result.arrow()
-                            # DuckDB的arrow()可能返回Table或RecordBatchReader
                             if isinstance(arrow_result, pa.Table):
-                                # 如果是Table，直接转换
                                 df = arrow_result.to_pandas()
                             elif isinstance(arrow_result, pa.RecordBatchReader):
-                                # 如果是RecordBatchReader，读取所有批次
-                                arrow_table = arrow_result.read_all()
-                                df = arrow_table.to_pandas()
-                            elif hasattr(arrow_result, 'to_pandas'):
-                                # 某些版本可能直接支持to_pandas
+                                df = arrow_result.read_all().to_pandas()
+                            elif hasattr(arrow_result, "to_pandas"):
                                 df = arrow_result.to_pandas()
-                            elif hasattr(arrow_result, 'read_all'):
-                                # 尝试使用read_all方法
-                                arrow_table = arrow_result.read_all()
-                                df = arrow_table.to_pandas()
                             else:
-                                # 未知类型，尝试作为迭代器读取批次
-                                try:
-                                    batches = list(arrow_result)
-                                    if batches:
-                                        arrow_table = pa.Table.from_batches(batches)
-                                        df = arrow_table.to_pandas()
-                                    else:
-                                        raise ValueError("No batches in RecordBatchReader")
-                                except Exception as e:
-                                    raise ValueError(
-                                        f"Cannot convert arrow_result to pandas: {type(arrow_result)}, error: {e}")
-                            use_arrow = True
+                                raise TypeError(f"unexpected arrow result type: {type(arrow_result)}")
                         except Exception:
-                            # Arrow转换失败，需要重新执行查询
-                            # 重新执行查询（因为result已经被消耗）
-                            result_retry = conn.execute(query)
-                            df = result_retry.df()
+                            result = conn.execute(query)
+                            df = result.df()
                     else:
-                        # 方法2: 直接使用df()（如果没有pyarrow）
-                        df = result.df()
+                        df = conn.execute(query).df()
                     # 确保df已正确赋值
                     if df is None:
@@ -831,7 +899,6 @@ class HTTPClient:
                     # 清理临时文件
                     try:
-                        import os
                         if os.path.exists(tmp_file_path):
                             os.remove(tmp_file_path)
                     except Exception as e:
@@ -1062,7 +1129,6 @@ class HTTPClient:
         for attempt in range(retries):
             try:
-                # 发送请求并等待响应
                 response = self._opener.open(request, timeout=timeout_value)
                 return self._parse_response(response, endpoint)

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: panda_data
-Version: 0.0.3
+Version: 0.0.4
 Summary: PandaAI DataQuant
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown

{panda_data-0.0.3 → panda_data-0.0.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "panda_data"
-version = "0.0.3"
+version = "0.0.4"
 description = "PandaAI DataQuant"
 readme = "README.md"
 requires-python = ">=3.10"

panda_data-0.0.3/panda_data/test.py DELETED Viewed

@@ -1,27 +0,0 @@
-import panda_data
-import time
-if __name__ == "__main__":
-    # 1. pip install panda_data
-    # 2. 账号为：86+官网注册的手机号（eg:8617777777777）,密码与官网同步
-    # 3. 接口文档见官方知识库
-    panda_data.init_token(username="", password="")
-    # 开始计时
-    start_time = time.time()
-    result = panda_data.get_market_data(
-        symbol="000001.SZ",
-        start_date="20200101",
-        end_date="20250101",
-        # fields = ["open"],
-        type="stock"
-    )
-    print(result)
-    end_time = time.time()
-    print(f"耗时:{end_time - start_time}")
-    print(result)

{panda_data-0.0.3 → panda_data-0.0.4}/README.md RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/__init__.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/client.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/config/__init__.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/core/__init__.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/core/service.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/exceptions.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/readers/__init__.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/readers/financial_and_factors_reader.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/readers/future_reader.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/readers/init_token.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/readers/market_reader.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/readers/market_reference_reader.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/readers/trading_tools_reader.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/transport/__init__.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/utils/common_utils.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data/utils/param_check_utils.py RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data.egg-info/requires.txt RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/panda_data.egg-info/top_level.txt RENAMED Viewed

File without changes

{panda_data-0.0.3 → panda_data-0.0.4}/setup.cfg RENAMED Viewed

File without changes

panda-data 0.0.3__tar.gz → 0.0.4__tar.gz

panda-data 0.0.3 tar.gz → 0.0.4 tar.gz