cnhkmcp-2.1.2-py3-none-any.whl → cnhkmcp-2.1.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/METADATA +1 -1
- cnhkmcp-2.1.3.dist-info/RECORD +6 -0
- cnhkmcp-2.1.3.dist-info/top_level.txt +1 -0
- cnhkmcp/__init__.py +0 -125
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +0 -38
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/ace.log +0 -0
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +0 -6
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/ace_lib.py +0 -1510
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_datasets.py +0 -157
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_documentation.py +0 -132
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_operators.py +0 -99
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/helpful_functions.py +0 -180
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.ico +0 -0
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.png +0 -0
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/test.txt +0 -1
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +0 -576
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/process_knowledge_base.py +0 -281
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/rag_engine.py +0 -408
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/requirements.txt +0 -7
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/run.bat +0 -3
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242//321/211/320/266/320/246/321/206/320/274/320/261/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -265
- cnhkmcp/untracked/APP/.gitignore +0 -32
- cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +0 -112
- cnhkmcp/untracked/APP/README.md +0 -309
- cnhkmcp/untracked/APP/Tranformer/Transformer.py +0 -4985
- cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
- cnhkmcp/untracked/APP/Tranformer/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +0 -2421
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_/321/207/320/264/342/225/221/321/204/342/225/233/320/233.json +0 -654
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +0 -1034
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +0 -444
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_/321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/277/321/207/320/253/342/224/244/321/206/320/236/320/265/321/210/342/225/234/342/225/234/321/205/320/225/320/265Machine_lib.json +0 -22
- cnhkmcp/untracked/APP/Tranformer/parsetab.py +0 -60
- cnhkmcp/untracked/APP/Tranformer/template_summary.txt +0 -3182
- cnhkmcp/untracked/APP/Tranformer/transformer_config.json +0 -7
- cnhkmcp/untracked/APP/Tranformer/validator.py +0 -889
- cnhkmcp/untracked/APP/ace.log +0 -69
- cnhkmcp/untracked/APP/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP/blueprints/__init__.py +0 -6
- cnhkmcp/untracked/APP/blueprints/feature_engineering.py +0 -347
- cnhkmcp/untracked/APP/blueprints/idea_house.py +0 -221
- cnhkmcp/untracked/APP/blueprints/inspiration_house.py +0 -432
- cnhkmcp/untracked/APP/blueprints/paper_analysis.py +0 -570
- cnhkmcp/untracked/APP/custom_templates/templates.json +0 -1257
- cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +0 -400
- cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +0 -252
- cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +0 -157
- cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +0 -99
- cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +0 -11
- cnhkmcp/untracked/APP/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +0 -1497
- cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +0 -447
- cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/mirror_config.txt +0 -20
- cnhkmcp/untracked/APP/operaters.csv +0 -129
- cnhkmcp/untracked/APP/requirements.txt +0 -53
- cnhkmcp/untracked/APP/run_app.bat +0 -28
- cnhkmcp/untracked/APP/run_app.sh +0 -34
- cnhkmcp/untracked/APP/setup_tsinghua.bat +0 -39
- cnhkmcp/untracked/APP/setup_tsinghua.sh +0 -43
- cnhkmcp/untracked/APP/simulator/alpha_submitter.py +0 -404
- cnhkmcp/untracked/APP/simulator/simulator_wqb.py +0 -618
- cnhkmcp/untracked/APP/ssrn-3332513.pdf +6 -109201
- cnhkmcp/untracked/APP/static/brain.js +0 -589
- cnhkmcp/untracked/APP/static/decoder.js +0 -1540
- cnhkmcp/untracked/APP/static/feature_engineering.js +0 -1729
- cnhkmcp/untracked/APP/static/idea_house.js +0 -937
- cnhkmcp/untracked/APP/static/inspiration.js +0 -465
- cnhkmcp/untracked/APP/static/inspiration_house.js +0 -868
- cnhkmcp/untracked/APP/static/paper_analysis.js +0 -390
- cnhkmcp/untracked/APP/static/script.js +0 -3082
- cnhkmcp/untracked/APP/static/simulator.js +0 -597
- cnhkmcp/untracked/APP/static/styles.css +0 -3127
- cnhkmcp/untracked/APP/static/usage_widget.js +0 -508
- cnhkmcp/untracked/APP/templates/alpha_inspector.html +0 -511
- cnhkmcp/untracked/APP/templates/feature_engineering.html +0 -960
- cnhkmcp/untracked/APP/templates/idea_house.html +0 -564
- cnhkmcp/untracked/APP/templates/index.html +0 -932
- cnhkmcp/untracked/APP/templates/inspiration_house.html +0 -861
- cnhkmcp/untracked/APP/templates/paper_analysis.html +0 -91
- cnhkmcp/untracked/APP/templates/simulator.html +0 -343
- cnhkmcp/untracked/APP/templates/transformer_web.html +0 -580
- cnhkmcp/untracked/APP/usage.md +0 -351
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/brain_alpha_inspector.py +0 -712
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -2456
- cnhkmcp/untracked/arXiv_API_Tool_Manual.md +0 -490
- cnhkmcp/untracked/arxiv_api.py +0 -229
- cnhkmcp/untracked/forum_functions.py +0 -998
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/forum_functions.py +0 -407
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +0 -2415
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/user_config.json +0 -31
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/210/320/276/320/271AI/321/210/320/277/342/225/227/321/210/342/224/220/320/251/321/204/342/225/225/320/272/321/206/320/246/320/227/321/206/320/261/320/263/321/206/320/255/320/265/321/205/320/275/320/266/321/204/342/225/235/320/252/321/204/342/225/225/320/233/321/210/342/225/234/342/225/234/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270.md +0 -101
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
- cnhkmcp/untracked/platform_functions.py +0 -2886
- cnhkmcp/untracked/sample_mcp_config.json +0 -11
- cnhkmcp/untracked/user_config.json +0 -31
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/222/321/210/320/220/320/223/321/206/320/246/320/227/321/206/320/261/320/263_BRAIN_Alpha_Test_Requirements_and_Tips.md +0 -202
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Alpha_explaination_workflow.md +0 -56
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_6_Tips_Datafield_Exploration_Guide.md +0 -194
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_Alpha_Improvement_Workflow.md +0 -101
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Dataset_Exploration_Expert_Manual.md +0 -436
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_daily_report_workflow.md +0 -128
- cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
- cnhkmcp-2.1.2.dist-info/RECORD +0 -111
- cnhkmcp-2.1.2.dist-info/top_level.txt +0 -1
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/WHEEL +0 -0
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/entry_points.txt +0 -0
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,252 +0,0 @@
-import ace_lib
-import pandas as pd
-import json
-import openai
-import os
-import sys
-import time
-import random
-
-# Default Moonshot Configuration
-DEFAULT_MOONSHOT_BASE_URL = "https://api.moonshot.cn/v1"
-DEFAULT_MOONSHOT_MODEL = "kimi-k2-turbo-preview"
-
-def get_llm_client(api_key, base_url):
-    return openai.OpenAI(
-        api_key=api_key,
-        base_url=base_url,
-    )
-
-def test_llm_connection(api_key, base_url, model):
-    print("\nTesting LLM connection...")
-    client = get_llm_client(api_key, base_url)
-    try:
-        client.chat.completions.create(
-            model=model,
-            messages=[{"role": "user", "content": "Hello"}],
-            max_tokens=5
-        )
-        print("LLM connection successful.")
-        return True
-    except Exception as e:
-        print(f"LLM connection failed: {e}")
-        return False
-
-def call_llm_with_retry(client, model, system_prompt, operators_df, datafields_df, dataset_id, max_retries=5):
-
-    n_ops = len(operators_df)
-    n_fields = len(datafields_df)
-
-    for attempt in range(max_retries + 1):
-        print(f"\nAttempt {attempt + 1}/{max_retries + 1} - Preparing prompt with {n_ops} operators and {n_fields} datafields...")
-
-        # Sample rows if needed, otherwise take head
-        # Using head for stability, but could be random sample
-        ops_subset = operators_df.head(n_ops)
-        fields_subset = datafields_df.head(n_fields)
-
-        operators_info = ops_subset[['name', 'category', 'description', 'extra_side_note']].to_string()
-        datafields_info = fields_subset[['id', 'description', 'subcategory']].to_string()
-
-        user_prompt = f"""
-Here is the information about available operators (first {n_ops} rows):
-{operators_info}
-
-Here is the information about the dataset '{dataset_id}' (first {n_fields} rows):
-{datafields_info}
-
-Please come up with several Alpha templates based on this information.
-Specify the AI answer in Chinese.
-"""
-
-        try:
-            print("Sending request to LLM...")
-            completion = client.chat.completions.create(
-                model=model,
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt}
-                ],
-                temperature=0.3,
-            )
-            return completion.choices[0].message.content
-
-        except openai.BadRequestError as e:
-            error_msg = str(e)
-            print(f"LLM Bad Request Error: {error_msg}")
-
-            # Check for token limit error
-            if "token limit" in error_msg or "context_length_exceeded" in error_msg or "400" in error_msg:
-                print("Token limit exceeded. Reducing context size...")
-                n_ops = max(1, n_ops // 2)
-                n_fields = max(1, n_fields // 2)
-                if n_ops == 1 and n_fields == 1:
-                    print("Cannot reduce context further.")
-                    return f"Failed after retries: {e}"
-            else:
-                return f"LLM Error (not token related): {e}"
-
-        except Exception as e:
-            return f"General Error calling LLM: {e}"
-
-    return "Max retries exceeded."
-
-def main():
-    print("=== BRAIN Alpha Generator Full Version ===\n")
-
-    # 1. Interactive Login
-    print("--- Step 1: Login to BRAIN ---")
-    email = input("Enter BRAIN Email: ").strip()
-    while not email:
-        email = input("Email is required. Enter BRAIN Email: ").strip()
-
-    import getpass
-    password = getpass.getpass("Enter BRAIN Password: ").strip()
-    while not password:
-        password = getpass.getpass("Password is required. Enter BRAIN Password: ").strip()
-
-    # Monkeypatch ace_lib.get_credentials to use provided inputs
-    ace_lib.get_credentials = lambda: (email, password)
-
-    print("Logging in...")
-    try:
-        s = ace_lib.start_session()
-        print("Login successful.")
-    except Exception as e:
-        print(f"Login failed: {e}")
-        return
-
-    # 2. LLM Configuration
-    print("\n--- Step 2: LLM Configuration ---")
-    base_url = input(f"Enter LLM Base URL (default: {DEFAULT_MOONSHOT_BASE_URL}): ").strip()
-    if not base_url:
-        base_url = DEFAULT_MOONSHOT_BASE_URL
-
-    api_key = input("Enter LLM API Key (required): ").strip()
-    while not api_key:
-        print("API Key is required.")
-        api_key = input("Enter LLM API Key: ").strip()
-
-    model_name = input(f"Enter LLM Model Name (default: {DEFAULT_MOONSHOT_MODEL}): ").strip()
-    if not model_name:
-        model_name = DEFAULT_MOONSHOT_MODEL
-
-    if not test_llm_connection(api_key, base_url, model_name):
-        print("Aborting due to LLM connection failure.")
-        return
-
-    llm_client = get_llm_client(api_key, base_url)
-
-    # 3. Load Operators
-    print("\n--- Step 3: Load Operators ---")
-    print("Getting operators...")
-    try:
-        operators_df = ace_lib.get_operators(s)
-        operators_df = operators_df[operators_df['scope'] == 'REGULAR']
-        print(f"Retrieved {len(operators_df)} operators (REGULAR only).")
-
-        print("Fetching documentation for operators...")
-        operators_df = operators_df.copy()
-
-        def fetch_doc_content(doc_path):
-            if pd.isna(doc_path) or not doc_path:
-                return None
-            url = ace_lib.brain_api_url + doc_path
-            try:
-                r = s.get(url)
-                if r.status_code == 200:
-                    return json.dumps(r.json())
-                return None
-            except Exception:
-                return None
-
-        operators_df['extra_side_note'] = operators_df['documentation'].apply(fetch_doc_content)
-        operators_df.drop(columns=['documentation', 'level'], inplace=True)
-        print("Operators loaded and processed.")
-
-    except Exception as e:
-        print(f"Failed to get operators: {e}")
-        return
-
-    # 4. Dataset Selection
-    print("\n--- Step 4: Select Dataset ---")
-    region = input("Enter Region (default: USA): ").strip() or "USA"
-    delay = input("Enter Delay (default: 1): ").strip() or "1"
-    universe = input("Enter Universe (default: TOP3000): ").strip() or "TOP3000"
-
-    try:
-        delay = int(delay)
-    except ValueError:
-        print("Invalid delay, using default 1")
-        delay = 1
-
-    print(f"Fetching datasets for Region={region}, Delay={delay}, Universe={universe}...")
-    try:
-        datasets_df = ace_lib.get_datasets(
-            s,
-            region=region,
-            delay=delay,
-            universe=universe
-        )
-        print(f"Retrieved {len(datasets_df)} datasets.")
-        # print(datasets_df[['id', 'name', 'category', 'subcategory']].head(10))
-
-        # Print all datasets for user selection
-        pd.set_option('display.max_rows', None)
-        print(datasets_df[['id', 'name', 'category', 'subcategory']])
-    except Exception as e:
-        print(f"Failed to get datasets: {e}")
-        return
-
-    # 5. Dataset Detail
-    print("\n--- Step 5: Get Dataset Details ---")
-    dataset_id = input("Enter Dataset ID to analyze (e.g., analyst10): ").strip()
-    while not dataset_id:
-        dataset_id = input("Dataset ID is required: ").strip()
-
-    print(f"Getting datafields for dataset: {dataset_id}...")
-    try:
-        datafields_df = ace_lib.get_datafields(
-            s,
-            region=region,
-            delay=delay,
-            universe=universe,
-            data_type="ALL",
-            dataset_id=dataset_id
-        )
-        print(f"Retrieved {len(datafields_df)} datafields.")
-    except Exception as e:
-        print(f"Failed to get datafields: {e}")
-        return
-
-    # 6. Generate Alpha Templates
-    print("\n--- Step 6: Generate Alpha Templates ---")
-
-    # Load System Prompt
-    # Use relative path based on the script location
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    system_prompt_path = os.path.join(script_dir, "what_is_Alpha_template.md")
-
-    try:
-        with open(system_prompt_path, "r", encoding="utf-8") as f:
-            system_prompt = f.read()
-        print(f"System prompt loaded from {system_prompt_path}")
-    except Exception as e:
-        print(f"System prompt file not found at {system_prompt_path}, using default. Error: {e}")
-        system_prompt = "You are a helpful assistant for generating Alpha templates."
-
-    response = call_llm_with_retry(
-        llm_client,
-        model_name,
-        system_prompt,
-        operators_df,
-        datafields_df,
-        dataset_id
-    )
-
-    print("\n=== LLM Response ===")
-    print(response)
-    print("====================")
-
-if __name__ == "__main__":
-    main()
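
The retry loop in the removed script above halves the prompt context whenever the model reports a token-limit error. A minimal, self-contained sketch of that back-off pattern (illustration only, not part of the package; a stub stands in for the real OpenAI call):

def call_llm(n_ops, n_fields):
    # Stand-in for client.chat.completions.create: pretend the prompt
    # overflows the context window when too many rows are included.
    if n_ops + n_fields > 100:
        raise RuntimeError("context_length_exceeded")
    return f"ok with {n_ops} ops / {n_fields} fields"

def call_with_retry(n_ops, n_fields, max_retries=5):
    for _ in range(max_retries + 1):
        try:
            return call_llm(n_ops, n_fields)
        except RuntimeError as e:
            if "context_length_exceeded" not in str(e):
                raise
            # Halve both sides of the context, as the removed script does.
            n_ops, n_fields = max(1, n_ops // 2), max(1, n_fields // 2)
            if n_ops == 1 and n_fields == 1:
                return f"Failed after retries: {e}"
    return "Max retries exceeded."

print(call_with_retry(400, 200))  # -> "ok with 50 ops / 25 fields"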
@@ -1,157 +0,0 @@
-import getpass
-import json
-import os
-import sys
-from typing import List
-
-import pandas as pd
-
-# Ensure we can import ace_lib from the project root
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(SCRIPT_DIR)
-if ROOT_DIR not in sys.path:
-    sys.path.append(ROOT_DIR)
-
-import ace_lib  # noqa: E402
-
-
-def prompt_credentials() -> tuple[str, str]:
-    """Prompt user for platform credentials."""
-    email = input("Enter BRAIN Email: ").strip()
-    while not email:
-        email = input("Email is required. Enter BRAIN Email: ").strip()
-
-    password = getpass.getpass("Enter BRAIN Password: ").strip()
-    while not password:
-        password = getpass.getpass("Password is required. Enter BRAIN Password: ").strip()
-
-    return email, password
-
-
-def fetch_all_combinations(session: ace_lib.SingleSession) -> pd.DataFrame:
-    """Return all valid instrument/region/delay/universe combos from platform settings."""
-    options_df = ace_lib.get_instrument_type_region_delay(session)
-    if options_df is None or options_df.empty:
-        raise RuntimeError("No simulation options fetched; cannot enumerate datasets.")
-    return options_df
-
-
-def fetch_datasets_for_combo(
-    session: ace_lib.SingleSession,
-    instrument_type: str,
-    region: str,
-    delay: int,
-    universe: str,
-) -> pd.DataFrame:
-    """Fetch datasets for one combination (theme ALL to include both theme true/false)."""
-    df = ace_lib.get_datasets(
-        session,
-        instrument_type=instrument_type,
-        region=region,
-        delay=delay,
-        universe=universe,
-        theme="ALL",
-    )
-    if df is None:
-        return pd.DataFrame()
-
-    df = df.copy()
-    df["param_instrument_type"] = instrument_type
-    df["param_region"] = region
-    df["param_delay"] = delay
-    df["param_universe"] = universe
-    df["combo_key"] = df.apply(
-        lambda row: f"{instrument_type}-{region}-D{delay}-{universe}",
-        axis=1,
-    )
-    return df
-
-
-def merge_and_deduplicate(datasets: List[pd.DataFrame]) -> pd.DataFrame:
-    """Merge fetched datasets and deduplicate by dataset id, keeping all combo metadata."""
-    combined = pd.concat([df for df in datasets if not df.empty], ignore_index=True)
-    if combined.empty:
-        return combined
-
-    # Aggregate availability combos per dataset id
-    availability = (
-        combined.groupby("id")["combo_key"]
-        .agg(lambda x: " | ".join(sorted(set(x))))
-        .rename("available_in")
-        .reset_index()
-    )
-
-    # Drop duplicate rows by dataset id, keep first occurrence of other columns
-    unique_df = combined.drop_duplicates(subset=["id"]).copy()
-    unique_df = unique_df.merge(availability, on="id", how="left")
-
-    # Sort for readability
-    sort_cols = [col for col in ["category", "subcategory", "id"] if col in unique_df.columns]
-    if sort_cols:
-        # Ensure sort keys are hashable/strings to avoid unhashable dict errors
-        for col in sort_cols:
-            unique_df[col] = unique_df[col].apply(
-                lambda v: v
-                if pd.isna(v) or isinstance(v, (int, float, str, bool))
-                else json.dumps(v, ensure_ascii=False, sort_keys=True)
-            )
-        unique_df = unique_df.sort_values(sort_cols).reset_index(drop=True)
-
-    return unique_df
-
-
-def main():
-    print("=== Fetch All BRAIN Datasets (all regions/universes/delays) ===")
-
-    email, password = prompt_credentials()
-
-    # Monkey-patch ace_lib credential retrieval so start_session uses provided credentials
-    ace_lib.get_credentials = lambda: (email, password)
-
-    print("Logging in...")
-    try:
-        session = ace_lib.start_session()
-        print("Login successful.")
-    except Exception as exc:
-        print(f"Login failed: {exc}")
-        return
-
-    print("Fetching valid instrument/region/delay/universe combinations from platform settings...")
-    try:
-        options_df = fetch_all_combinations(session)
-    except Exception as exc:
-        print(f"Failed to fetch simulation options: {exc}")
-        return
-
-    all_datasets: List[pd.DataFrame] = []
-    total_combos = 0
-
-    for _, row in options_df.iterrows():
-        instrument_type = row.get("InstrumentType")
-        region = row.get("Region")
-        delay = row.get("Delay")
-        universes = row.get("Universe") or []
-
-        for universe in universes:
-            total_combos += 1
-            print(f"[{total_combos}] Fetching datasets for {instrument_type} / {region} / D{delay} / {universe}...")
-            try:
-                df = fetch_datasets_for_combo(session, instrument_type, region, delay, universe)
-                print(f" -> Retrieved {len(df)} rows")
-                all_datasets.append(df)
-            except Exception as exc:
-                print(f" -> Failed for {instrument_type}-{region}-D{delay}-{universe}: {exc}")

-    result_df = merge_and_deduplicate(all_datasets)
-
-    if result_df.empty:
-        print("No datasets fetched; nothing to save.")
-        return
-
-    output_path = os.path.join(SCRIPT_DIR, "all_datasets_full.csv")
-    result_df.to_csv(output_path, index=False)
-    print(f"Saved {len(result_df)} unique datasets to {output_path}")
-
-
-if __name__ == "__main__":
-    main()
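
The merge_and_deduplicate step above collapses the per-combo rows to one row per dataset id, with an available_in column listing every combo the id appeared in. A toy illustration of that groupby-then-merge shape (made-up data, not from the package):

import pandas as pd

combined = pd.DataFrame({
    "id": ["ds1", "ds1", "ds2"],
    "category": ["pv", "pv", "model"],
    "combo_key": ["EQUITY-USA-D1-TOP3000", "EQUITY-EUR-D1-TOP2500", "EQUITY-USA-D0-TOP3000"],
})
availability = (
    combined.groupby("id")["combo_key"]
    .agg(lambda x: " | ".join(sorted(set(x))))   # one string per id
    .rename("available_in")
    .reset_index()
)
unique_df = combined.drop_duplicates(subset=["id"]).merge(availability, on="id", how="left")
print(unique_df[["id", "category", "available_in"]])
# ds1 -> "EQUITY-EUR-D1-TOP2500 | EQUITY-USA-D1-TOP3000", ds2 -> "EQUITY-USA-D0-TOP3000"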
@@ -1,99 +0,0 @@
-import getpass
-import os
-import sys
-from typing import List
-
-import pandas as pd
-
-# Make ace_lib importable
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(SCRIPT_DIR)
-if ROOT_DIR not in sys.path:
-    sys.path.append(ROOT_DIR)
-
-import ace_lib  # noqa: E402
-
-
-def prompt_credentials() -> tuple[str, str]:
-    email = input("Enter BRAIN Email: ").strip()
-    while not email:
-        email = input("Email is required. Enter BRAIN Email: ").strip()
-
-    password = getpass.getpass("Enter BRAIN Password: ").strip()
-    while not password:
-        password = getpass.getpass("Password is required. Enter BRAIN Password: ").strip()
-
-    return email, password
-
-
-def fetch_operators(session: ace_lib.SingleSession) -> pd.DataFrame:
-    df = ace_lib.get_operators(session)
-    if df is None or df.empty:
-        return pd.DataFrame()
-
-    df = df.copy()
-
-    # Choose an identifier column robustly
-    id_col = "id" if "id" in df.columns else None
-    if id_col is None:
-        if "name" in df.columns:
-            id_col = "name"
-        else:
-            id_col = "_row_id"
-            df[id_col] = df.index
-
-    # Re-aggregate scopes so each operator id is unique
-    if "scope" in df.columns:
-        scope_map = (
-            df.groupby(id_col)["scope"]
-            .agg(lambda x: sorted(set([item for item in x if pd.notna(item)])))
-            .rename("scopes")
-            .reset_index()
-        )
-    else:
-        scope_map = pd.DataFrame({id_col: df[id_col].unique(), "scopes": [[] for _ in range(df[id_col].nunique())]})
-
-    unique_df = df.drop(columns=["scope"], errors="ignore").drop_duplicates(subset=[id_col]).merge(
-        scope_map, on=id_col, how="left"
-    )
-
-    # Sort for readability
-    sort_cols: List[str] = [col for col in ["category", "subcategory", "name", id_col] if col in unique_df.columns]
-    if sort_cols:
-        unique_df = unique_df.sort_values(sort_cols).reset_index(drop=True)
-
-    return unique_df
-
-
-def main():
-    print("=== Fetch All BRAIN Operators ===")
-
-    email, password = prompt_credentials()
-    ace_lib.get_credentials = lambda: (email, password)
-
-    print("Logging in...")
-    try:
-        session = ace_lib.start_session()
-        print("Login successful.")
-    except Exception as exc:
-        print(f"Login failed: {exc}")
-        return
-
-    print("Fetching operators...")
-    try:
-        operators_df = fetch_operators(session)
-    except Exception as exc:
-        print(f"Failed to fetch operators: {exc}")
-        return
-
-    if operators_df.empty:
-        print("No operators returned; nothing to save.")
-        return
-
-    output_path = os.path.join(SCRIPT_DIR, "all_operators.csv")
-    operators_df.to_csv(output_path, index=False)
-    print(f"Saved {len(operators_df)} operators to {output_path}")
-
-
-if __name__ == "__main__":
-    main()
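
The scope re-aggregation in fetch_operators follows the same pattern, but gathers the duplicate rows' scopes into a list per operator. A small self-contained illustration (hypothetical operator names):

import pandas as pd

df = pd.DataFrame({"name": ["rank", "rank", "ts_mean"],
                   "scope": ["REGULAR", "COMBO", "REGULAR"]})
scope_map = (df.groupby("name")["scope"]
               .agg(lambda x: sorted(set(x.dropna())))  # list of scopes per name
               .rename("scopes").reset_index())
unique_df = df.drop(columns=["scope"]).drop_duplicates(subset=["name"]).merge(scope_map, on="name")
print(unique_df)  # rank -> ['COMBO', 'REGULAR'], ts_mean -> ['REGULAR']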
@@ -1,180 +0,0 @@
-import json
-import os
-from typing import Union
-
-import pandas as pd
-from pandas.io.formats.style import Styler
-
-brain_api_url = os.environ.get("BRAIN_API_URL", "https://api.worldquantbrain.com")
-brain_url = os.environ.get("BRAIN_URL", "https://platform.worldquantbrain.com")
-
-
-def make_clickable_alpha_id(alpha_id: str) -> str:
-    """
-    Create a clickable HTML link for an alpha ID.
-
-    Args:
-        alpha_id (str): The ID of the alpha.
-
-    Returns:
-        str: An HTML string containing a clickable link to the alpha's page on the platform.
-    """
-
-    url = brain_url + "/alpha/"
-    return f'<a href="{url}{alpha_id}">{alpha_id}</a>'
-
-
-def prettify_result(
-    result: list, detailed_tests_view: bool = False, clickable_alpha_id: bool = False
-) -> Union[pd.DataFrame, Styler]:
-    """
-    Combine and format simulation results into a single DataFrame for analysis.
-
-    Args:
-        result (list): A list of dictionaries containing simulation results.
-        detailed_tests_view (bool, optional): If True, include detailed test results. Defaults to False.
-        clickable_alpha_id (bool, optional): If True, make alpha IDs clickable. Defaults to False.
-
-    Returns:
-        pandas.DataFrame or pandas.io.formats.style.Styler: A DataFrame containing formatted results,
-        optionally with clickable alpha IDs.
-    """
-    list_of_is_stats = [result[x]["is_stats"] for x in range(len(result)) if result[x]["is_stats"] is not None]
-    is_stats_df = pd.concat(list_of_is_stats).reset_index(drop=True)
-    is_stats_df = is_stats_df.sort_values("fitness", ascending=False)
-
-    expressions = {
-        result[x]["alpha_id"]: (
-            {
-                "selection": result[x]["simulate_data"]["selection"],
-                "combo": result[x]["simulate_data"]["combo"],
-            }
-            if result[x]["simulate_data"]["type"] == "SUPER"
-            else result[x]["simulate_data"]["regular"]
-        )
-        for x in range(len(result))
-        if result[x]["is_stats"] is not None
-    }
-    expression_df = pd.DataFrame(list(expressions.items()), columns=["alpha_id", "expression"])
-
-    list_of_is_tests = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None]
-    is_tests_df = pd.concat(list_of_is_tests, sort=True).reset_index(drop=True)
-    is_tests_df = is_tests_df[is_tests_df["result"] != "WARNING"]
-    if detailed_tests_view:
-        cols = ["limit", "result", "value"]
-        is_tests_df["details"] = is_tests_df[cols].to_dict(orient="records")
-        is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="details").reset_index()
-    else:
-        is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="result").reset_index()
-
-    alpha_stats = pd.merge(is_stats_df, expression_df, on="alpha_id")
-    alpha_stats = pd.merge(alpha_stats, is_tests_df, on="alpha_id")
-    alpha_stats = alpha_stats.drop(columns=alpha_stats.columns[(alpha_stats == "PENDING").any()])
-    alpha_stats.columns = alpha_stats.columns.str.replace("(?<=[a-z])(?=[A-Z])", "_", regex=True).str.lower()
-    if clickable_alpha_id:
-        return alpha_stats.style.format({"alpha_id": lambda x: make_clickable_alpha_id(str(x))})
-    return alpha_stats
-
-
-def concat_pnl(result: list) -> pd.DataFrame:
-    """
-    Combine PnL results from multiple alphas into a single DataFrame.
-
-    Args:
-        result (list): A list of dictionaries containing simulation results with PnL data.
-
-    Returns:
-        pandas.DataFrame: A DataFrame containing combined PnL data for all alphas.
-    """
-    list_of_pnls = [result[x]["pnl"] for x in range(len(result)) if result[x]["pnl"] is not None]
-    pnls_df = pd.concat(list_of_pnls).reset_index()
-
-    return pnls_df
-
-
-def concat_is_tests(result: list) -> pd.DataFrame:
-    """
-    Combine in-sample test results from multiple alphas into a single DataFrame.
-
-    Args:
-        result (list): A list of dictionaries containing simulation results with in-sample test data.
-
-    Returns:
-        pandas.DataFrame: A DataFrame containing combined in-sample test results for all alphas.
-    """
-    is_tests_list = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None]
-    is_tests_df = pd.concat(is_tests_list, sort=True).reset_index(drop=True)
-    return is_tests_df
-
-
-def save_simulation_result(result: dict) -> None:
-    """
-    Save the simulation result to a JSON file in the 'simulation_results' folder.
-
-    Args:
-        result (dict): A dictionary containing the simulation result for an alpha.
-    """
-
-    alpha_id = result["id"]
-    region = result["settings"]["region"]
-    folder_path = "simulation_results/"
-    file_path = os.path.join(folder_path, f"{alpha_id}_{region}")
-
-    os.makedirs(folder_path, exist_ok=True)
-
-    with open(file_path, "w", encoding="utf-8") as file:
-        json.dump(result, file)
-
-
-def save_pnl(pnl_df: pd.DataFrame, alpha_id: str, region: str) -> None:
-    """
-    Save the PnL data for an alpha to a CSV file in the 'alphas_pnl' folder.
-
-    Args:
-        pnl_df (pandas.DataFrame): The DataFrame containing PnL data.
-        alpha_id (str): The ID of the alpha.
-        region (str): The region for which the PnL data was generated.
-    """
-
-    folder_path = "alphas_pnl/"
-    file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
-    os.makedirs(folder_path, exist_ok=True)
-
-    pnl_df.to_csv(file_path)
-
-
-def save_yearly_stats(yearly_stats: pd.DataFrame, alpha_id: str, region: str):
-    """
-    Save the yearly statistics for an alpha to a CSV file in the 'yearly_stats' folder.
-
-    Args:
-        yearly_stats (pandas.DataFrame): The DataFrame containing yearly statistics.
-        alpha_id (str): The ID of the alpha.
-        region (str): The region for which the statistics were generated.
-    """
-
-    folder_path = "yearly_stats/"
-    file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
-    os.makedirs(folder_path, exist_ok=True)
-
-    yearly_stats.to_csv(file_path, index=False)
-
-
-def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame:
-    """
-    Expand dictionary columns in a DataFrame into separate columns.
-
-    Args:
-        data (pandas.DataFrame): The input DataFrame with dictionary columns.
-
-    Returns:
-        pandas.DataFrame: A new DataFrame with expanded columns.
-    """
-    dict_columns = list(filter(lambda x: isinstance(data[x].iloc[0], dict), data.columns))
-    new_columns = pd.concat(
-        [data[col].apply(pd.Series).rename(columns=lambda x: f"{col}_{x}") for col in dict_columns],
-        axis=1,
-    )
-
-    data = pd.concat([data, new_columns], axis=1)
-    return data