PyPI - realtimex-deeptutor - Versions diffs - 0.5.0.post1__py3-none-any.whl → 0.5.0.post3__py3-none-any.whl - Mend

realtimex-deeptutor 0.5.0.post1py3-none-any.whl → 0.5.0.post3py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145) hide show

{realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/METADATA +24 -17
{realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/RECORD +143 -123
{realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/WHEEL +1 -1
realtimex_deeptutor-0.5.0.post3.dist-info/entry_points.txt +4 -0
{realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/top_level.txt +1 -0
scripts/__init__.py +1 -0
scripts/audit_prompts.py +179 -0
scripts/check_install.py +460 -0
scripts/generate_roster.py +327 -0
scripts/install_all.py +653 -0
scripts/migrate_kb.py +655 -0
scripts/start.py +807 -0
scripts/start_web.py +632 -0
scripts/sync_prompts_from_en.py +147 -0
src/__init__.py +2 -2
src/agents/ideagen/material_organizer_agent.py +2 -0
src/agents/solve/__init__.py +6 -0
src/agents/solve/main_solver.py +9 -0
src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +9 -7
src/agents/solve/session_manager.py +345 -0
src/api/main.py +14 -0
src/api/routers/chat.py +3 -3
src/api/routers/co_writer.py +12 -7
src/api/routers/config.py +1 -0
src/api/routers/guide.py +3 -1
src/api/routers/ideagen.py +7 -0
src/api/routers/knowledge.py +64 -12
src/api/routers/question.py +2 -0
src/api/routers/realtimex.py +137 -0
src/api/routers/research.py +9 -0
src/api/routers/solve.py +120 -2
src/cli/__init__.py +13 -0
src/cli/start.py +209 -0
src/config/constants.py +11 -9
src/knowledge/add_documents.py +453 -213
src/knowledge/extract_numbered_items.py +9 -10
src/knowledge/initializer.py +102 -101
src/knowledge/manager.py +251 -74
src/knowledge/progress_tracker.py +43 -2
src/knowledge/start_kb.py +11 -2
src/logging/__init__.py +5 -0
src/logging/adapters/__init__.py +1 -0
src/logging/adapters/lightrag.py +25 -18
src/logging/adapters/llamaindex.py +1 -0
src/logging/config.py +30 -27
src/logging/handlers/__init__.py +1 -0
src/logging/handlers/console.py +7 -50
src/logging/handlers/file.py +5 -20
src/logging/handlers/websocket.py +23 -19
src/logging/logger.py +161 -126
src/logging/stats/__init__.py +1 -0
src/logging/stats/llm_stats.py +37 -17
src/services/__init__.py +17 -1
src/services/config/__init__.py +1 -0
src/services/config/knowledge_base_config.py +1 -0
src/services/config/loader.py +1 -1
src/services/config/unified_config.py +211 -4
src/services/embedding/__init__.py +1 -0
src/services/embedding/adapters/__init__.py +3 -0
src/services/embedding/adapters/base.py +1 -0
src/services/embedding/adapters/cohere.py +1 -0
src/services/embedding/adapters/jina.py +1 -0
src/services/embedding/adapters/ollama.py +1 -0
src/services/embedding/adapters/openai_compatible.py +1 -0
src/services/embedding/adapters/realtimex.py +125 -0
src/services/embedding/client.py +27 -0
src/services/embedding/config.py +3 -0
src/services/embedding/provider.py +1 -0
src/services/llm/__init__.py +17 -3
src/services/llm/capabilities.py +47 -0
src/services/llm/client.py +32 -0
src/services/llm/cloud_provider.py +21 -4
src/services/llm/config.py +36 -2
src/services/llm/error_mapping.py +1 -0
src/services/llm/exceptions.py +30 -0
src/services/llm/factory.py +55 -16
src/services/llm/local_provider.py +1 -0
src/services/llm/providers/anthropic.py +1 -0
src/services/llm/providers/base_provider.py +1 -0
src/services/llm/providers/open_ai.py +1 -0
src/services/llm/realtimex_provider.py +240 -0
src/services/llm/registry.py +1 -0
src/services/llm/telemetry.py +1 -0
src/services/llm/types.py +1 -0
src/services/llm/utils.py +1 -0
src/services/prompt/__init__.py +1 -0
src/services/prompt/manager.py +3 -2
src/services/rag/__init__.py +27 -5
src/services/rag/components/__init__.py +1 -0
src/services/rag/components/base.py +1 -0
src/services/rag/components/chunkers/__init__.py +1 -0
src/services/rag/components/chunkers/base.py +1 -0
src/services/rag/components/chunkers/fixed.py +1 -0
src/services/rag/components/chunkers/numbered_item.py +1 -0
src/services/rag/components/chunkers/semantic.py +1 -0
src/services/rag/components/embedders/__init__.py +1 -0
src/services/rag/components/embedders/base.py +1 -0
src/services/rag/components/embedders/openai.py +1 -0
src/services/rag/components/indexers/__init__.py +1 -0
src/services/rag/components/indexers/base.py +1 -0
src/services/rag/components/indexers/graph.py +5 -44
src/services/rag/components/indexers/lightrag.py +5 -44
src/services/rag/components/indexers/vector.py +1 -0
src/services/rag/components/parsers/__init__.py +1 -0
src/services/rag/components/parsers/base.py +1 -0
src/services/rag/components/parsers/markdown.py +1 -0
src/services/rag/components/parsers/pdf.py +1 -0
src/services/rag/components/parsers/text.py +1 -0
src/services/rag/components/retrievers/__init__.py +1 -0
src/services/rag/components/retrievers/base.py +1 -0
src/services/rag/components/retrievers/dense.py +1 -0
src/services/rag/components/retrievers/hybrid.py +5 -44
src/services/rag/components/retrievers/lightrag.py +5 -44
src/services/rag/components/routing.py +48 -0
src/services/rag/factory.py +112 -46
src/services/rag/pipeline.py +1 -0
src/services/rag/pipelines/__init__.py +27 -18
src/services/rag/pipelines/lightrag.py +1 -0
src/services/rag/pipelines/llamaindex.py +99 -0
src/services/rag/pipelines/raganything.py +67 -100
src/services/rag/pipelines/raganything_docling.py +368 -0
src/services/rag/service.py +5 -12
src/services/rag/types.py +1 -0
src/services/rag/utils/__init__.py +17 -0
src/services/rag/utils/image_migration.py +279 -0
src/services/search/__init__.py +1 -0
src/services/search/base.py +1 -0
src/services/search/consolidation.py +1 -0
src/services/search/providers/__init__.py +1 -0
src/services/search/providers/baidu.py +1 -0
src/services/search/providers/exa.py +1 -0
src/services/search/providers/jina.py +1 -0
src/services/search/providers/perplexity.py +1 -0
src/services/search/providers/serper.py +1 -0
src/services/search/providers/tavily.py +1 -0
src/services/search/types.py +1 -0
src/services/settings/__init__.py +1 -0
src/services/settings/interface_settings.py +78 -0
src/services/setup/__init__.py +1 -0
src/services/tts/__init__.py +1 -0
src/services/tts/config.py +1 -0
src/utils/realtimex.py +284 -0
realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +0 -2
src/services/rag/pipelines/academic.py +0 -44
{realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/licenses/LICENSE +0 -0

src/services/rag/utils/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+"""
+RAG Utilities
+=============
+Utility modules for RAG operations.
+"""
+from .image_migration import (
+    cleanup_parser_output_dirs,
+    migrate_images_and_update_paths,
+)
+__all__ = [
+    "migrate_images_and_update_paths",
+    "cleanup_parser_output_dirs",
+]

src/services/rag/utils/image_migration.py ADDED Viewed

@@ -0,0 +1,279 @@
+# -*- coding: utf-8 -*-
+"""
+Image Migration Utilities
+=========================
+Utilities for migrating images from parser output directories to the canonical
+knowledge base images directory, and updating content_list paths accordingly.
+This is needed because:
+1. Parsers (MinerU/Docling) output images to nested directories like:
+   content_list/{doc}/auto/images/ or content_list/{doc}/docling/images/
+2. RAG stores these paths in chunks, so if we move files later, retrieval breaks
+3. By migrating images BEFORE RAG indexing, we ensure correct paths are stored
+"""
+import asyncio
+from pathlib import Path
+import shutil
+from typing import Any, Dict, List, Optional, Tuple
+from src.logging import get_logger
+# Module-level logger; every message from this module is emitted under the
+# "ImageMigration" name.
+logger = get_logger("ImageMigration")
+# Maximum concurrent file operations to avoid overwhelming I/O
+# (used as the asyncio.Semaphore limit for each batch in _process_image_batch).
+MAX_CONCURRENT_COPIES = 10
+async def migrate_images_and_update_paths(
+    content_list: List[Dict[str, Any]],
+    source_base_dir: Path,
+    target_images_dir: Path,
+    batch_size: int = 50,
+) -> Tuple[List[Dict[str, Any]], int]:
+    """
+    Migrate images from parser output to canonical images directory and update paths.
+    This function:
+    1. Scans content_list for image paths
+    2. Copies images to target_images_dir (with deduplication)
+    3. Updates content_list with new paths
+    4. Returns updated content_list
+    Args:
+        content_list: Parsed content list from MinerU/Docling
+        source_base_dir: Base directory where parser outputs are located
+        target_images_dir: Canonical images directory (e.g., kb/images/)
+        batch_size: Number of images to process in each batch
+    Returns:
+        Tuple of (updated_content_list, num_images_migrated)
+    """
+    # Ensure target directory exists
+    target_images_dir.mkdir(parents=True, exist_ok=True)
+    # Collect all image items that need migration
+    image_items = []
+    for idx, item in enumerate(content_list):
+        if not isinstance(item, dict):
+            continue
+        # Check for image path in various fields
+        img_path = item.get("img_path") or item.get("image_path")
+        if img_path:
+            image_items.append((idx, img_path, "img_path" if "img_path" in item else "image_path"))
+    if not image_items:
+        logger.debug("No images found in content_list, skipping migration")
+        return content_list, 0
+    logger.info(f"Found {len(image_items)} images to migrate")
+    # Process images in batches to handle large quantities
+    migrated_count = 0
+    path_updates = {}  # old_path -> new_path mapping
+    for batch_start in range(0, len(image_items), batch_size):
+        batch = image_items[batch_start : batch_start + batch_size]
+        batch_updates = await _process_image_batch(batch, source_base_dir, target_images_dir)
+        path_updates.update(batch_updates)
+        migrated_count += len([v for v in batch_updates.values() if v])
+        if batch_start + batch_size < len(image_items):
+            logger.info(f"Migrated {batch_start + len(batch)}/{len(image_items)} images...")
+    # Update content_list with new paths
+    updated_content_list = _update_content_list_paths(content_list, path_updates)
+    logger.info(f"Image migration complete: {migrated_count} images migrated")
+    return updated_content_list, migrated_count
+async def _process_image_batch(
+    batch: List[Tuple[int, str, str]],
+    source_base_dir: Path,
+    target_images_dir: Path,
+) -> Dict[str, str]:
+    """
+    Process a batch of images concurrently.
+    Args:
+        batch: List of (index, image_path, field_name) tuples
+        source_base_dir: Base directory for resolving relative paths
+        target_images_dir: Target directory for images
+    Returns:
+        Dict mapping old paths to new paths
+    """
+    semaphore = asyncio.Semaphore(MAX_CONCURRENT_COPIES)
+    async def copy_single_image(idx: int, img_path: str, field_name: str) -> Tuple[str, str]:
+        async with semaphore:
+            return await _migrate_single_image(img_path, source_base_dir, target_images_dir)
+    tasks = [copy_single_image(idx, img_path, field_name) for idx, img_path, field_name in batch]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+    path_updates = {}
+    for result in results:
+        if isinstance(result, Exception):
+            logger.warning(f"Error migrating image: {result}")
+            continue
+        old_path, new_path = result
+        if new_path:
+            path_updates[old_path] = new_path
+    return path_updates
+async def _migrate_single_image(
+    img_path: str,
+    source_base_dir: Path,
+    target_images_dir: Path,
+) -> Tuple[str, str]:
+    """
+    Migrate a single image file.
+    Args:
+        img_path: Original image path (may be absolute or relative)
+        source_base_dir: Base directory for resolving relative paths
+        target_images_dir: Target directory for images
+    Returns:
+        Tuple of (original_path, new_path) or (original_path, None) if failed
+    """
+    try:
+        # Resolve the source path
+        source_path = Path(img_path)
+        if not source_path.is_absolute():
+            source_path = source_base_dir / img_path
+        if not source_path.exists():
+            logger.warning(f"Source image not found: {img_path}")
+            return (img_path, None)
+        # Generate target filename (preserve original name)
+        target_filename = source_path.name
+        target_path = target_images_dir / target_filename
+        # Handle filename conflicts by adding suffix
+        if target_path.exists():
+            # Check if it's the same file (by size)
+            if target_path.stat().st_size == source_path.stat().st_size:
+                # Same file already exists, just update path
+                return (img_path, str(target_path))
+            # Different file with same name, add suffix
+            stem = source_path.stem
+            suffix = source_path.suffix
+            counter = 1
+            while target_path.exists():
+                target_filename = f"{stem}_{counter}{suffix}"
+                target_path = target_images_dir / target_filename
+                counter += 1
+        # Copy file using thread pool to avoid blocking
+        loop = asyncio.get_event_loop()
+        await loop.run_in_executor(None, shutil.copy2, str(source_path), str(target_path))
+        logger.debug(f"Migrated: {source_path.name} -> {target_path}")
+        return (img_path, str(target_path))
+    except Exception as e:
+        logger.error(f"Failed to migrate image {img_path}: {e}")
+        return (img_path, None)
+def _update_content_list_paths(
+    content_list: List[Dict[str, Any]],
+    path_updates: Dict[str, str],
+) -> List[Dict[str, Any]]:
+    """
+    Update image paths in content_list with new paths.
+    Args:
+        content_list: Original content list
+        path_updates: Mapping of old paths to new paths
+    Returns:
+        Updated content list (new list, original is not modified)
+    """
+    updated_list = []
+    for item in content_list:
+        if not isinstance(item, dict):
+            updated_list.append(item)
+            continue
+        # Create a copy of the item
+        updated_item = dict(item)
+        # Update img_path if present
+        if "img_path" in updated_item:
+            old_path = updated_item["img_path"]
+            if old_path in path_updates and path_updates[old_path]:
+                updated_item["img_path"] = path_updates[old_path]
+        # Update image_path if present (alternative field name)
+        if "image_path" in updated_item:
+            old_path = updated_item["image_path"]
+            if old_path in path_updates and path_updates[old_path]:
+                updated_item["image_path"] = path_updates[old_path]
+        updated_list.append(updated_item)
+    return updated_list
+async def cleanup_parser_output_dirs(
+    content_list_dir: Path,
+    parser_subdirs: List[str] = None,
+) -> int:
+    """
+    Clean up parser output directories after successful migration.
+    Only removes the nested parser output directories (auto/, docling/),
+    NOT the content_list JSON files at the root level.
+    Args:
+        content_list_dir: The content_list directory
+        parser_subdirs: List of parser subdirectory names to clean
+    Returns:
+        Number of directories cleaned up
+    """
+    if parser_subdirs is None:
+        parser_subdirs = ["auto", "docling"]
+    cleaned_count = 0
+    for doc_dir in content_list_dir.glob("*"):
+        if not doc_dir.is_dir():
+            continue
+        for parser_subdir in parser_subdirs:
+            subdir = doc_dir / parser_subdir
+            if subdir.exists():
+                try:
+                    # Run in thread pool to avoid blocking
+                    loop = asyncio.get_event_loop()
+                    await loop.run_in_executor(None, shutil.rmtree, str(subdir))
+                    cleaned_count += 1
+                    logger.debug(f"Cleaned up: {subdir}")
+                except Exception as e:
+                    logger.warning(f"Failed to clean up {subdir}: {e}")
+        # Remove the doc_dir if it's now empty
+        try:
+            if doc_dir.exists() and not any(doc_dir.iterdir()):
+                doc_dir.rmdir()
+                logger.debug(f"Removed empty directory: {doc_dir}")
+        except Exception as e:
+            logger.debug(f"Could not remove directory {doc_dir}: {e}")
+    if cleaned_count > 0:
+        logger.info(f"Cleaned up {cleaned_count} parser output directories")
+    return cleaned_count

src/services/search/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Web Search Service - Pluggable search provider architecture

src/services/search/base.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Web Search Base Provider - Abstract base class for all search providers

src/services/search/consolidation.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Answer Consolidation - Generate answers from raw search results

src/services/search/providers/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Web Search Provider Registry

src/services/search/providers/baidu.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Baidu AI Search Provider

src/services/search/providers/exa.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Exa Neural Search Provider

src/services/search/providers/jina.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Jina Reader Search Provider

src/services/search/providers/perplexity.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Perplexity AI Search Provider

src/services/search/providers/serper.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Serper Google SERP Provider

src/services/search/providers/tavily.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Tavily Search Provider

src/services/search/types.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Web Search Types - Shared dataclasses and type definitions

src/services/settings/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""User interface (UI) settings helpers."""

src/services/settings/interface_settings.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""
+Interface (UI) settings reader.
+This is the canonical backend source for user-selected UI language/theme stored in:
+  data/user/settings/interface.json
+"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any
+# Fourth ancestor of this file; presumably the repository root given the
+# src/services/settings/ location (verify if the module is ever moved).
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+# JSON file holding the user-selected UI settings.
+INTERFACE_SETTINGS_FILE = PROJECT_ROOT / "data" / "user" / "settings" / "interface.json"
+# Fallback values used when the settings file is missing, unreadable, or
+# lacks a key.
+DEFAULT_UI_SETTINGS: dict[str, Any] = {
+    "theme": "light",
+    "language": "en",
+}
+def _normalize_language(language: Any, default: str = "en") -> str:
+    """
+    Normalize language codes:
+    - en/english -> en
+    - zh/chinese/cn -> zh
+    """
+    if language is None or language == "":
+        language = default
+    if isinstance(language, str):
+        s = language.lower().strip()
+        if s in {"en", "english"}:
+            return "en"
+        if s in {"zh", "chinese", "cn"}:
+            return "zh"
+    # Fall back to default
+    if isinstance(default, str):
+        return _normalize_language(default, "en")
+    return "en"
+def get_ui_settings() -> dict[str, Any]:
+    """
+    Read UI settings from interface.json with defaults.
+    Returns:
+        dict containing at least: {"theme": "...", "language": "..."}
+    """
+    if INTERFACE_SETTINGS_FILE.exists():
+        try:
+            with open(INTERFACE_SETTINGS_FILE, encoding="utf-8") as f:
+                saved = json.load(f) or {}
+            merged = {**DEFAULT_UI_SETTINGS, **saved}
+            merged["language"] = _normalize_language(
+                merged.get("language"), DEFAULT_UI_SETTINGS["language"]
+            )
+            return merged
+        except Exception:
+            # On any parse error, fall back to defaults (safe)
+            return DEFAULT_UI_SETTINGS.copy()
+    return DEFAULT_UI_SETTINGS.copy()
+def get_ui_language(default: str = "en") -> str:
+    """
+    Get current UI language.
+    Priority:
+    1) interface.json
+    2) provided default
+    3) 'en'
+    """
+    settings = get_ui_settings()
+    return _normalize_language(settings.get("language"), default)

src/services/setup/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Setup Service
 =============

src/services/tts/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 TTS Service
 ===========

src/services/tts/config.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 TTS Configuration
 =================

realtimex-deeptutor 0.5.0.post1__py3-none-any.whl → 0.5.0.post3__py3-none-any.whl

realtimex-deeptutor 0.5.0.post1py3-none-any.whl → 0.5.0.post3py3-none-any.whl