PyPI - realtimex-deeptutor - Versions diffs - 0.5.0.post1__py3-none-any.whl - Mend

realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (276) hide show

realtimex_deeptutor/__init__.py +67 -0
realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
src/__init__.py +40 -0
src/agents/__init__.py +24 -0
src/agents/base_agent.py +657 -0
src/agents/chat/__init__.py +24 -0
src/agents/chat/chat_agent.py +435 -0
src/agents/chat/prompts/en/chat_agent.yaml +35 -0
src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
src/agents/chat/session_manager.py +311 -0
src/agents/co_writer/__init__.py +0 -0
src/agents/co_writer/edit_agent.py +260 -0
src/agents/co_writer/narrator_agent.py +423 -0
src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
src/agents/guide/__init__.py +16 -0
src/agents/guide/agents/__init__.py +11 -0
src/agents/guide/agents/chat_agent.py +104 -0
src/agents/guide/agents/interactive_agent.py +223 -0
src/agents/guide/agents/locate_agent.py +149 -0
src/agents/guide/agents/summary_agent.py +150 -0
src/agents/guide/guide_manager.py +500 -0
src/agents/guide/prompts/en/chat_agent.yaml +41 -0
src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
src/agents/guide/prompts/en/locate_agent.yaml +68 -0
src/agents/guide/prompts/en/summary_agent.yaml +157 -0
src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
src/agents/ideagen/__init__.py +12 -0
src/agents/ideagen/idea_generation_workflow.py +426 -0
src/agents/ideagen/material_organizer_agent.py +173 -0
src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
src/agents/question/__init__.py +24 -0
src/agents/question/agents/__init__.py +18 -0
src/agents/question/agents/generate_agent.py +381 -0
src/agents/question/agents/relevance_analyzer.py +207 -0
src/agents/question/agents/retrieve_agent.py +239 -0
src/agents/question/coordinator.py +718 -0
src/agents/question/example.py +109 -0
src/agents/question/prompts/en/coordinator.yaml +75 -0
src/agents/question/prompts/en/generate_agent.yaml +77 -0
src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
src/agents/question/prompts/zh/coordinator.yaml +75 -0
src/agents/question/prompts/zh/generate_agent.yaml +77 -0
src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
src/agents/research/agents/__init__.py +23 -0
src/agents/research/agents/decompose_agent.py +507 -0
src/agents/research/agents/manager_agent.py +228 -0
src/agents/research/agents/note_agent.py +180 -0
src/agents/research/agents/rephrase_agent.py +263 -0
src/agents/research/agents/reporting_agent.py +1333 -0
src/agents/research/agents/research_agent.py +714 -0
src/agents/research/data_structures.py +451 -0
src/agents/research/main.py +188 -0
src/agents/research/prompts/en/decompose_agent.yaml +89 -0
src/agents/research/prompts/en/manager_agent.yaml +24 -0
src/agents/research/prompts/en/note_agent.yaml +121 -0
src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
src/agents/research/prompts/en/reporting_agent.yaml +380 -0
src/agents/research/prompts/en/research_agent.yaml +173 -0
src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
src/agents/research/prompts/zh/manager_agent.yaml +24 -0
src/agents/research/prompts/zh/note_agent.yaml +121 -0
src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
src/agents/research/prompts/zh/research_agent.yaml +173 -0
src/agents/research/research_pipeline.py +1309 -0
src/agents/research/utils/__init__.py +60 -0
src/agents/research/utils/citation_manager.py +799 -0
src/agents/research/utils/json_utils.py +98 -0
src/agents/research/utils/token_tracker.py +297 -0
src/agents/solve/__init__.py +80 -0
src/agents/solve/analysis_loop/__init__.py +14 -0
src/agents/solve/analysis_loop/investigate_agent.py +414 -0
src/agents/solve/analysis_loop/note_agent.py +190 -0
src/agents/solve/main_solver.py +862 -0
src/agents/solve/memory/__init__.py +34 -0
src/agents/solve/memory/citation_memory.py +353 -0
src/agents/solve/memory/investigate_memory.py +226 -0
src/agents/solve/memory/solve_memory.py +340 -0
src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
src/agents/solve/solve_loop/__init__.py +22 -0
src/agents/solve/solve_loop/citation_manager.py +74 -0
src/agents/solve/solve_loop/manager_agent.py +274 -0
src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
src/agents/solve/solve_loop/response_agent.py +301 -0
src/agents/solve/solve_loop/solve_agent.py +325 -0
src/agents/solve/solve_loop/tool_agent.py +470 -0
src/agents/solve/utils/__init__.py +64 -0
src/agents/solve/utils/config_validator.py +313 -0
src/agents/solve/utils/display_manager.py +223 -0
src/agents/solve/utils/error_handler.py +363 -0
src/agents/solve/utils/json_utils.py +98 -0
src/agents/solve/utils/performance_monitor.py +407 -0
src/agents/solve/utils/token_tracker.py +541 -0
src/api/__init__.py +0 -0
src/api/main.py +240 -0
src/api/routers/__init__.py +1 -0
src/api/routers/agent_config.py +69 -0
src/api/routers/chat.py +296 -0
src/api/routers/co_writer.py +337 -0
src/api/routers/config.py +627 -0
src/api/routers/dashboard.py +18 -0
src/api/routers/guide.py +337 -0
src/api/routers/ideagen.py +436 -0
src/api/routers/knowledge.py +821 -0
src/api/routers/notebook.py +247 -0
src/api/routers/question.py +537 -0
src/api/routers/research.py +394 -0
src/api/routers/settings.py +164 -0
src/api/routers/solve.py +305 -0
src/api/routers/system.py +252 -0
src/api/run_server.py +61 -0
src/api/utils/history.py +172 -0
src/api/utils/log_interceptor.py +21 -0
src/api/utils/notebook_manager.py +415 -0
src/api/utils/progress_broadcaster.py +72 -0
src/api/utils/task_id_manager.py +100 -0
src/config/__init__.py +0 -0
src/config/accessors.py +18 -0
src/config/constants.py +34 -0
src/config/defaults.py +18 -0
src/config/schema.py +38 -0
src/config/settings.py +50 -0
src/core/errors.py +62 -0
src/knowledge/__init__.py +23 -0
src/knowledge/add_documents.py +606 -0
src/knowledge/config.py +65 -0
src/knowledge/example_add_documents.py +236 -0
src/knowledge/extract_numbered_items.py +1039 -0
src/knowledge/initializer.py +621 -0
src/knowledge/kb.py +22 -0
src/knowledge/manager.py +782 -0
src/knowledge/progress_tracker.py +182 -0
src/knowledge/start_kb.py +535 -0
src/logging/__init__.py +103 -0
src/logging/adapters/__init__.py +17 -0
src/logging/adapters/lightrag.py +184 -0
src/logging/adapters/llamaindex.py +141 -0
src/logging/config.py +80 -0
src/logging/handlers/__init__.py +20 -0
src/logging/handlers/console.py +75 -0
src/logging/handlers/file.py +201 -0
src/logging/handlers/websocket.py +127 -0
src/logging/logger.py +709 -0
src/logging/stats/__init__.py +16 -0
src/logging/stats/llm_stats.py +179 -0
src/services/__init__.py +56 -0
src/services/config/__init__.py +61 -0
src/services/config/knowledge_base_config.py +210 -0
src/services/config/loader.py +260 -0
src/services/config/unified_config.py +603 -0
src/services/embedding/__init__.py +45 -0
src/services/embedding/adapters/__init__.py +22 -0
src/services/embedding/adapters/base.py +106 -0
src/services/embedding/adapters/cohere.py +127 -0
src/services/embedding/adapters/jina.py +99 -0
src/services/embedding/adapters/ollama.py +116 -0
src/services/embedding/adapters/openai_compatible.py +96 -0
src/services/embedding/client.py +159 -0
src/services/embedding/config.py +156 -0
src/services/embedding/provider.py +119 -0
src/services/llm/__init__.py +152 -0
src/services/llm/capabilities.py +313 -0
src/services/llm/client.py +302 -0
src/services/llm/cloud_provider.py +530 -0
src/services/llm/config.py +200 -0
src/services/llm/error_mapping.py +103 -0
src/services/llm/exceptions.py +152 -0
src/services/llm/factory.py +450 -0
src/services/llm/local_provider.py +347 -0
src/services/llm/providers/anthropic.py +95 -0
src/services/llm/providers/base_provider.py +93 -0
src/services/llm/providers/open_ai.py +83 -0
src/services/llm/registry.py +71 -0
src/services/llm/telemetry.py +40 -0
src/services/llm/types.py +27 -0
src/services/llm/utils.py +333 -0
src/services/prompt/__init__.py +25 -0
src/services/prompt/manager.py +206 -0
src/services/rag/__init__.py +64 -0
src/services/rag/components/__init__.py +29 -0
src/services/rag/components/base.py +59 -0
src/services/rag/components/chunkers/__init__.py +18 -0
src/services/rag/components/chunkers/base.py +34 -0
src/services/rag/components/chunkers/fixed.py +71 -0
src/services/rag/components/chunkers/numbered_item.py +94 -0
src/services/rag/components/chunkers/semantic.py +97 -0
src/services/rag/components/embedders/__init__.py +14 -0
src/services/rag/components/embedders/base.py +32 -0
src/services/rag/components/embedders/openai.py +63 -0
src/services/rag/components/indexers/__init__.py +18 -0
src/services/rag/components/indexers/base.py +35 -0
src/services/rag/components/indexers/graph.py +172 -0
src/services/rag/components/indexers/lightrag.py +156 -0
src/services/rag/components/indexers/vector.py +146 -0
src/services/rag/components/parsers/__init__.py +18 -0
src/services/rag/components/parsers/base.py +35 -0
src/services/rag/components/parsers/markdown.py +52 -0
src/services/rag/components/parsers/pdf.py +115 -0
src/services/rag/components/parsers/text.py +86 -0
src/services/rag/components/retrievers/__init__.py +18 -0
src/services/rag/components/retrievers/base.py +34 -0
src/services/rag/components/retrievers/dense.py +200 -0
src/services/rag/components/retrievers/hybrid.py +164 -0
src/services/rag/components/retrievers/lightrag.py +169 -0
src/services/rag/components/routing.py +286 -0
src/services/rag/factory.py +234 -0
src/services/rag/pipeline.py +215 -0
src/services/rag/pipelines/__init__.py +32 -0
src/services/rag/pipelines/academic.py +44 -0
src/services/rag/pipelines/lightrag.py +43 -0
src/services/rag/pipelines/llamaindex.py +313 -0
src/services/rag/pipelines/raganything.py +384 -0
src/services/rag/service.py +244 -0
src/services/rag/types.py +73 -0
src/services/search/__init__.py +284 -0
src/services/search/base.py +87 -0
src/services/search/consolidation.py +398 -0
src/services/search/providers/__init__.py +128 -0
src/services/search/providers/baidu.py +188 -0
src/services/search/providers/exa.py +194 -0
src/services/search/providers/jina.py +161 -0
src/services/search/providers/perplexity.py +153 -0
src/services/search/providers/serper.py +209 -0
src/services/search/providers/tavily.py +161 -0
src/services/search/types.py +114 -0
src/services/setup/__init__.py +34 -0
src/services/setup/init.py +285 -0
src/services/tts/__init__.py +16 -0
src/services/tts/config.py +99 -0
src/tools/__init__.py +91 -0
src/tools/code_executor.py +536 -0
src/tools/paper_search_tool.py +171 -0
src/tools/query_item_tool.py +310 -0
src/tools/question/__init__.py +15 -0
src/tools/question/exam_mimic.py +616 -0
src/tools/question/pdf_parser.py +211 -0
src/tools/question/question_extractor.py +397 -0
src/tools/rag_tool.py +173 -0
src/tools/tex_chunker.py +339 -0
src/tools/tex_downloader.py +253 -0
src/tools/web_search.py +71 -0
src/utils/config_manager.py +206 -0
src/utils/document_validator.py +168 -0
src/utils/error_rate_tracker.py +111 -0
src/utils/error_utils.py +82 -0
src/utils/json_parser.py +110 -0
src/utils/network/circuit_breaker.py +79 -0

src/services/search/providers/baidu.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""
+Baidu AI Search Provider
+API: https://qianfan.baidubce.com/v2/ai_search/chat/completions
+Features:
+- AI-powered search with ERNIE models
+- Deep search mode for comprehensive results
+- Corner markers for reference citations
+- Follow-up query suggestions
+- Recency filtering
+"""
+from datetime import datetime
+from typing import Any
+import requests
+from ..base import BaseSearchProvider
+from ..types import Citation, SearchResult, WebSearchResponse
+from . import register_provider
+@register_provider("baidu")
+class BaiduProvider(BaseSearchProvider):
+    """Baidu AI Search provider"""
+    display_name = "Baidu AI"
+    description = "百度AI搜索 with ERNIE models"
+    supports_answer = True
+    BASE_URL = "https://qianfan.baidubce.com/v2/ai_search/chat/completions"
+    def search(
+        self,
+        query: str,
+        model: str = "ernie-4.5-turbo-32k",
+        search_source: str = "baidu_search_v2",
+        enable_deep_search: bool = False,
+        enable_corner_markers: bool = True,
+        enable_followup_queries: bool = False,
+        temperature: float = 0.11,
+        top_p: float = 0.55,
+        search_mode: str = "auto",
+        search_recency_filter: str | None = None,
+        instruction: str = "",
+        timeout: int = 120,
+        **kwargs: Any,
+    ) -> WebSearchResponse:
+        """
+        Perform intelligent search using Baidu AI Search API.
+        Args:
+            query: Search query.
+            model: Model to use for generation (default: ernie-4.5-turbo-32k).
+            search_source: Search engine version (baidu_search_v1 or baidu_search_v2).
+            enable_deep_search: Enable deep search for more comprehensive results.
+            enable_corner_markers: Enable corner markers for reference citations.
+            enable_followup_queries: Enable follow-up query suggestions.
+            temperature: Model sampling temperature (0, 1].
+            top_p: Model sampling top_p (0, 1].
+            search_mode: Search mode (auto, required, disabled).
+            search_recency_filter: Filter by recency (week, month, semiyear, year).
+            instruction: System instruction for response style.
+            timeout: Request timeout in seconds.
+            **kwargs: Additional options.
+        Returns:
+            WebSearchResponse: Standardized search response.
+        """
+        self.logger.debug(f"Calling Baidu API with model={model}, deep_search={enable_deep_search}")
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": (
+                f"Bearer {self.api_key}" if not self.api_key.startswith("Bearer ") else self.api_key
+            ),
+        }
+        payload = {
+            "messages": [{"role": "user", "content": query}],
+            "model": model,
+            "search_source": search_source,
+            "stream": False,
+            "enable_deep_search": enable_deep_search,
+            "enable_corner_markers": enable_corner_markers,
+            "enable_followup_queries": enable_followup_queries,
+            "temperature": temperature,
+            "top_p": top_p,
+            "search_mode": search_mode,
+        }
+        if search_recency_filter:
+            payload["search_recency_filter"] = search_recency_filter
+        if instruction:
+            payload["instruction"] = instruction
+        response = requests.post(self.BASE_URL, headers=headers, json=payload, timeout=timeout)
+        if response.status_code != 200:
+            try:
+                error_data = response.json() if response.text else {}
+            except Exception:
+                error_data = {}
+            raise Exception(
+                f"Baidu AI Search API error: {response.status_code} - "
+                f"{error_data.get('message', response.text)}"
+            )
+        try:
+            data = response.json()
+        except Exception as e:
+            raise Exception(f"Failed to parse Baidu API response: {e}")
+        # Extract answer from response
+        answer = ""
+        finish_reason = ""
+        if data.get("choices"):
+            choice = data["choices"][0]
+            if choice.get("message"):
+                answer = choice["message"].get("content", "")
+            finish_reason = choice.get("finish_reason", "")
+        # Extract usage information
+        usage_info: dict[str, Any] = {}
+        if data.get("usage"):
+            usage = data["usage"]
+            usage_info = {
+                "prompt_tokens": usage.get("prompt_tokens", 0),
+                "completion_tokens": usage.get("completion_tokens", 0),
+                "total_tokens": usage.get("total_tokens", 0),
+            }
+        # Extract references/citations
+        citations: list[Citation] = []
+        search_results: list[SearchResult] = []
+        if data.get("references"):
+            for i, ref in enumerate(data["references"], 1):
+                citations.append(
+                    Citation(
+                        id=ref.get("id", i),
+                        reference=f"[{ref.get('id', i)}]",
+                        url=ref.get("url", ""),
+                        title=ref.get("title", ""),
+                        snippet=ref.get("content", ""),
+                        date=ref.get("date", ""),
+                        source=ref.get("web_anchor", ""),
+                        type=ref.get("type", "web"),
+                        icon=ref.get("icon", ""),
+                        website=ref.get("website", ""),
+                        web_anchor=ref.get("web_anchor", ""),
+                    )
+                )
+                search_results.append(
+                    SearchResult(
+                        title=ref.get("title", ""),
+                        url=ref.get("url", ""),
+                        snippet=ref.get("content", ""),
+                        date=ref.get("date", ""),
+                        source=ref.get("web_anchor", ""),
+                    )
+                )
+        # Build metadata
+        metadata: dict[str, Any] = {
+            "finish_reason": finish_reason,
+            "is_safe": data.get("is_safe", True),
+            "request_id": data.get("request_id", ""),
+        }
+        # Add follow-up queries if available
+        if data.get("followup_queries"):
+            metadata["followup_queries"] = data["followup_queries"]
+        response_obj = WebSearchResponse(
+            query=query,
+            answer=answer,
+            provider="baidu",
+            timestamp=datetime.now().isoformat(),
+            model=model,
+            citations=citations,
+            search_results=search_results,
+            usage=usage_info,
+            metadata=metadata,
+        )
+        return response_obj

src/services/search/providers/exa.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""
+Exa Neural Search Provider
+API Docs: https://exa.ai/docs/reference/search
+Endpoint: https://api.exa.ai/search
+Features:
+- Embeddings-based neural search (finds semantically similar content)
+- Multiple search types: auto, neural, keyword
+- Category filtering: research paper, news, company, people, github, tweet, pdf
+- Date filtering (published date and crawl date)
+- Domain include/exclude lists
+- Full text extraction with highlights and summaries
+- Cost tracking in response
+Pricing:
+- Neural search (1-25 results): $0.005/request
+- Neural search (26-100 results): $0.025/request
+- Content text/highlight/summary: $0.001/page
+"""
+from datetime import datetime
+from typing import Any
+import requests
+from ..base import BaseSearchProvider
+from ..types import Citation, SearchResult, WebSearchResponse
+from . import register_provider
+@register_provider("exa")
+class ExaProvider(BaseSearchProvider):
+    """Exa neural/embeddings-based search provider"""
+    display_name = "Exa"
+    description = "Neural/embeddings search"
+    supports_answer = True  # Provides summaries and context
+    BASE_URL = "https://api.exa.ai/search"
+    def search(
+        self,
+        query: str,
+        search_type: str = "auto",  # auto, neural, keyword
+        num_results: int = 10,
+        include_text: bool = True,
+        include_highlights: bool = True,
+        include_summary: bool = True,
+        max_characters: int | None = None,
+        category: str | None = None,  # research paper, news, company, etc.
+        include_domains: list[str] | None = None,
+        exclude_domains: list[str] | None = None,
+        start_published_date: str | None = None,  # ISO format
+        end_published_date: str | None = None,
+        timeout: int = 60,
+        **kwargs: Any,
+    ) -> WebSearchResponse:
+        """
+        Perform neural search using Exa API.
+        Args:
+            query: Search query.
+            search_type: Search type - "auto", "neural", or "keyword".
+            num_results: Number of results to return.
+            include_text: Include full text content.
+            include_highlights: Include relevant highlights.
+            include_summary: Include AI-generated summaries.
+            max_characters: Maximum characters per result.
+            category: Filter by category.
+            include_domains: List of domains to include.
+            exclude_domains: List of domains to exclude.
+            start_published_date: Filter by start date (ISO format).
+            end_published_date: Filter by end date (ISO format).
+            timeout: Request timeout in seconds.
+            **kwargs: Additional options.
+        Returns:
+            WebSearchResponse: Standardized search response.
+        """
+        self.logger.debug(f"Calling Exa API type={search_type}, num_results={num_results}")
+        headers = {
+            "Content-Type": "application/json",
+            "x-api-key": self.api_key,
+        }
+        # Build contents configuration
+        contents: dict[str, Any] = {}
+        if include_text:
+            contents["text"] = {"maxCharacters": max_characters} if max_characters else True
+        if include_highlights:
+            contents["highlights"] = True
+        if include_summary:
+            contents["summary"] = True
+        payload: dict[str, Any] = {
+            "query": query,
+            "type": search_type,
+            "numResults": num_results,
+            "contents": contents,
+        }
+        if category:
+            payload["category"] = category
+        if include_domains:
+            payload["includeDomains"] = include_domains
+        if exclude_domains:
+            payload["excludeDomains"] = exclude_domains
+        if start_published_date:
+            payload["startPublishedDate"] = start_published_date
+        if end_published_date:
+            payload["endPublishedDate"] = end_published_date
+        response = requests.post(self.BASE_URL, headers=headers, json=payload, timeout=timeout)
+        if response.status_code != 200:
+            try:
+                error_data = response.json() if response.text else {}
+            except Exception:
+                error_data = {}
+            self.logger.error(f"Exa API error: {response.status_code}")
+            raise Exception(
+                f"Exa API error: {response.status_code} - {error_data.get('error', response.text)}"
+            )
+        try:
+            data = response.json()
+        except Exception as e:
+            raise Exception(f"Failed to parse Exa API response: {e}")
+        self.logger.debug(f"Exa returned {len(data.get('results', []))} results")
+        # Build answer from summaries
+        summaries = []
+        citations: list[Citation] = []
+        search_results: list[SearchResult] = []
+        for i, result in enumerate(data.get("results", []), 1):
+            # Extract summary for answer
+            summary = result.get("summary", "")
+            if summary:
+                summaries.append(f"[{i}] {summary}")
+            # Build search result
+            sr = SearchResult(
+                title=result.get("title", ""),
+                url=result.get("url", ""),
+                snippet=summary or result.get("text", "")[:500],
+                date=result.get("publishedDate", ""),
+                source=result.get("author", ""),
+                content=result.get("text", ""),
+                score=result.get("score", 0.0),
+            )
+            search_results.append(sr)
+            # Build citation
+            citations.append(
+                Citation(
+                    id=i,
+                    reference=f"[{i}]",
+                    url=result.get("url", ""),
+                    title=result.get("title", ""),
+                    snippet=summary or result.get("text", "")[:500],
+                    date=result.get("publishedDate", ""),
+                    source=result.get("author", ""),
+                    content=result.get("text", ""),
+                )
+            )
+        # Combine summaries as answer
+        answer = "\n\n".join(summaries) if summaries else ""
+        # Build metadata
+        metadata: dict[str, Any] = {
+            "finish_reason": "stop",
+            "search_type": search_type,
+            "autoprompt_string": data.get("autopromptString", ""),
+        }
+        # Add cost info if available
+        if data.get("costDollars"):
+            metadata["cost_dollars"] = data["costDollars"]
+        response_obj = WebSearchResponse(
+            query=query,
+            answer=answer,
+            provider="exa",
+            timestamp=datetime.now().isoformat(),
+            model=f"exa-{search_type}",
+            citations=citations,
+            search_results=search_results,
+            usage={},
+            metadata=metadata,
+        )
+        return response_obj

src/services/search/providers/jina.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""
+Jina Reader Search Provider
+API Docs: https://jina.ai/reader
+Search Endpoint: https://s.jina.ai/{query}
+Reader Endpoint: https://r.jina.ai/{url}
+Features:
+- Web search with SERP results (s.jina.ai)
+- URL to clean content conversion (r.jina.ai)
+- Returns clean, LLM-friendly text
+- Automatic content extraction
+- Image captioning support
+- PDF support
+- Free tier: 10M tokens
+"""
+from datetime import datetime
+from typing import Any
+import urllib.parse
+import requests
+from ..base import BaseSearchProvider
+from ..types import Citation, SearchResult, WebSearchResponse
+from . import register_provider
+@register_provider("jina")
+class JinaProvider(BaseSearchProvider):
+    """Jina Reader search provider"""
+    display_name = "Jina"
+    description = "SERP with content extraction (free tier)"
+    supports_answer = False  # Returns raw content, not LLM answers
+    requires_api_key = False  # Has free tier without API key
+    BASE_URL = "https://s.jina.ai"
+    def search(
+        self,
+        query: str,
+        enrich: bool = True,
+        timeout: int = 60,
+        **kwargs: Any,
+    ) -> WebSearchResponse:
+        """
+        Perform web search using Jina Reader API.
+        Args:
+            query: Search query.
+            enrich: If True, fetch full content + images. If False, basic SERP only.
+            timeout: Request timeout in seconds.
+            **kwargs: Additional options.
+        Returns:
+            WebSearchResponse: Standardized search response.
+        """
+        headers: dict[str, str] = {
+            "Accept": "application/json",
+        }
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        if enrich:
+            # Enriched mode: full content + images
+            headers["X-Engine"] = "direct"
+            headers["X-Timeout"] = str(timeout)
+            headers["X-With-Images-Summary"] = "true"
+        else:
+            # Basic mode: SERP only, no content
+            headers["X-Respond-With"] = "no-content"
+        # URL encode the query
+        encoded_query = urllib.parse.quote(query)
+        url = f"{self.BASE_URL}/{encoded_query}"
+        response = requests.get(url, headers=headers, timeout=timeout)
+        if response.status_code != 200:
+            self.logger.error(f"Jina API error: {response.status_code}")
+            raise Exception(f"Jina API error: {response.status_code} - {response.text}")
+        data = response.json()
+        self.logger.debug(f"Jina returned {len(data.get('data', []))} results")
+        # Extract search results
+        citations: list[Citation] = []
+        search_results: list[SearchResult] = []
+        # Jina Search API returns results in 'data' array
+        # Basic fields: title, url, description, date, content, usage
+        # Enriched fields (enrich=true): images, publishedTime, metadata, external
+        for i, result in enumerate(data.get("data", []), 1):
+            # Build attributes dict for enriched fields
+            attributes: dict[str, Any] = {}
+            if result.get("images"):
+                attributes["images"] = result["images"]
+            if result.get("publishedTime"):
+                attributes["publishedTime"] = result["publishedTime"]
+            if result.get("metadata"):
+                attributes["metadata"] = result["metadata"]
+            if result.get("external"):
+                attributes["external"] = result["external"]
+            sr = SearchResult(
+                title=result.get("title", ""),
+                url=result.get("url", ""),
+                snippet=result.get("description", ""),
+                date=result.get("date", ""),
+                content=result.get("content", ""),
+                attributes=attributes,
+            )
+            search_results.append(sr)
+            citations.append(
+                Citation(
+                    id=i,
+                    reference=f"[{i}]",
+                    url=result.get("url", ""),
+                    title=result.get("title", ""),
+                    snippet=result.get("description", ""),
+                    date=result.get("date", ""),
+                    content=result.get("content", ""),
+                )
+            )
+        # Build metadata
+        metadata: dict[str, Any] = {
+            "finish_reason": "stop",
+            "code": data.get("code", 200),
+            "status": data.get("status", 20000),
+        }
+        # Calculate total tokens - prefer meta.usage.tokens if available
+        total_tokens = 0
+        if data.get("meta", {}).get("usage", {}).get("tokens"):
+            total_tokens = data["meta"]["usage"]["tokens"]
+        else:
+            # Fallback: sum per-result tokens
+            for result in data.get("data", []):
+                if result.get("usage", {}).get("tokens"):
+                    total_tokens += result["usage"]["tokens"]
+        usage: dict[str, Any] = {}
+        if total_tokens > 0:
+            usage["total_tokens"] = total_tokens
+        response_obj = WebSearchResponse(
+            query=query,
+            answer="",  # Jina doesn't provide LLM answers
+            provider="jina",
+            timestamp=datetime.now().isoformat(),
+            model="jina-reader",
+            citations=citations,
+            search_results=search_results,
+            usage=usage,
+            metadata=metadata,
+        )
+        return response_obj

src/services/search/providers/perplexity.py ADDED Viewed

@@ -0,0 +1,153 @@
+"""
+Perplexity AI Search Provider
+API: Uses the perplexityai Python package (imported as "perplexity")
+Model: sonar (default)
+Features:
+- AI-powered search with LLM-generated answers
+- Automatic citation extraction
+- Usage tracking with cost information
+"""
+from datetime import datetime
+from typing import Any
+from ..base import BaseSearchProvider
+from ..types import Citation, SearchResult, WebSearchResponse
+from . import register_provider
+@register_provider("perplexity")
+class PerplexityProvider(BaseSearchProvider):
+    """Perplexity AI search provider"""
+    display_name = "Perplexity"
+    description = "AI-powered search with answers"
+    supports_answer = True
+    BASE_URL = "https://api.perplexity.ai"  # Used by the perplexity package internally
+    def __init__(self, api_key: str | None = None, **kwargs: Any) -> None:
+        super().__init__(api_key, **kwargs)
+        self._client = None
+    @property
+    def client(self):
+        """Lazy-load the Perplexity client."""
+        if self._client is None:
+            try:
+                from perplexity import Perplexity
+            except ImportError as e:
+                raise ImportError(
+                    "perplexityai module is not installed. To use Perplexity search, please install: "
+                    "pip install perplexityai"
+                ) from e
+            self._client = Perplexity(api_key=self.api_key)
+        return self._client
+    def search(
+        self,
+        query: str,
+        model: str = "sonar",
+        system_prompt: str = "You are a helpful AI assistant. Provide detailed and accurate answers based on web search results.",
+        **kwargs: Any,
+    ) -> WebSearchResponse:
+        """
+        Perform search using Perplexity API.
+        Args:
+            query: Search query.
+            model: Model to use (default: sonar).
+            system_prompt: System prompt for the model.
+            **kwargs: Additional options.
+        Returns:
+            WebSearchResponse: Standardized search response.
+        """
+        self.logger.debug(f"Calling Perplexity API with model={model}")
+        completion = self.client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": query},
+            ],
+        )
+        if not completion.choices or len(completion.choices) == 0:
+            raise ValueError("Perplexity API returned no choices")
+        answer = completion.choices[0].message.content
+        # Build usage info with safe attribute access
+        usage_info: dict[str, Any] = {}
+        if hasattr(completion, "usage") and completion.usage is not None:
+            usage = completion.usage
+            usage_info = {
+                "prompt_tokens": getattr(usage, "prompt_tokens", 0),
+                "completion_tokens": getattr(usage, "completion_tokens", 0),
+                "total_tokens": getattr(usage, "total_tokens", 0),
+            }
+            if hasattr(usage, "cost") and usage.cost is not None:
+                cost = usage.cost
+                usage_info["cost"] = {
+                    "total_cost": getattr(cost, "total_cost", 0),
+                    "input_tokens_cost": getattr(cost, "input_tokens_cost", 0),
+                    "output_tokens_cost": getattr(cost, "output_tokens_cost", 0),
+                }
+        # Build search results list
+        search_results: list[SearchResult] = []
+        if hasattr(completion, "search_results") and completion.search_results:
+            for search_item in completion.search_results:
+                search_results.append(
+                    SearchResult(
+                        title=getattr(search_item, "title", "") or "",
+                        url=getattr(search_item, "url", "") or "",
+                        snippet=getattr(search_item, "snippet", "") or "",
+                        date=getattr(search_item, "date", "") or "",
+                        source=str(getattr(search_item, "source", ""))
+                        if getattr(search_item, "source", None)
+                        else "",
+                    )
+                )
+        # Build citations list
+        citations: list[Citation] = []
+        if hasattr(completion, "citations") and completion.citations:
+            for i, citation_url in enumerate(completion.citations, 1):
+                # Try to find matching search result for more info
+                title = ""
+                snippet = ""
+                for sr in search_results:
+                    if sr.url == citation_url:
+                        title = sr.title
+                        snippet = sr.snippet
+                        break
+                citations.append(
+                    Citation(
+                        id=i,
+                        reference=f"[{i}]",
+                        url=citation_url,
+                        title=title,
+                        snippet=snippet,
+                    )
+                )
+        # Ensure answer is a string
+        answer_str = str(answer) if answer else ""
+        response = WebSearchResponse(
+            query=query,
+            answer=answer_str,
+            provider="perplexity",
+            timestamp=datetime.now().isoformat(),
+            model=completion.model,
+            citations=citations,
+            search_results=search_results,
+            usage=usage_info,
+            metadata={
+                "finish_reason": completion.choices[0].finish_reason,
+            },
+        )
+        return response