local-deep-research 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/defaults/search_engines.toml +2 -2
- local_deep_research/search_system.py +9 -10
- local_deep_research/web/app.py +6 -22
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +102 -661
- local_deep_research/web_search_engines/search_engine_base.py +5 -14
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/METADATA +1 -1
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/RECORD +11 -11
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/licenses/LICENSE +0 -0
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/top_level.txt +0 -0
local_deep_research/defaults/search_engines.toml
CHANGED
@@ -37,7 +37,7 @@ module_path = "local_deep_research.web_search_engines.engines.search_engine_pubm
 class_name = "PubMedSearchEngine"
 requires_api_key = false
 api_key_env = "NCBI_API_KEY"
-reliability = 0.
+reliability = 0.98
 strengths = [
     "biomedical literature", "medical research", "clinical studies",
     "life sciences", "health information", "scientific papers"
@@ -191,7 +191,7 @@ module_path = "local_deep_research.web_search_engines.engines.search_engine_sema
 class_name = "SemanticScholarSearchEngine"
 requires_api_key = false
 api_key_env = "S2_API_KEY"
-reliability = 0.
+reliability = 0.87
 strengths = [
     "comprehensive scientific literature",
     "extensive citation network",
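The two hunks above pin explicit reliability weights (0.98 for PubMed, 0.87 for Semantic Scholar); the previous values are elided in this diff view. For illustration only, a consumer of these defaults might read such a weight as below; the section name `pubmed` and this loader are assumptions for the sketch, not the package's actual config code.

```python
# Illustrative sketch (not the package's loader): reading an engine's
# reliability weight from search_engines.toml with the stdlib TOML parser.
import tomllib  # standard library in Python 3.11+

with open("search_engines.toml", "rb") as f:
    engines = tomllib.load(f)

# "pubmed" is a hypothetical section name used only for this sketch.
reliability = engines.get("pubmed", {}).get("reliability", 0.0)
print(reliability)  # 0.98 with the new defaults
```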
local_deep_research/search_system.py
CHANGED
@@ -28,7 +28,7 @@ class AdvancedSearchSystem:
 
         # Check if search is available, log warning if not
         if self.search is None:
-
+            logger.info("WARNING: Search system initialized with no search engine! Research will not be effective.")
             self._update_progress("WARNING: No search engine available", None, {"error": "No search engine configured properly"})
 
 
@@ -101,7 +101,7 @@ class AdvancedSearchSystem:
         self._update_progress("Knowledge compression complete", None)
         response = remove_think_tags(response.content)
         response = str(response) #+ "\n\n" + str(formatted_links)
-
+
         return response
 
     def analyze_topic(self, query: str) -> Dict:
@@ -165,7 +165,7 @@ class AdvancedSearchSystem:
                 search_results = self.search.run(question)
             except Exception as e:
                 error_msg = f"Error during search: {str(e)}"
-
+                logger.info(f"SEARCH ERROR: {error_msg}")
                 self._update_progress(error_msg,
                                       int(question_progress_base + 2),
                                       {"phase": "search_error", "error": str(e)})
@@ -190,7 +190,7 @@ class AdvancedSearchSystem:
             self._update_progress(f"Analyzing results for: {question}",
                                   int(question_progress_base + 5),
                                   {"phase": "analysis"})
-
+
 
             try:
                 result = self.citation_handler.analyze_followup(
@@ -203,7 +203,7 @@ class AdvancedSearchSystem:
                 if links:
                     formatted_links=format_links(links=links)
 
-                logger.
+                logger.info(f"Generated questions: {formatted_links}")
                 if result is not None:
                     results_with_links = str(result["content"])
                     findings.append(
@@ -219,7 +219,6 @@ class AdvancedSearchSystem:
                 if settings.general.knowledge_accumulation != str(KnowledgeAccumulationApproach.NO_KNOWLEDGE.value):
                     current_knowledge = current_knowledge + "\n\n\n New: \n" + results_with_links
 
-                logger.info(settings.general.knowledge_accumulation)
                 if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
                     logger.info("Compressing knowledge")
                     self._update_progress(f"Compress Knowledge for: {question}",
@@ -232,7 +231,7 @@ class AdvancedSearchSystem:
                                           {"phase": "analysis_complete"})
             except Exception as e:
                 error_msg = f"Error analyzing results: {str(e)}"
-
+                logger.info(f"ANALYSIS ERROR: {error_msg}")
                 self._update_progress(error_msg,
                                       int(question_progress_base + 10),
                                       {"phase": "analysis_error", "error": str(e)})
@@ -251,7 +250,7 @@ class AdvancedSearchSystem:
             logger.info("FINISHED ITERATION - Compressing Knowledge")
         except Exception as e:
             error_msg = f"Error compressing knowledge: {str(e)}"
-
+            logger.info(f"COMPRESSION ERROR: {error_msg}")
             self._update_progress(error_msg,
                                   int((iteration / total_iterations) * 100 - 3),
                                   {"phase": "compression_error", "error": str(e)})
@@ -266,7 +265,7 @@ class AdvancedSearchSystem:
             formatted_findings = self._save_findings(findings, current_knowledge, query)
         except Exception as e:
             error_msg = f"Error saving findings: {str(e)}"
-
+            logger.info(f"SAVE ERROR: {error_msg}")
             self._update_progress(error_msg,
                                   int((iteration / total_iterations) * 100),
                                   {"phase": "save_error", "error": str(e)})
@@ -278,7 +277,7 @@ class AdvancedSearchSystem:
             "findings": findings,
             "iterations": iteration,
             "questions": self.questions_by_iteration,
-            "formatted_findings": formatted_findings
+            "formatted_findings": formatted_findings,
             "current_knowledge": current_knowledge
         }
 
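For reference, this is the shape of the dict `analyze_topic` now returns; as the diff shows, the trailing comma added after `formatted_findings` is what lets the `current_knowledge` entry on the next line parse. A sketch with placeholder values, not real output:

```python
# Placeholder values; the key names come from the hunk above.
results = {
    "findings": [],                # per-question findings collected so far
    "iterations": 2,               # number of completed search iterations
    "questions": {},               # questions_by_iteration mapping
    "formatted_findings": "...",   # trailing comma added in 0.1.16
    "current_knowledge": "...",    # accumulated knowledge text
}
```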
local_deep_research/web/app.py
CHANGED
@@ -1001,27 +1001,14 @@ def run_research_process(research_id, query, mode):
         if mode == 'quick':
             # Quick Summary
             if results.get('findings'):
-
-
-
-                # Safer access to formatted_findings with logging
-                print(f"Results keys: {list(results.keys())}")
-
-                # Check if formatted_findings exists in results
-                if 'formatted_findings' not in results:
-                    logger.info("WARNING: 'formatted_findings' not found in results, using fallback")
-                    # Create fallback formatted findings from available data
-                    raw_formatted_findings = "# Research Findings\n\n"
-                    raw_formatted_findings = raw_formatted_findings + str(results.get('current_knowledge'))
-                    for i, finding in enumerate(results.get('findings', [])):
-                        raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
-                else:
-                    raw_formatted_findings = results['formatted_findings']
-                    logger.info(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
+
+                raw_formatted_findings = results['formatted_findings']
+                logger.info(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
 
                 try:
+                    clean_markdown = raw_formatted_findings
                     # ADDED CODE: Convert debug output to clean markdown
-                    clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
+                    #clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
                     print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
 
                     # First send a progress update for generating the summary
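0.1.16 drops the defensive fallback here and accesses `formatted_findings` directly, which is presumably safe because the `search_system.py` change above always includes that key in the results dict. The simplified access path, shown on placeholder data:

```python
# Placeholder results dict; mirrors the simplified access in the new code.
results = {"formatted_findings": "# Research Findings\n...", "findings": []}
raw_formatted_findings = results['formatted_findings']
print(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
```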
@@ -1693,10 +1680,7 @@ def convert_debug_to_markdown(raw_text, query):
     lines_after = len(content.split("\n"))
     print(f"Removed {lines_before - lines_after} divider lines")
 
-
-    if "COMPLETE RESEARCH OUTPUT" in content:
-        print("Found and removing COMPLETE RESEARCH OUTPUT section")
-        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
+
 
     # Remove SEARCH QUESTIONS BY ITERATION section
     if "SEARCH QUESTIONS BY ITERATION:" in content:
local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py
CHANGED
@@ -64,6 +64,7 @@ class SemanticScholarSearchEngine(BaseSearchEngine):
         """
         # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
         super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
+
         self.api_key = api_key
         self.year_range = year_range
         self.get_abstracts = get_abstracts
@@ -82,13 +83,7 @@ class SemanticScholarSearchEngine(BaseSearchEngine):
         # Base API URLs
         self.base_url = "https://api.semanticscholar.org/graph/v1"
         self.paper_search_url = f"{self.base_url}/paper/search"
-        self.paper_bulk_search_url = f"{self.base_url}/paper/search/bulk"
-        self.paper_batch_url = f"{self.base_url}/paper/batch"
         self.paper_details_url = f"{self.base_url}/paper"
-        self.author_search_url = f"{self.base_url}/author/search"
-        self.author_details_url = f"{self.base_url}/author"
-        self.recommendations_url = "https://api.semanticscholar.org/recommendations/v1/papers"
-        self.datasets_url = "https://api.semanticscholar.org/datasets/v1"
 
         # Create a session with retry capabilities
         self.session = self._create_session()
@@ -133,15 +128,6 @@ class SemanticScholarSearchEngine(BaseSearchEngine):
 
         self.last_request_time = time.time()
 
-    def _get_headers(self) -> Dict[str, str]:
-        """Get the headers for API requests"""
-        headers = {"Accept": "application/json"}
-
-        if self.api_key:
-            headers["x-api-key"] = self.api_key
-
-        return headers
-
     def _make_request(self, url: str, params: Optional[Dict] = None, data: Optional[Dict] = None,
                      method: str = "GET") -> Dict:
         """
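With `_get_headers` removed, the `Accept` and `x-api-key` headers presumably live on the session built by `_create_session()`, which this diff does not show. A minimal sketch of such a session, assuming standard `requests`/`urllib3` retry wiring; this is not the package's implementation:

```python
# Sketch of a retrying session that carries the API key as a default header.
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def create_session(api_key=None):
    session = requests.Session()
    retries = Retry(total=3, backoff_factor=0.5,
                    status_forcelist=[429, 500, 502, 503, 504])
    session.mount("https://", HTTPAdapter(max_retries=retries))
    session.headers.update({"Accept": "application/json"})
    if api_key:
        # Semantic Scholar expects the key in the x-api-key header
        # (grounded by the removed _get_headers code above).
        session.headers["x-api-key"] = api_key
    return session
```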
@@ -226,16 +212,16 @@ Return ONLY the optimized search query with no explanation.
                 return query
 
             logger.info(f"Original query: '{query}'")
-            logger.info(f"Optimized for
+            logger.info(f"Optimized for search: '{optimized_query}'")
 
             return optimized_query
         except Exception as e:
             logger.error(f"Error optimizing query: {e}")
             return query  # Fall back to original query on error
 
-    def
+    def _direct_search(self, query: str) -> List[Dict[str, Any]]:
         """
-
+        Make a direct search request to the Semantic Scholar API.
 
         Args:
             query: The search query
@@ -244,6 +230,7 @@ Return ONLY the optimized search query with no explanation.
             List of paper dictionaries
         """
         try:
+            # Configure fields to retrieve
             fields = [
                 "paperId",
                 "externalIds",
@@ -260,7 +247,7 @@ Return ONLY the optimized search query with no explanation.
 
             params = {
                 "query": query,
-                "limit": min(self.max_results, 100), #
+                "limit": min(self.max_results, 100), # API limit is 100 per request
                 "fields": ",".join(fields)
             }
 
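Put together, the new `_direct_search` boils down to a single GET against `/paper/search` with a comma-joined field list and a limit capped at 100, as the hunks above show. A standalone sketch of that request with an example query; this is not the package's code:

```python
# Standalone sketch of the request _direct_search() builds.
import requests

base_url = "https://api.semanticscholar.org/graph/v1"
fields = ["paperId", "externalIds", "url", "title", "abstract",
          "venue", "year", "authors", "fieldsOfStudy"]
params = {
    "query": "retrieval augmented generation",  # example query
    "limit": min(20, 100),                      # API limit is 100 per request
    "fields": ",".join(fields),
}
resp = requests.get(f"{base_url}/paper/search", params=params, timeout=30)
papers = resp.json().get("data", [])
print(f"Found {len(papers)} papers")
```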
@@ -281,147 +268,29 @@ Return ONLY the optimized search query with no explanation.
 
             if "data" in response:
                 papers = response["data"]
-                logger.info(f"Found {len(papers)} papers
+                logger.info(f"Found {len(papers)} papers with direct search for query: '{query}'")
                 return papers
             else:
-                logger.warning(f"No data in response for query: '{query}'")
+                logger.warning(f"No data in response for direct search query: '{query}'")
                 return []
 
         except Exception as e:
-            logger.error(f"Error
-            return []
-
-    def _search_papers_bulk(self, query: str, limit: int = 1000) -> List[Dict[str, Any]]:
-        """
-        Search for papers using the bulk search API, which can return up to 1000 papers.
-
-        Args:
-            query: The search query
-            limit: Maximum number of results (up to 1000)
-
-        Returns:
-            List of paper dictionaries
-        """
-        try:
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "fieldsOfStudy"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            params = {
-                "query": query,
-                "limit": min(limit, 1000), # Bulk search API can return up to 1000 results
-                "fields": ",".join(fields)
-            }
-
-            # Add year filter if specified
-            if self.year_range:
-                start_year, end_year = self.year_range
-                params["year"] = f"{start_year}-{end_year}"
-
-            # Add fields of study filter if specified
-            if self.fields_of_study:
-                params["fieldsOfStudy"] = ",".join(self.fields_of_study)
-
-            # Add publication types filter if specified
-            if self.publication_types:
-                params["publicationTypes"] = ",".join(self.publication_types)
-
-            response = self._make_request(self.paper_bulk_search_url, params)
-
-            if "data" in response:
-                papers = response["data"]
-                logger.info(f"Found {len(papers)} papers using bulk search for query: '{query}'")
-                total_count = response.get("total", 0)
-                logger.info(f"Total available results: {total_count}")
-
-                # Handle continuation token for pagination if needed
-                if "token" in response and len(papers) < min(total_count, limit):
-                    token = response["token"]
-                    logger.info(f"Continuation token available: {token}")
-                    # The caller would need to handle continuation tokens for pagination
-
-                return papers
-            else:
-                logger.warning(f"No data in response for bulk query: '{query}'")
-                return []
-
-        except Exception as e:
-            logger.error(f"Error in bulk paper search: {e}")
+            logger.error(f"Error in direct search: {e}")
             return []
 
-    def _get_paper_details(self, paper_id: str) -> Dict[str, Any]:
-        """
-        Get detailed information about a specific paper.
-
-        Args:
-            paper_id: Semantic Scholar Paper ID
-
-        Returns:
-            Dictionary with paper details
-        """
-        try:
-            # Construct fields parameter
-            fields = [
-                "paperId",
-                "externalIds",
-                "corpusId",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "fieldsOfStudy"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            if self.get_embeddings:
-                fields.append("embedding")
-
-            # Add citation and reference fields if requested
-            if self.get_citations:
-                fields.append(f"citations.limit({self.citation_limit})")
-
-            if self.get_references:
-                fields.append(f"references.limit({self.reference_limit})")
-
-            # Make the request
-            url = f"{self.paper_details_url}/{paper_id}"
-            params = {"fields": ",".join(fields)}
-
-            return self._make_request(url, params)
-
-        except Exception as e:
-            logger.error(f"Error getting paper details for {paper_id}: {e}")
-            return {}
-
-
     def _adaptive_search(self, query: str) -> Tuple[List[Dict[str, Any]], str]:
         """
         Perform an adaptive search that adjusts based on result volume.
         Uses LLM to generate better fallback queries when available.
 
         Args:
-           query: The search query
+            query: The search query
 
         Returns:
             Tuple of (list of paper results, search strategy used)
         """
         # Start with a standard search
-        papers = self.
+        papers = self._direct_search(query)
         strategy = "standard"
 
         # If no results, try different variations
@@ -430,7 +299,7 @@ Return ONLY the optimized search query with no explanation.
             if '"' in query:
                 unquoted_query = query.replace('"', '')
                 logger.info(f"No results with quoted terms, trying without quotes: {unquoted_query}")
-                papers = self.
+                papers = self._direct_search(unquoted_query)
 
                 if papers:
                     strategy = "unquoted"
@@ -440,21 +309,20 @@ Return ONLY the optimized search query with no explanation.
             if self.llm:
                 try:
                     # Generate alternate search queries focusing on core concepts
-                    prompt = f"""You are helping refine a search query
+                    prompt = f"""You are helping refine a search query that returned no results.
 
-
+Original query: "{query}"
 
-
+The query might be too specific or use natural language phrasing that doesn't match academic paper keywords.
 
-
-
-
-
-
-5. Format each as a concise keyword-focused search term (not a natural language question)
+Please provide THREE alternative search queries that:
+1. Focus on the core academic concepts
+2. Use precise terminology commonly found in academic papers
+3. Break down complex queries into more searchable components
+4. Format each as a concise keyword-focused search term (not a natural language question)
 
-
-
+Format each query on a new line with no numbering or explanation. Keep each query under 8 words and very focused.
+"""
                     # Get the LLM's response
                     response = self.llm.invoke(prompt)
 
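Because the rewritten prompt asks for one query per line with no numbering, the response can be parsed by simply splitting on newlines. A sketch of that parse; the helper name is hypothetical:

```python
# Hypothetical helper: split an LLM response into non-empty query lines.
def parse_alt_queries(response_text: str) -> list[str]:
    lines = [line.strip() for line in response_text.split("\n")]
    return [line for line in lines if line]

alt_queries = parse_alt_queries("cancer immunotherapy outcomes\nPD-1 inhibitor efficacy\n")
for alt_query in alt_queries[:3]:  # the engine also caps at three alternatives
    print(alt_query)
```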
@@ -469,7 +337,7 @@ Return ONLY the optimized search query with no explanation.
                     # Try each alternative query
                     for alt_query in alt_queries[:3]: # Limit to first 3 alternatives
                         logger.info(f"Trying LLM-suggested query: {alt_query}")
-                        alt_papers = self.
+                        alt_papers = self._direct_search(alt_query)
 
                         if alt_papers:
                             logger.info(f"Found {len(alt_papers)} papers using LLM-suggested query: {alt_query}")
@@ -479,53 +347,80 @@ Return ONLY the optimized search query with no explanation.
                     logger.error(f"Error using LLM for query refinement: {e}")
                     # Fall through to simpler strategies
 
-        # Fallback
-
-
-
-
-
-
-
-
-            important_query = ' '.join(important_terms[:5]) # Limit to 5 terms
-            logger.info(f"Trying with important cancer terms: {important_query}")
-            papers = self._search_papers(important_query)
+        # Fallback: Try with the longest words (likely specific terms)
+        words = re.findall(r'\w+', query)
+        longer_words = [word for word in words if len(word) > 6]
+        if longer_words:
+            # Use up to 3 of the longest words
+            longer_words = sorted(longer_words, key=len, reverse=True)[:3]
+            key_terms_query = ' '.join(longer_words)
+            logger.info(f"Trying with key terms: {key_terms_query}")
+            papers = self._direct_search(key_terms_query)
 
             if papers:
-                strategy = "
+                strategy = "key_terms"
                 return papers, strategy
+
+        # Final fallback: Try with just the longest word
+        if words:
+            longest_word = max(words, key=len)
+            if len(longest_word) > 5: # Only use if it's reasonably long
+                logger.info(f"Trying with single key term: {longest_word}")
+                papers = self._direct_search(longest_word)
 
-
-
-
-
-
+                if papers:
+                    strategy = "single_term"
+                    return papers, strategy
+
+        return papers, strategy
+
+    def _get_paper_details(self, paper_id: str) -> Dict[str, Any]:
+        """
+        Get detailed information about a specific paper.
+
+        Args:
+            paper_id: Semantic Scholar Paper ID
 
-
-
+        Returns:
+            Dictionary with paper details
+        """
+        try:
+            # Construct fields parameter
+            fields = [
+                "paperId",
+                "externalIds",
+                "corpusId",
+                "url",
+                "title",
+                "abstract",
+                "venue",
+                "year",
+                "authors",
+                "fieldsOfStudy"
+            ]
 
-            if
-
-            logger.info(f"Trying with specific cancer-treatment pair: {specific_query}")
-            papers = self._search_papers(specific_query)
+            if self.get_tldr:
+                fields.append("tldr")
 
-
-
-            return papers, strategy
-
-        # Fallback 3: Extract the longest word (likely a specific term)
-        longest_word = max(re.findall(r'\w+', query), key=len, default='')
-        if len(longest_word) > 6:
-            logger.info(f"Trying with primary keyword: {longest_word}")
-            papers = self._search_papers(longest_word)
+            if self.get_embeddings:
+                fields.append("embedding")
 
-
-
-
-
-
-
+            # Add citation and reference fields if requested
+            if self.get_citations:
+                fields.append(f"citations.limit({self.citation_limit})")
+
+            if self.get_references:
+                fields.append(f"references.limit({self.reference_limit})")
+
+            # Make the request
+            url = f"{self.paper_details_url}/{paper_id}"
+            params = {"fields": ",".join(fields)}
+
+            return self._make_request(url, params)
+
+        except Exception as e:
+            logger.error(f"Error getting paper details for {paper_id}: {e}")
+            return {}
 
     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
         """
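The new fallback chain is generic rather than hard-coded to cancer terms: first a query built from up to three words longer than six characters, then the single longest word. Condensed into a self-contained sketch (the helper name is hypothetical):

```python
# Condensed sketch of the keyword fallback chain shown in the hunk above.
import re

def fallback_queries(query: str) -> list[str]:
    words = re.findall(r'\w+', query)
    candidates = []
    # Up to three of the longest words over six characters.
    longer_words = sorted((w for w in words if len(w) > 6), key=len, reverse=True)[:3]
    if longer_words:
        candidates.append(' '.join(longer_words))
    # Final fallback: the single longest word, if reasonably long.
    if words:
        longest_word = max(words, key=len)
        if len(longest_word) > 5:
            candidates.append(longest_word)
    return candidates

print(fallback_queries("long-term effects of intermittent fasting on metabolism"))
# ['intermittent metabolism effects', 'intermittent']
```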
@@ -542,11 +437,11 @@ Return ONLY the optimized search query with no explanation.
         # Optimize the query if LLM is available
         optimized_query = self._optimize_query(query)
 
-        #
+        # Use the adaptive search approach
         papers, strategy = self._adaptive_search(optimized_query)
 
         if not papers:
-            logger.warning(f"No Semantic Scholar results found
+            logger.warning(f"No Semantic Scholar results found")
             return []
 
         # Format as previews
@@ -583,10 +478,10 @@ Return ONLY the optimized search query with no explanation.
                     "id": paper_id if paper_id else "",
                     "title": title if title else "",
                     "link": url if url else "",
-                    "snippet": snippet,
-                    "authors": authors,
+                    "snippet": snippet,
+                    "authors": authors,
                     "venue": venue if venue else "",
-                    "year": year,
+                    "year": year,
                     "external_ids": external_ids if external_ids else {},
                     "source": "Semantic Scholar",
                     "_paper_id": paper_id if paper_id else "",
@@ -602,6 +497,13 @@ Return ONLY the optimized search query with no explanation.
                 logger.error(f"Error processing paper preview: {e}")
                 # Continue with the next paper
 
+        # Sort by year (newer first) if available
+        previews = sorted(
+            previews,
+            key=lambda p: p.get("year", 0) if p.get("year") is not None else 0,
+            reverse=True
+        )
+
         logger.info(f"Found {len(previews)} Semantic Scholar previews using strategy: {strategy}")
         return previews
 
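The added sort keeps papers with no year from breaking the newest-first ordering by mapping `None` to 0. The same key, shown on toy data:

```python
# Toy data: a paper without a year sinks to the end of the ordering.
previews = [{"title": "A", "year": 2019}, {"title": "B", "year": None}, {"title": "C", "year": 2023}]
previews = sorted(
    previews,
    key=lambda p: p.get("year", 0) if p.get("year") is not None else 0,
    reverse=True,
)
print([p["title"] for p in previews])  # ['C', 'A', 'B']
```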
@@ -664,465 +566,4 @@ Return ONLY the optimized search query with no explanation.
 
         results.append(result)
 
-        return results
-
-    def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers by a specific author.
-
-        Args:
-            author_name: Name of the author
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers by the author
-        """
-        original_max_results = self.max_results
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # First search for the author
-            params = {
-                "query": author_name,
-                "limit": 5 # Limit to top 5 author matches
-            }
-
-            response = self._make_request(self.author_search_url, params)
-
-            if "data" not in response or not response["data"]:
-                logger.warning(f"No authors found matching: {author_name}")
-                return []
-
-            # Use the first (best) author match
-            author = response["data"][0]
-            author_id = author.get("authorId")
-
-            if not author_id:
-                logger.warning(f"No valid author ID found for: {author_name}")
-                return []
-
-            # Get the author's papers
-            fields = [
-                "papers.paperId",
-                "papers.title",
-                "papers.abstract",
-                "papers.venue",
-                "papers.year",
-                "papers.authors"
-            ]
-
-            if self.get_tldr:
-                fields.append("papers.tldr")
-
-            url = f"{self.author_details_url}/{author_id}"
-            author_params = {
-                "fields": ",".join(fields)
-            }
-
-            author_data = self._make_request(url, author_params)
-
-            if "papers" not in author_data or not author_data["papers"]:
-                logger.warning(f"No papers found for author: {author_name}")
-                return []
-
-            # Format as paper results
-            papers = author_data["papers"][:self.max_results]
-
-            # Convert to standard results format
-            results = []
-            for paper in papers:
-                # Format authors
-                authors = []
-                if "authors" in paper and paper["authors"]:
-                    authors = [author.get("name", "") for author in paper["authors"]]
-
-                result = {
-                    "id": paper.get("paperId", ""),
-                    "title": paper.get("title", ""),
-                    "link": f"https://www.semanticscholar.org/paper/{paper.get('paperId', '')}",
-                    "snippet": paper.get("abstract", "")[:250] + "..." if paper.get("abstract", "") and len(paper.get("abstract", "")) > 250 else paper.get("abstract", ""),
-                    "authors": authors,
-                    "venue": paper.get("venue", ""),
-                    "year": paper.get("year"),
-                    "source": "Semantic Scholar",
-
-                    # Include TLDR if available
-                    "tldr": paper.get("tldr", {}).get("text", "") if paper.get("tldr") else ""
-                }
-
-                results.append(result)
-
-            # Add citations and references if needed
-            if self.get_citations or self.get_references:
-                results = self._get_full_content(results)
-
-            return results
-
-        finally:
-            # Restore original value
-            self.max_results = original_max_results
-
-    def search_by_venue(self, venue_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers in a specific venue.
-
-        Args:
-            venue_name: Name of the venue (conference or journal)
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers from the venue
-        """
-        original_max_results = self.max_results
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # Semantic Scholar doesn't have a dedicated venue search API
-            # So we search for papers with the venue in the query
-            query = f'venue:"{venue_name}"'
-            return self.run(query)
-
-        finally:
-            # Restore original value
-            self.max_results = original_max_results
-
-    def search_by_year(self, query: str, year: int, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers from a specific year matching the query.
-
-        Args:
-            query: The search query
-            year: Publication year
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers from the specified year matching the query
-        """
-        original_max_results = self.max_results
-        original_year_range = self.year_range
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # Set year range for this search
-            self.year_range = (year, year)
-
-            return self.run(query)
-
-        finally:
-            # Restore original values
-            self.max_results = original_max_results
-            self.year_range = original_year_range
-
-    def search_by_field(self, query: str, field_of_study: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers in a specific field of study.
-
-        Args:
-            query: The search query
-            field_of_study: Field of study (e.g., "Computer Science", "Medicine")
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers in the specified field matching the query
-        """
-        original_max_results = self.max_results
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # Add field of study to query
-            field_query = f'{query} fieldofstudy:"{field_of_study}"'
-            return self.run(field_query)
-
-        finally:
-            # Restore original value
-            self.max_results = original_max_results
-
-    def get_paper_by_id(self, paper_id: str) -> Dict[str, Any]:
-        """
-        Get a specific paper by its Semantic Scholar ID.
-
-        Args:
-            paper_id: Semantic Scholar paper ID
-
-        Returns:
-            Dictionary with paper information
-        """
-        paper_details = self._get_paper_details(paper_id)
-
-        if not paper_details:
-            return {}
-
-        # Format authors
-        authors = []
-        if "authors" in paper_details and paper_details["authors"]:
-            authors = [author.get("name", "") for author in paper_details["authors"]]
-
-        # Create formatted result
-        result = {
-            "id": paper_details.get("paperId", ""),
-            "title": paper_details.get("title", ""),
-            "link": paper_details.get("url", ""),
-            "abstract": paper_details.get("abstract", ""),
-            "authors": authors,
-            "venue": paper_details.get("venue", ""),
-            "year": paper_details.get("year"),
-            "fields_of_study": paper_details.get("fieldsOfStudy", []),
-            "external_ids": paper_details.get("externalIds", {}),
-            "source": "Semantic Scholar",
-
-            # Include TLDR if available
-            "tldr": paper_details.get("tldr", {}).get("text", "") if paper_details.get("tldr") else ""
-        }
-
-        # Add citations and references if requested
-        if self.get_citations and "citations" in paper_details:
-            result["citations"] = paper_details["citations"]
-
-        if self.get_references and "references" in paper_details:
-            result["references"] = paper_details["references"]
-
-        # Add embedding if requested
-        if self.get_embeddings and "embedding" in paper_details:
-            result["embedding"] = paper_details["embedding"]
-
-        return result
-
-    def get_paper_by_doi(self, doi: str) -> Dict[str, Any]:
-        """
-        Get a paper by its DOI.
-
-        Args:
-            doi: Digital Object Identifier
-
-        Returns:
-            Dictionary with paper information
-        """
-        try:
-            # The Semantic Scholar API supports DOI lookup
-            url = f"{self.paper_details_url}/DOI:{doi}"
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "fieldsOfStudy"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            if self.get_embeddings:
-                fields.append("embedding")
-
-            # Add citation and reference fields if requested
-            if self.get_citations:
-                fields.append(f"citations.limit({self.citation_limit})")
-
-            if self.get_references:
-                fields.append(f"references.limit({self.reference_limit})")
-
-            params = {"fields": ",".join(fields)}
-            paper_details = self._make_request(url, params)
-
-            if not paper_details:
-                return {}
-
-            # Format the paper info the same way as get_paper_by_id
-            # Format authors
-            authors = []
-            if "authors" in paper_details and paper_details["authors"]:
-                authors = [author.get("name", "") for author in paper_details["authors"]]
-
-            # Create formatted result
-            result = {
-                "id": paper_details.get("paperId", ""),
-                "title": paper_details.get("title", ""),
-                "link": paper_details.get("url", ""),
-                "abstract": paper_details.get("abstract", ""),
-                "authors": authors,
-                "venue": paper_details.get("venue", ""),
-                "year": paper_details.get("year"),
-                "fields_of_study": paper_details.get("fieldsOfStudy", []),
-                "external_ids": paper_details.get("externalIds", {}),
-                "source": "Semantic Scholar",
-
-                # Include TLDR if available
-                "tldr": paper_details.get("tldr", {}).get("text", "") if paper_details.get("tldr") else ""
-            }
-
-            # Add citations and references if requested
-            if self.get_citations and "citations" in paper_details:
-                result["citations"] = paper_details["citations"]
-
-            if self.get_references and "references" in paper_details:
-                result["references"] = paper_details["references"]
-
-            # Add embedding if requested
-            if self.get_embeddings and "embedding" in paper_details:
-                result["embedding"] = paper_details["embedding"]
-
-            return result
-
-        except Exception as e:
-            logger.error(f"Error getting paper by DOI {doi}: {e}")
-            return {}
-
-    def get_papers_batch(self, paper_ids: List[str], fields: Optional[List[str]] = None) -> List[Dict[str, Any]]:
-        """
-        Get details for multiple papers in a single batch request.
-
-        Args:
-            paper_ids: List of paper IDs (Semantic Scholar IDs, DOIs, arXiv IDs, etc.)
-            fields: Fields to include in the response
-
-        Returns:
-            List of paper details
-        """
-        if not paper_ids:
-            return []
-
-        if fields is None:
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "referenceCount",
-                "citationCount"
-            ]
-
-        if self.get_tldr:
-            fields.append("tldr")
-
-        try:
-            # Construct request params
-            params = {
-                "fields": ",".join(fields)
-            }
-
-            # Make POST request with paper IDs in the body
-            response = self._make_request(
-                self.paper_batch_url,
-                params=params,
-                data={"ids": paper_ids},
-                method="POST"
-            )
-
-            if isinstance(response, list):
-                return response
-            else:
-                logger.warning("Unexpected response format from batch API")
-                return []
-
-        except Exception as e:
-            logger.error(f"Error in batch paper lookup: {e}")
-            return []
-
-    def get_paper_recommendations(self,
-                                 positive_paper_ids: List[str],
-                                 negative_paper_ids: Optional[List[str]] = None,
-                                 max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Get recommended papers based on positive and negative examples.
-
-        Args:
-            positive_paper_ids: List of paper IDs to use as positive examples
-            negative_paper_ids: Optional list of paper IDs to use as negative examples
-            max_results: Maximum number of recommendations to return
-
-        Returns:
-            List of recommended papers
-        """
-        if not positive_paper_ids:
-            return []
-
-        limit = max_results or self.max_results
-
-        try:
-            # Construct the request payload
-            payload = {
-                "positivePaperIds": positive_paper_ids
-            }
-
-            if negative_paper_ids:
-                payload["negativePaperIds"] = negative_paper_ids
-
-            # Define fields to include in the response
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            # Request parameters
-            params = {
-                "fields": ",".join(fields),
-                "limit": limit
-            }
-
-            # Make POST request to recommendations endpoint
-            response = self._make_request(
-                self.recommendations_url,
-                params=params,
-                data=payload,
-                method="POST"
-            )
-
-            if "recommendedPapers" not in response:
-                return []
-
-            papers = response["recommendedPapers"]
-
-            # Format as standard results
-            results = []
-            for paper in papers:
-                # Format authors
-                authors = []
-                if "authors" in paper and paper["authors"]:
-                    authors = [author.get("name", "") for author in paper["authors"]]
-
-                result = {
-                    "id": paper.get("paperId", ""),
-                    "title": paper.get("title", ""),
-                    "link": paper.get("url", ""),
-                    "snippet": paper.get("abstract", "")[:250] + "..." if paper.get("abstract", "") and len(paper.get("abstract", "")) > 250 else paper.get("abstract", ""),
-                    "authors": authors,
-                    "venue": paper.get("venue", ""),
-                    "year": paper.get("year"),
-                    "source": "Semantic Scholar",
-
-                    # Include TLDR if available
-                    "tldr": paper.get("tldr", {}).get("text", "") if paper.get("tldr") else ""
-                }
-
-                results.append(result)
-
-            return results
-
-        except Exception as e:
-            logger.error(f"Error getting paper recommendations: {e}")
-            return []
+        return results
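This hunk removes all of the convenience lookups (author, venue, year, field, ID, DOI, batch, recommendations) along with the endpoints that served them. The kept `/paper` details endpoint still accepts `DOI:`-prefixed IDs, as the removed `get_paper_by_doi` code shows, so a caller could replicate that lookup directly; a hedged sketch, not the package's code:

```python
# Direct DOI lookup against the kept /paper endpoint.
import requests

base_url = "https://api.semanticscholar.org/graph/v1"
doi = "10.18653/v1/N18-3011"  # example DOI
resp = requests.get(
    f"{base_url}/paper/DOI:{doi}",
    params={"fields": "paperId,title,year,authors"},
    timeout=30,
)
print(resp.json().get("title"))
```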
local_deep_research/web_search_engines/search_engine_base.py
CHANGED
@@ -65,13 +65,8 @@ class BaseSearchEngine(ABC):
         filtered_items = self._filter_for_relevance(previews, query)
         if not filtered_items:
             logger.info(f"All preview results were filtered out as irrelevant for query: {query}")
-            #
-
-            from local_deep_research import config
-            if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
-                return previews[:self.max_filtered_results or 5] # Return unfiltered but limited results
-            else:
-                filtered_items = previews[:self.max_filtered_results or 5]
+            # Do not fall back to previews, return empty list instead
+            return []
 
         # Step 3: Get full content for filtered items
         # Import config inside the method to avoid circular import
@@ -166,17 +161,13 @@ Respond with ONLY the JSON array, no other text."""
 
                 return ranked_results
             else:
-                logger.info("Could not find JSON array in response, returning
-
-                return previews[:self.max_filtered_results]
-                return previews
+                logger.info("Could not find JSON array in response, returning no previews")
+                return []
 
         except Exception as e:
             logger.info(f"Relevance filtering error: {e}")
             # Fall back to returning all previews (or top N) on error
-
-            return previews[:self.max_filtered_results]
-            return previews
+            return[]
 
     @abstractmethod
     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
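Both base-engine changes tighten the same contract: when relevance filtering yields nothing, when the LLM response contains no JSON array, or when filtering errors out, the engine now returns an empty list instead of falling back to unfiltered previews. A toy illustration of the new behavior; this stand-in function is not the real class:

```python
# Stand-in for the new filtering contract: no silent fallback to previews.
from typing import Any

def run_filter(previews: list[dict[str, Any]], relevant_ids: set[int]) -> list[dict[str, Any]]:
    filtered = [p for i, p in enumerate(previews) if i in relevant_ids]
    if not filtered:
        # 0.1.14 would return previews[:max_filtered_results or 5] here
        return []
    return filtered

print(run_filter([{"title": "x"}], set()))  # [] -- empty, not unfiltered previews
```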
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-deep-research
-Version: 0.1.
+Version: 0.1.16
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/RECORD
CHANGED
@@ -4,19 +4,19 @@ local_deep_research/config.py,sha256=lucqOE4KeNm1ynYdcHYWJLE5fJ0QN-1QKZpRcBPsHe8
 local_deep_research/local_collections.py,sha256=SB-fdptT7qS0klJUVx_Rs9OgDwafMUgI46984WlZGKI,6076
 local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
 local_deep_research/report_generator.py,sha256=UOiSw_vPHgtUpI8L9_UaOlpBVBloPB-ilhAo-1d2B9M,8200
-local_deep_research/search_system.py,sha256=
+local_deep_research/search_system.py,sha256=KNSn_8ciEGfSRR0k0ggIzZF6jqNXoYsCnUmdKgtKO0E,15481
 local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
 local_deep_research/defaults/llm_config.py,sha256=88IGWPPvikSKmAqfqsGovBx2Jac5eh2sBY_LIW624Ik,7910
 local_deep_research/defaults/local_collections.toml,sha256=_edVWVHrhunMfazjejhJlGPRkHKKIP51qQtNkMgNEiA,1406
 local_deep_research/defaults/main.toml,sha256=l_J9JAPhKEp63IsLBO0hQDVimxogEpnrEVnNjiOeUxg,1403
-local_deep_research/defaults/search_engines.toml,sha256
+local_deep_research/defaults/search_engines.toml,sha256=TYkOqVaZq9JPawz4fIPyGdkAtYa4t8F9H50VY-wv2ak,8101
 local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/utilties/enums.py,sha256=TVAZiu9szNbdacfb7whgaQJJlSk7oYByADaAierD4CE,229
 local_deep_research/utilties/llm_utils.py,sha256=IGv-_gJWqLTpO3_op1NHIwxKaFEzmXhhVYSLTTSMnIA,4522
 local_deep_research/utilties/search_utilities.py,sha256=C8ycFd7blcq5vtnd6GxP8dkepZT6EEqHFtT3WYxF0Ck,4151
 local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
 local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
-local_deep_research/web/app.py,sha256=
+local_deep_research/web/app.py,sha256=5_VLNdhJOqdgacucglUdS_lVURNgYNbXhK9vME6JmzA,72431
 local_deep_research/web/static/css/styles.css,sha256=mW217FfZNW1pzMtlbuXE2fRBJekeIdIoy4m-yXFirj4,23782
 local_deep_research/web/static/js/app.js,sha256=GPncdWpw2YNTs56JY-0tjTTr9JnX-fIZSZX0agwKZMU,172813
 local_deep_research/web/templates/api_keys_config.html,sha256=jA8Y-nfUGJ1dTvbw2jK_8xPy2x6UG_5gHpbrTJAex2g,3527
@@ -29,7 +29,7 @@ local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaW
 local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
 local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/full_search.py,sha256=3SSTvD12g4pNlZCSGh8jwsyYWpQglgqjADnq8dG1zyI,9756
-local_deep_research/web_search_engines/search_engine_base.py,sha256=
+local_deep_research/web_search_engines/search_engine_base.py,sha256=Knmf45pMYd7hYc9x8jG8gOtMnribsXDcOXGrA50LK3E,8100
 local_deep_research/web_search_engines/search_engine_factory.py,sha256=B_QaqoAwnVXCmHNdqGbo94LekWY6wpBw_PWNkI120qE,10728
 local_deep_research/web_search_engines/search_engines_config.py,sha256=bNCuR09NOk5cjnKIgDQfhPipqmvDKeE7WP_6p8LLZf0,1979
 local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -45,13 +45,13 @@ local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=uAs
 local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=CRNcxBzNd9kanyIJYaUDB7qfXYxVCvd4L2mX8jL73v0,5955
 local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=MayfzM2R0XoI7cpXlG1XJ1ktfTN_6H-Xs9RmD89UAao,39236
 local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=GMy6qDMSaVBtjWRm48XBu6TjLAy1HfcO2EFTwr8S9rk,18048
-local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=
+local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=6VMymjFJ7pyV2nv5dRfFofXgg0kG82rkwbICVnNDNH4,23352
 local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=iy-QmT99Tf2cJlfCrPbEhtMB7a_zCKppvlUKi7VBrlE,9118
 local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
 local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
+local_deep_research-0.1.16.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+local_deep_research-0.1.16.dist-info/METADATA,sha256=KHrqDTRQmo_FAt1KFZLmYlbO0eQKfjqvpHFu_kRUd_w,15151
+local_deep_research-0.1.16.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+local_deep_research-0.1.16.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
+local_deep_research-0.1.16.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
+local_deep_research-0.1.16.dist-info/RECORD,,
|
File without changes
|
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/entry_points.txt
RENAMED
File without changes
|
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|