academic-search-mcp 0.1.3__py3-none-any.whl → 0.1.4b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

academic_search_mcp-0.1.3.dist-info/METADATA → academic_search_mcp-0.1.4b1.dist-info/METADATA
@@ -1,8 +1,9 @@
  Metadata-Version: 2.4
  Name: academic-search-mcp
- Version: 0.1.3
+ Version: 0.1.4b1
  Summary: A MCP server for searching and downloading academic papers from multiple sources.
  Author-email: "P.S Zhang" <pengsongzhang96@gmail.com>
+ Maintainer-email: "Zadneprovskiy A. A." <zadneprovskiy_econ@mail.ru>
  License-File: LICENSE
  Requires-Python: >=3.10
  Requires-Dist: beautifulsoup4>=4.12.0
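
One practical consequence of the version bump: 0.1.4b1 is a PEP 440 pre-release, so pip skips it during normal dependency resolution. Installing this exact build requires an explicit pin or the --pre flag:

    pip install academic-search-mcp==0.1.4b1
    # or opt in to any pre-release:
    pip install --pre academic-search-mcp
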
academic_search_mcp-0.1.3.dist-info/RECORD → academic_search_mcp-0.1.4b1.dist-info/RECORD
@@ -1,13 +1,13 @@
  paper_search_mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  paper_search_mcp/paper.py,sha256=Flrn3ORhsojiEdEldUtKPvGF1RivXhl84zzq8mqAeFI,2969
- paper_search_mcp/pdf_utils.py,sha256=sylqOQTFyOSlYnEzUMpSIe4VkY2kfgaQw_xd_EBYw2g,1909
+ paper_search_mcp/pdf_utils.py,sha256=kcutoE_HGrtaoheY9cFN0GW4LVkG-HnN54Mc15LPD44,2009
  paper_search_mcp/server.py,sha256=C542TF00oOUHF38F_5OU43D9RmIWQZSk7UiFHcXukWA,21663
  paper_search_mcp/academic_platforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  paper_search_mcp/academic_platforms/arxiv.py,sha256=5SsFudqH1PIXIEE_8saCQHcf75bqCL6ApRUltLpp9Ow,5911
  paper_search_mcp/academic_platforms/biorxiv.py,sha256=4k1Bg2BW-RBJiZ9jRVVmCEOge_4MtEDtXq2tMaPV0cg,6799
  paper_search_mcp/academic_platforms/core.py,sha256=6xDq3NmlVh1NIEFnTRLPNayodkztrS7CPUC-jupd-Lw,9632
  paper_search_mcp/academic_platforms/crossref.py,sha256=Zxj4U6SejaCa5o7whRmjjHVdd1U1H-DVtRP6DWzPwjk,14773
- paper_search_mcp/academic_platforms/cyberleninka.py,sha256=88p9RZxjBRn5jAaOhZLr3EpP5ibMzmd0vCh1jD6PPEs,13421
+ paper_search_mcp/academic_platforms/cyberleninka.py,sha256=X6ka5QqE7pagDPcq0dZq0EXoDeKBzjq7XJYAQw3AA_g,16656
  paper_search_mcp/academic_platforms/google_scholar.py,sha256=B8VqgauJy3RJ8nR9woe107CXM-DrHQPapQAg_f948yg,9269
  paper_search_mcp/academic_platforms/hub.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  paper_search_mcp/academic_platforms/iacr.py,sha256=Vem7q18NZRm5WXsDHsqRefyRIpl4PCceGGWYXhbXB2s,21135
@@ -17,8 +17,8 @@ paper_search_mcp/academic_platforms/pubmed.py,sha256=oS-JRHNI7lcCqxUGTlSVKp2i_QK
  paper_search_mcp/academic_platforms/sci_hub.py,sha256=oma3M_gUseDByh-0Awi8Sxr0g3yojrb8XoD9iV0Exo8,7334
  paper_search_mcp/academic_platforms/semantic.py,sha256=nk7nzrlsnrDNrHNUuRIfIBQfagfAT750J5HtdLputHQ,20594
  paper_search_mcp/academic_platforms/ssrn.py,sha256=ntf22HRBZwNY6ctG5rdXjD5iT7CaML8k_xBbCn_qjbg,13694
- academic_search_mcp-0.1.3.dist-info/METADATA,sha256=uZcbHayXO9tURHo3Yl7P50e4j3v4p20a7GoPbtlLTe4,7203
- academic_search_mcp-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- academic_search_mcp-0.1.3.dist-info/entry_points.txt,sha256=RO1wFwD6a0WO_mZY8HZBYDtITfQ1dhnTR1BZlCrkRLc,69
- academic_search_mcp-0.1.3.dist-info/licenses/LICENSE,sha256=TwRnWq1drFhdcy43SdxndU0mcfBUYBnhSJTJ4hhjfwQ,1085
- academic_search_mcp-0.1.3.dist-info/RECORD,,
+ academic_search_mcp-0.1.4b1.dist-info/METADATA,sha256=F3uHgcJYLLX58tRDUCGXhCoVkjc00C-qLrklwT_YJPE,7274
+ academic_search_mcp-0.1.4b1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ academic_search_mcp-0.1.4b1.dist-info/entry_points.txt,sha256=RO1wFwD6a0WO_mZY8HZBYDtITfQ1dhnTR1BZlCrkRLc,69
+ academic_search_mcp-0.1.4b1.dist-info/licenses/LICENSE,sha256=TwRnWq1drFhdcy43SdxndU0mcfBUYBnhSJTJ4hhjfwQ,1085
+ academic_search_mcp-0.1.4b1.dist-info/RECORD,,

paper_search_mcp/academic_platforms/cyberleninka.py
@@ -379,7 +379,9 @@ class CyberLeninkaSearcher:
              return f"Failed to download PDF: {e}"
 
      def read_paper(self, paper_id: str, save_path: str = "./downloads") -> str:
-         """Read and extract text from a CyberLeninka paper PDF.
+         """Read and extract text from a CyberLeninka paper.
+ 
+         Tries PDF extraction first, falls back to scraping article page text.
 
          Args:
              paper_id: CyberLeninka article slug
@@ -388,9 +390,83 @@ class CyberLeninkaSearcher:
          Returns:
              Extracted text or error message
          """
+         # Try PDF extraction first
          pdf_path = self.download_pdf(paper_id, save_path)
-         if not os.path.exists(pdf_path):
-             return pdf_path  # Return error message
+         if os.path.exists(pdf_path):
+             text = extract_text_from_pdf(pdf_path)
+             if text and text.strip():
+                 return text
+ 
+         # Fallback: extract text from article HTML page
+         return self._read_from_html(paper_id)
+ 
+     def _read_from_html(self, paper_id: str) -> str:
+         """Extract article text from the CyberLeninka HTML page."""
+         try:
+             self._rate_limit()
+ 
+             url = f"{self.BASE_URL}/article/n/{paper_id}"
+             response = requests.get(url, timeout=30, impersonate=self.impersonate)
+ 
+             if response.status_code != 200:
+                 return f"Failed to read paper {paper_id}: HTTP {response.status_code}"
 
-         text = extract_text_from_pdf(pdf_path)
-         return text if text else "Failed to extract text from PDF"
+             soup = BeautifulSoup(response.text, 'lxml')
+ 
+             # Build metadata header
+             parts = []
+ 
+             title_meta = soup.find("meta", {"name": "citation_title"})
+             if title_meta and title_meta.get("content"):
+                 parts.append(f"Title: {title_meta['content']}")
+ 
+             authors = [m.get("content", "") for m in soup.find_all("meta", {"name": "citation_author"}) if m.get("content")]
+             if authors:
+                 parts.append(f"Authors: {', '.join(authors)}")
+ 
+             journal_meta = soup.find("meta", {"name": "citation_journal_title"})
+             if journal_meta and journal_meta.get("content"):
+                 parts.append(f"Journal: {journal_meta['content']}")
+ 
+             date_meta = soup.find("meta", {"name": "citation_publication_date"})
+             if date_meta and date_meta.get("content"):
+                 parts.append(f"Year: {date_meta['content']}")
+ 
+             if parts:
+                 parts.append("")  # blank line separator
+ 
+             # Extract article body text
+             # CyberLeninka uses div with class "ocr" for full text
+             body_div = soup.find("div", {"class": "ocr"})
+             if not body_div:
+                 # Try alternative selectors
+                 body_div = soup.find("div", {"itemprop": "articleBody"})
+             if not body_div:
+                 body_div = soup.find("div", {"class": "full-text"})
+ 
+             if body_div:
+                 # Get text with paragraph separation
+                 paragraphs = body_div.find_all(["p", "h2", "h3", "h4"])
+                 if paragraphs:
+                     for p in paragraphs:
+                         text = p.get_text(strip=True)
+                         if text:
+                             parts.append(text)
+                 else:
+                     body_text = body_div.get_text(separator="\n", strip=True)
+                     if body_text:
+                         parts.append(body_text)
+ 
+             if len(parts) <= 4:  # Only metadata, no body text
+                 # Try description as last resort
+                 desc_meta = soup.find("meta", {"name": "description"})
+                 if desc_meta and desc_meta.get("content"):
+                     parts.append(f"Abstract: {desc_meta['content']}")
+                 else:
+                     return f"No text content found for paper {paper_id}"
+ 
+             return "\n".join(parts)
+ 
+         except Exception as e:
+             logger.error(f"Error reading CyberLeninka article HTML: {e}")
+             return f"Failed to read paper: {e}"
paper_search_mcp/pdf_utils.py
@@ -41,12 +41,14 @@ def _extract_with_pdftotext(pdf_path: str) -> Optional[str]:
              ['pdftotext', '-layout', '-enc', 'UTF-8', pdf_path, '-'],
              capture_output=True,
              text=True,
+             encoding='utf-8',
+             errors='replace',
              timeout=60
          )
-         if result.returncode == 0:
+         if result.returncode == 0 and result.stdout:
              return result.stdout.strip()
          return None
-     except (subprocess.TimeoutExpired, subprocess.SubprocessError):
+     except Exception:
          return None
 
 
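
The encoding change is worth spelling out. A self-contained sketch of the same call (the helper name is illustrative): with text=True and no encoding=, subprocess decodes output using the locale's preferred encoding, often cp1251/cp1252 on Windows, which can raise UnicodeDecodeError on the UTF-8 Cyrillic text pdftotext emits here; errors="replace" keeps partial text rather than failing on a single malformed byte.

    import subprocess

    def pdftotext_stdout(pdf_path: str) -> str | None:
        # Decode exactly what `pdftotext -enc UTF-8` writes, locale be damned.
        try:
            result = subprocess.run(
                ["pdftotext", "-layout", "-enc", "UTF-8", pdf_path, "-"],
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=60,
            )
        except (OSError, subprocess.SubprocessError):
            return None
        return result.stdout.strip() if result.returncode == 0 and result.stdout else None
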
@@ -61,7 +63,9 @@ def _extract_with_pypdf(pdf_path: str) -> str:
          reader = PdfReader(pdf_path)
          text = ""
          for page in reader.pages:
-             text += page.extract_text() + "\n"
+             page_text = page.extract_text()
+             if page_text:
+                 text += page_text + "\n"
          return text.strip()
      except Exception as e:
          return f"Error extracting text: {e}"