academia-mcp 1.3.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/PKG-INFO +1 -6
  2. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/files.py +1 -0
  3. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/server.py +10 -2
  4. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/__init__.py +10 -2
  5. academia_mcp-1.4.0/academia_mcp/tools/latex.py +151 -0
  6. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp.egg-info/PKG-INFO +1 -6
  7. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp.egg-info/SOURCES.txt +2 -2
  8. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp.egg-info/requires.txt +0 -5
  9. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/pyproject.toml +9 -5
  10. academia_mcp-1.4.0/tests/test_latex.py +41 -0
  11. academia_mcp-1.3.0/academia_mcp/tools/md_to_pdf.py +0 -411
  12. academia_mcp-1.3.0/tests/test_md_to_pdf.py +0 -114
  13. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/LICENSE +0 -0
  14. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/README.md +0 -0
  15. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/__init__.py +0 -0
  16. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/__main__.py +0 -0
  17. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/llm.py +0 -0
  18. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/py.typed +0 -0
  19. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/anthology_search.py +0 -0
  20. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/arxiv_download.py +0 -0
  21. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/arxiv_search.py +0 -0
  22. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/bitflip.py +0 -0
  23. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/document_qa.py +0 -0
  24. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/hf_datasets_search.py +0 -0
  25. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/py.typed +0 -0
  26. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/s2_citations.py +0 -0
  27. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/visit_webpage.py +0 -0
  28. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/tools/web_search.py +0 -0
  29. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp/utils.py +0 -0
  30. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp.egg-info/dependency_links.txt +0 -0
  31. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp.egg-info/entry_points.txt +0 -0
  32. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/academia_mcp.egg-info/top_level.txt +0 -0
  33. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/setup.cfg +0 -0
  34. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_anthology_search.py +0 -0
  35. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_arxiv_download.py +0 -0
  36. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_arxiv_search.py +0 -0
  37. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_bitflip.py +0 -0
  38. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_document_qa.py +0 -0
  39. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_extract_json.py +0 -0
  40. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_hf_dataset_search.py +0 -0
  41. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_s2_citations.py +0 -0
  42. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_visit_webpage.py +0 -0
  43. {academia_mcp-1.3.0 → academia_mcp-1.4.0}/tests/test_web_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.3.0
3
+ Version: 1.4.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -22,16 +22,11 @@ Requires-Dist: markdownify==0.14.1
22
22
  Requires-Dist: acl-anthology==0.5.2
23
23
  Requires-Dist: markdown==3.7.0
24
24
  Requires-Dist: types-markdown==3.7.0.20250322
25
- Requires-Dist: black==25.1.0
26
- Requires-Dist: mypy==1.16.0
27
- Requires-Dist: flake8==7.2.0
28
25
  Requires-Dist: huggingface-hub>=0.32.4
29
26
  Requires-Dist: fire>=0.7.0
30
- Requires-Dist: pytest>=8.4.1
31
27
  Requires-Dist: openai>=1.97.1
32
28
  Requires-Dist: jinja2>=3.1.6
33
29
  Requires-Dist: datasets>=4.0.0
34
- Requires-Dist: pytest-asyncio>=1.1.0
35
30
  Dynamic: license-file
36
31
 
37
32
  # Academia MCP
@@ -5,6 +5,7 @@ from pathlib import Path
5
5
  DIR_PATH = Path(__file__).parent
6
6
  ROOT_PATH = DIR_PATH.parent
7
7
  DEFAULT_WORKSPACE_DIR_PATH: Path = DIR_PATH / "workdir"
8
+ DEFAULT_LATEX_TEMPLATES_DIR_PATH: Path = DIR_PATH / "latex_templates"
8
9
 
9
10
 
10
11
  class WorkspaceDirectory:
@@ -12,7 +12,12 @@ from .tools.s2_citations import s2_get_citations, s2_get_references
12
12
  from .tools.hf_datasets_search import hf_datasets_search
13
13
  from .tools.anthology_search import anthology_search
14
14
  from .tools.document_qa import document_qa
15
- from .tools.md_to_pdf import md_to_pdf
15
+ from .tools.latex import (
16
+ compile_latex_from_file,
17
+ compile_latex_from_str,
18
+ get_latex_template,
19
+ get_latex_templates_list,
20
+ )
16
21
  from .tools.web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
17
22
  from .tools.visit_webpage import visit_webpage
18
23
  from .tools.bitflip import (
@@ -58,7 +63,10 @@ def run(
58
63
  server.add_tool(s2_get_references)
59
64
  server.add_tool(hf_datasets_search)
60
65
  server.add_tool(anthology_search)
61
- server.add_tool(md_to_pdf)
66
+ server.add_tool(compile_latex_from_file)
67
+ server.add_tool(compile_latex_from_str)
68
+ server.add_tool(get_latex_template)
69
+ server.add_tool(get_latex_templates_list)
62
70
  server.add_tool(visit_webpage)
63
71
 
64
72
  if not disable_web_search_tools:
@@ -4,7 +4,12 @@ from .arxiv_download import arxiv_download
4
4
  from .hf_datasets_search import hf_datasets_search
5
5
  from .s2_citations import s2_get_references, s2_get_citations
6
6
  from .document_qa import document_qa
7
- from .md_to_pdf import md_to_pdf
7
+ from .latex import (
8
+ compile_latex_from_file,
9
+ compile_latex_from_str,
10
+ get_latex_template,
11
+ get_latex_templates_list,
12
+ )
8
13
  from .web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
9
14
  from .visit_webpage import visit_webpage
10
15
  from .bitflip import extract_bitflip_info, generate_research_proposal, score_research_proposals
@@ -18,7 +23,10 @@ __all__ = [
18
23
  "s2_get_citations",
19
24
  "hf_datasets_search",
20
25
  "document_qa",
21
- "md_to_pdf",
26
+ "compile_latex_from_file",
27
+ "compile_latex_from_str",
28
+ "get_latex_template",
29
+ "get_latex_templates_list",
22
30
  "web_search",
23
31
  "tavily_web_search",
24
32
  "exa_web_search",
@@ -0,0 +1,151 @@
1
+ import re
2
+ import subprocess
3
+ import shutil
4
+ import tempfile
5
+ import json
6
+ from pathlib import Path
7
+
8
+
9
+ from academia_mcp.files import get_workspace_dir, DEFAULT_LATEX_TEMPLATES_DIR_PATH
10
+
11
+
12
def get_latex_templates_list() -> str:
    """
    Get the list of available latex templates.
    Always use one of the templates from the list.

    Returns a JSON list serialized to a string.
    Use `json.loads` to deserialize the result.
    The names are sorted alphabetically.
    """
    # get_latex_template() resolves a name to the directory <name>/ holding <name>.tex and
    # <name>.sty, so only directories are reported; stray files in the templates folder
    # are not valid template names. Sorting keeps the result independent of the
    # order in which the filesystem happens to enumerate entries.
    template_names = sorted(
        path.name for path in DEFAULT_LATEX_TEMPLATES_DIR_PATH.glob("*") if path.is_dir()
    )
    return json.dumps(template_names)
21
+
22
+
23
def get_latex_template(template_name: str) -> str:
    """
    Get the latex template by name.

    Returns a JSON object serialized to a string.
    Use `json.loads` to deserialize the result.
    The structure is: {"template": "...", "style": "..."}

    Args:
        template_name: The name of the latex template. Must be a plain directory name
            (no path separators), e.g. one returned by `get_latex_templates_list`.

    Raises:
        FileNotFoundError: If the name is not a plain name, or the template directory,
            its .tex file or its .sty file does not exist.
    """
    # template_name is supplied by the tool caller. Accept a single path component only,
    # so values such as "../x" or an absolute path cannot address files outside the
    # templates directory (joining an absolute path would discard the base entirely).
    is_plain_name = (
        template_name not in ("", ".", "..") and Path(template_name).name == template_name
    )
    template_dir_path = DEFAULT_LATEX_TEMPLATES_DIR_PATH / template_name
    if not is_plain_name or not template_dir_path.is_dir():
        raise FileNotFoundError(
            f"Template {template_name} not found in {DEFAULT_LATEX_TEMPLATES_DIR_PATH}"
        )

    template_path = template_dir_path / f"{template_name}.tex"
    style_path = template_dir_path / f"{template_name}.sty"
    if not template_path.exists():
        raise FileNotFoundError(f"Template file {template_path} not found in {template_dir_path}")
    if not style_path.exists():
        raise FileNotFoundError(f"Style file {style_path} not found in {template_dir_path}")

    # Read as UTF-8 explicitly (the compile step writes the .tex source as UTF-8 too)
    # instead of depending on the platform's locale encoding.
    return json.dumps(
        {
            "template": template_path.read_text(encoding="utf-8"),
            "style": style_path.read_text(encoding="utf-8"),
        }
    )
46
+
47
+
48
def compile_latex_from_file(
    input_filename: str, output_filename: str = "output.pdf", timeout: int = 60
) -> str:
    """
    Compile a latex file.

    Reads the file as UTF-8 and hands its content to `compile_latex_from_str`.
    Returns a string with the result of the compilation.

    Args:
        input_filename: The path to the latex file.
        output_filename: The path to the output pdf file.
        timeout: The timeout for the compilation. 60 seconds by default.
    """
    latex_code = Path(input_filename).read_text(encoding="utf-8")
    return compile_latex_from_str(latex_code, output_filename, timeout)
64
+
65
+
66
def compile_latex_from_str(
    latex_code: str, output_filename: str = "output.pdf", timeout: int = 60
) -> str:
    """
    Compile a latex code.

    Returns a string with the result of the compilation.

    Args:
        latex_code: The latex code to compile.
        output_filename: The path to the output pdf file. It is resolved against the
            workspace directory and must stay inside it; ".pdf" is appended if missing.
        timeout: The timeout for the compilation. 60 seconds by default.
    """
    destination_name = (
        output_filename if output_filename.lower().endswith(".pdf") else f"{output_filename}.pdf"
    )

    try:
        workspace_dir = Path(get_workspace_dir())
        output_pdf_path = workspace_dir / destination_name

        # output_filename is caller-controlled: "../x.pdf" or an absolute path would
        # otherwise move the PDF to (and overwrite files at) arbitrary locations.
        try:
            output_pdf_path.resolve().relative_to(workspace_dir.resolve())
        except ValueError:
            return (
                "Compilation failed: "
                "output_filename must be located inside the workspace directory."
            )

        if shutil.which("pdflatex") is None:
            return "pdflatex is not installed or not found in PATH."

        with tempfile.TemporaryDirectory(dir=str(workspace_dir), prefix="temp_latex_") as temp_dir:
            temp_dir_path = Path(temp_dir)
            tex_filename = "temp.tex"
            pdf_filename = "temp.pdf"
            (temp_dir_path / tex_filename).write_text(latex_code, encoding="utf-8")

            try:
                _copy_local_style_files(latex_code, temp_dir_path)
            except OSError:
                # Best effort only: if a bundled style cannot be copied, pdflatex either
                # finds the package in the TeX installation or reports it as missing.
                pass

            try:
                subprocess.run(
                    [
                        "pdflatex",
                        "-interaction=nonstopmode",
                        tex_filename,
                    ],
                    cwd=str(temp_dir_path),
                    check=True,
                    capture_output=True,
                    # TeX console output is not guaranteed to be valid in the locale
                    # encoding; decode leniently so a stray byte cannot turn a
                    # compilation into an "unexpected error".
                    encoding="utf-8",
                    errors="replace",
                    timeout=timeout,
                )
            except subprocess.TimeoutExpired:
                return f"Compilation timed out after {timeout} seconds"
            except subprocess.CalledProcessError as e:
                return _format_latex_failure(e.stdout or "", e.stderr or "")

            pdf_path = temp_dir_path / pdf_filename
            if pdf_path.exists():
                # Allow names such as "papers/draft.pdf" inside the workspace.
                output_pdf_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(pdf_path), str(output_pdf_path))
                return f"Compilation successful! PDF file saved as {destination_name}"

            return (
                "Compilation completed, but PDF file was not created. Check LaTeX code for errors."
            )
    except Exception as e:
        return f"Compilation failed due to an unexpected error: {e}"


# Matches \usepackage{pkgA,pkgB} with an optional [options] argument; group 1 is the
# comma-separated package list.
_USEPACKAGE_RE = re.compile(r"\\usepackage(?:\[[^\]]*\])?\{([^}]+)\}")


def _copy_local_style_files(latex_code: str, target_dir: Path) -> None:
    """Copy bundled .sty files for the packages requested in latex_code into target_dir."""
    wanted: set = set()
    for match in _USEPACKAGE_RE.finditer(latex_code):
        for raw_name in match.group(1).split(","):
            pkg = raw_name.strip()
            if pkg:
                wanted.add(f"{pkg}.sty")
    if not wanted:
        return

    # Compare plain file names instead of using the package name as a glob pattern:
    # names with glob metacharacters or path separators then simply never match, so
    # they can neither break the search nor place a copy outside target_dir.
    for candidate in DEFAULT_LATEX_TEMPLATES_DIR_PATH.rglob("*.sty"):
        if candidate.name in wanted and candidate.is_file():
            shutil.copyfile(candidate, target_dir / candidate.name)
            wanted.discard(candidate.name)  # first match per package wins
            if not wanted:
                break


def _format_latex_failure(stdout: str, stderr: str) -> str:
    """Build the failure message, preferring the lines that look like LaTeX errors."""
    combined_output = stdout + "\n" + stderr
    error_lines = [
        line for line in combined_output.split("\n") if "error" in line.lower() or "!" in line
    ]
    if error_lines:
        return "Compilation failed. LaTeX errors:\n" + "\n".join(error_lines)
    return f"Compilation failed. Full LaTeX output:\n{combined_output}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.3.0
3
+ Version: 1.4.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -22,16 +22,11 @@ Requires-Dist: markdownify==0.14.1
22
22
  Requires-Dist: acl-anthology==0.5.2
23
23
  Requires-Dist: markdown==3.7.0
24
24
  Requires-Dist: types-markdown==3.7.0.20250322
25
- Requires-Dist: black==25.1.0
26
- Requires-Dist: mypy==1.16.0
27
- Requires-Dist: flake8==7.2.0
28
25
  Requires-Dist: huggingface-hub>=0.32.4
29
26
  Requires-Dist: fire>=0.7.0
30
- Requires-Dist: pytest>=8.4.1
31
27
  Requires-Dist: openai>=1.97.1
32
28
  Requires-Dist: jinja2>=3.1.6
33
29
  Requires-Dist: datasets>=4.0.0
34
- Requires-Dist: pytest-asyncio>=1.1.0
35
30
  Dynamic: license-file
36
31
 
37
32
  # Academia MCP
@@ -21,7 +21,7 @@ academia_mcp/tools/arxiv_search.py
21
21
  academia_mcp/tools/bitflip.py
22
22
  academia_mcp/tools/document_qa.py
23
23
  academia_mcp/tools/hf_datasets_search.py
24
- academia_mcp/tools/md_to_pdf.py
24
+ academia_mcp/tools/latex.py
25
25
  academia_mcp/tools/py.typed
26
26
  academia_mcp/tools/s2_citations.py
27
27
  academia_mcp/tools/visit_webpage.py
@@ -33,7 +33,7 @@ tests/test_bitflip.py
33
33
  tests/test_document_qa.py
34
34
  tests/test_extract_json.py
35
35
  tests/test_hf_dataset_search.py
36
- tests/test_md_to_pdf.py
36
+ tests/test_latex.py
37
37
  tests/test_s2_citations.py
38
38
  tests/test_visit_webpage.py
39
39
  tests/test_web_search.py
@@ -10,13 +10,8 @@ markdownify==0.14.1
10
10
  acl-anthology==0.5.2
11
11
  markdown==3.7.0
12
12
  types-markdown==3.7.0.20250322
13
- black==25.1.0
14
- mypy==1.16.0
15
- flake8==7.2.0
16
13
  huggingface-hub>=0.32.4
17
14
  fire>=0.7.0
18
- pytest>=8.4.1
19
15
  openai>=1.97.1
20
16
  jinja2>=3.1.6
21
17
  datasets>=4.0.0
22
- pytest-asyncio>=1.1.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "academia-mcp"
7
- version = "1.3.0"
7
+ version = "1.4.0"
8
8
  description = "MCP server that provides different tools to search for scientific publications"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -29,15 +29,19 @@ dependencies = [
29
29
  "acl-anthology==0.5.2",
30
30
  "markdown==3.7.0",
31
31
  "types-markdown==3.7.0.20250322",
32
- "black==25.1.0",
33
- "mypy==1.16.0",
34
- "flake8==7.2.0",
35
32
  "huggingface-hub>=0.32.4",
36
33
  "fire>=0.7.0",
37
- "pytest>=8.4.1",
38
34
  "openai>=1.97.1",
39
35
  "jinja2>=3.1.6",
40
36
  "datasets>=4.0.0",
37
+ ]
38
+
39
+ [dependency-groups]
40
+ dev = [
41
+ "black==25.1.0",
42
+ "mypy==1.16.0",
43
+ "flake8==7.2.0",
44
+ "pytest>=8.4.1",
41
45
  "pytest-asyncio>=1.1.0",
42
46
  ]
43
47
 
@@ -0,0 +1,41 @@
1
+ import json
2
+ import tempfile
3
+ from pathlib import Path
4
+
5
+ from academia_mcp.tools.latex import (
6
+ compile_latex_from_file,
7
+ compile_latex_from_str,
8
+ get_latex_template,
9
+ get_latex_templates_list,
10
+ )
11
+
12
+
13
def test_latex_get_latex_templates_list() -> None:
    """The template list is non-empty and includes the agents4science_2025 template."""
    available = json.loads(get_latex_templates_list())
    assert len(available) > 0
    assert "agents4science_2025" in available
17
+
18
+
19
def test_latex_get_latex_template() -> None:
    """The template payload carries both a "template" and a "style" entry, neither None."""
    payload = json.loads(get_latex_template("agents4science_2025"))
    expected_keys = ("template", "style")
    for key in expected_keys:
        assert key in payload
    for key in expected_keys:
        assert payload[key] is not None
25
+
26
+
27
def test_latex_compile_latex_from_str() -> None:
    """Compiling the agents4science_2025 template source reports success."""
    latex_source = json.loads(get_latex_template("agents4science_2025"))["template"]
    outcome = compile_latex_from_str(latex_source, "test.pdf")
    assert "Compilation successful" in outcome
31
+
32
+
33
def test_latex_compile_latex_from_file() -> None:
    """Compiling the template from a .tex file on disk reports success."""
    latex_source = json.loads(get_latex_template("agents4science_2025"))["template"]
    with tempfile.TemporaryDirectory() as scratch_dir:
        tex_file_path = Path(scratch_dir) / "temp.tex"
        tex_file_path.write_text(latex_source, encoding="utf-8")
        outcome = compile_latex_from_file(str(tex_file_path), "test.pdf")
        assert "Compilation successful" in outcome
@@ -1,411 +0,0 @@
1
- import markdown
2
- import re
3
- import os
4
- import subprocess
5
- import shutil
6
- import xml.dom.minidom
7
- import xml.etree.ElementTree as etree
8
- from typing import Optional, Any
9
-
10
- from markdown.core import Markdown
11
-
12
- from academia_mcp.files import get_workspace_dir
13
-
14
-
15
- START_SINGLE_QUOTE_RE = re.compile(r"(^|\s|\")'")
16
- START_DOUBLE_QUOTE_RE = re.compile(r"(^|\s|'|`)\"")
17
- END_DOUBLE_QUOTE_RE = re.compile(r'"(,|\.|\s|$)')
18
-
19
-
20
- MAIN_TEMPLATE = """\\documentclass{{article}}
21
- \\usepackage[utf8]{{inputenc}}
22
- \\usepackage[T1]{{fontenc}}
23
- \\usepackage{{textcomp}}
24
- \\usepackage{{amsmath}}
25
- \\usepackage{{float}}
26
- \\usepackage{{graphicx}}
27
- \\usepackage{{enumitem}}
28
- \\usepackage{{quoting}}
29
- \\usepackage{{booktabs}}
30
- \\usepackage{{caption}}
31
- \\usepackage{{siunitx}}
32
- \\sisetup{{
33
- group-separator = {{,}},
34
- output-decimal-marker = {{.}}
35
- }}
36
- \\usepackage{{hyperref}}
37
-
38
- \\author{{Holosophos}}
39
-
40
- \\begin{{document}}
41
-
42
- {latex_content}
43
-
44
- \\end{{document}}"""
45
-
46
-
47
- IMAGE_TEMPLATE = """\\begin{{figure}}[H]
48
- \\centering
49
- \\includegraphics[width=\\linewidth]{{{src}}}
50
- \\caption{{{alt}}}
51
- \\end{{figure}}"""
52
-
53
-
54
- TABLE_TEMPLATE = """
55
- \\begin{{table}}[h]
56
- \\begin{{tabular}}{{{descriptor}}}
57
- {core}
58
- \\hline
59
- \\end{{tabular}}
60
- \\\\[5pt]
61
- \\caption{{{caption}}}
62
- \\end{{table}}
63
- """
64
-
65
- ITEMIZE_TEMPLATE = """
66
- \\begin{{itemize}}
67
- {content}
68
- \\end{{itemize}}
69
- """
70
-
71
- QUOTE_TEMPLATE = """
72
- \\begin{{quotation}}
73
- {content}
74
- \\end{{quotation}}
75
- """
76
-
77
- VERBATIM_TEMPLATE = """
78
- \\begin{{verbatim}}
79
- {content}
80
- \\end{{verbatim}}
81
- """
82
-
83
-
84
- MAKETITLE = """
85
- % ----------------------------------------------------------------
86
- \\maketitle
87
- % ----------------------------------------------------------------
88
- """
89
-
90
-
91
- def inline_html_latex(text: str) -> str:
92
- out = text
93
- if re.search(r"&ldquo;.*?&rdquo;", text, flags=re.DOTALL):
94
- out = out.replace("&ldquo;", "\\enquote{").replace("&rdquo;", "}")
95
- if re.search(r"&lsquo;.*?&rsquo;", text, flags=re.DOTALL):
96
- out = out.replace("&lsquo;", "\\enquote{").replace("&rsquo;", "}")
97
- if re.search(r"&ldquo;.*?&ldquo;", text, flags=re.DOTALL):
98
- out = out.replace("&ldquo;", "\\enquote{", 1).replace("&ldquo;", "}", 1)
99
- if re.search(r"&laquo;.*?&raquo;", text, flags=re.DOTALL):
100
- out = out.replace("&laquo;", "\\enquote{").replace("&raquo;", "}")
101
- out = out.replace("...", "\\dots")
102
- out = out.replace("&hellip;", "\\dots")
103
- out = out.replace("&ndash;", "--")
104
- out = out.replace("&mdash;", "---")
105
- out = out.replace("\\|", "|")
106
- return out
107
-
108
-
109
- def unescape_html_entities(text: str) -> str:
110
- mapping = {
111
- "&amp;": "&",
112
- "&lt;": "<",
113
- "&gt;": ">",
114
- "&quot;": '"',
115
- }
116
- for k, v in mapping.items():
117
- text = text.replace(k, v)
118
- return text
119
-
120
-
121
- def escape_latex_entities(text: str) -> str:
122
- out = unescape_html_entities(text)
123
- out = out.replace("%", "\\%")
124
- out = out.replace("&", "\\&")
125
- out = out.replace("#", "\\#")
126
- out = START_SINGLE_QUOTE_RE.sub(r"\g<1>`", out)
127
- out = START_DOUBLE_QUOTE_RE.sub(r"\g<1>``", out)
128
- out = END_DOUBLE_QUOTE_RE.sub(r"''\g<1>", out)
129
- return out
130
-
131
-
132
- class LaTeXExtension(markdown.Extension):
133
- def __init__(self, configs: Optional[Any] = None) -> None:
134
- self.reset()
135
-
136
- def extendMarkdown(self, md: Markdown) -> None:
137
- self.md = md
138
- latex_tp = LaTeXTreeProcessor()
139
- math_pp = MathTextPostProcessor()
140
- table_pp = TableTextPostProcessor()
141
- image_pp = ImageTextPostProcessor()
142
- link_pp = LinkTextPostProcessor()
143
- unescape_html_pp = UnescapeHtmlTextPostProcessor()
144
-
145
- md.treeprocessors.register(latex_tp, "latex", 20)
146
- md.postprocessors.register(unescape_html_pp, "unescape_html", 20)
147
- md.postprocessors.register(math_pp, "math", 20)
148
- md.postprocessors.register(image_pp, "image", 20)
149
- md.postprocessors.register(table_pp, "table", 20)
150
- md.postprocessors.register(link_pp, "link", 20)
151
-
152
- def reset(self) -> None:
153
- pass
154
-
155
-
156
- class LaTeXTreeProcessor(markdown.treeprocessors.Treeprocessor):
157
- def run(self, doc: etree.Element) -> None:
158
- latex_text = self.tolatex(doc)
159
- doc.clear()
160
- latex_node = etree.Element("plaintext")
161
- latex_node.text = latex_text
162
- doc.append(latex_node)
163
-
164
- def tolatex(self, ournode: etree.Element) -> str:
165
- buf = ""
166
- subcontent = ""
167
-
168
- if ournode.text:
169
- subcontent += escape_latex_entities(ournode.text)
170
-
171
- for child in list(ournode):
172
- subcontent += self.tolatex(child)
173
-
174
- tag = ournode.tag
175
- if tag == "h1":
176
- buf += "\n\\title{%s}\n" % subcontent
177
- buf += MAKETITLE
178
- elif tag == "h2":
179
- buf += "\n\n\\section{%s}\n" % subcontent
180
- elif tag == "h3":
181
- buf += "\n\n\\subsection{%s}\n" % subcontent
182
- elif tag == "h4":
183
- buf += "\n\\subsubsection{%s}\n" % subcontent
184
- elif tag == "hr":
185
- buf += "\\noindent\\makebox[\\linewidth]{\\rule{\\linewidth}{0.4pt}}"
186
- elif tag == "ul":
187
- buf += ITEMIZE_TEMPLATE.format(content=subcontent.strip())
188
- elif tag == "ol":
189
- buf += " \\begin{enumerate}"
190
- if "start" in ournode.attrib:
191
- start = int(ournode.attrib["start"]) - 1
192
- buf += "\\setcounter{enumi}{" + str(start) + "}"
193
- buf += f"\n{subcontent}\n\\end{{enumerate}}"
194
- elif tag == "li":
195
- buf += "\n \\item %s" % subcontent.strip()
196
- elif tag == "blockquote":
197
- buf += QUOTE_TEMPLATE.format(content=subcontent.strip())
198
- elif tag == "pre":
199
- buf += VERBATIM_TEMPLATE.format(content=subcontent.strip())
200
- elif tag == "q":
201
- buf += "`%s'" % subcontent.strip()
202
- elif tag == "p":
203
- buf += "\n%s\n" % subcontent.strip()
204
- elif tag == "sup":
205
- buf += "\\footnote{%s}" % subcontent.strip()
206
- elif tag == "strong":
207
- buf += "\\textbf{%s}" % subcontent.strip()
208
- elif tag == "em":
209
- buf += "\\emph{%s}" % subcontent.strip()
210
- elif tag == "table":
211
- buf += "\n\n<table>%s</table>\n\n" % subcontent
212
- elif tag == "thead":
213
- buf += "<thead>%s</thead>" % subcontent
214
- elif tag == "tbody":
215
- buf += "<tbody>%s</tbody>" % subcontent
216
- elif tag == "tr":
217
- buf += "<tr>%s</tr>" % subcontent
218
- elif tag == "th":
219
- buf += "<th>%s</th>" % subcontent
220
- elif tag == "td":
221
- buf += "<td>%s</td>" % subcontent
222
- elif tag == "img":
223
- buf += '<img src="%s" alt="%s" />' % (
224
- ournode.get("src"),
225
- ournode.get("alt"),
226
- )
227
- elif tag == "a":
228
- href = ournode.get("href")
229
- assert href
230
- buf += '<a href="%s">%s</a>' % (
231
- escape_latex_entities(href),
232
- subcontent,
233
- )
234
- else:
235
- buf = subcontent
236
-
237
- if ournode.tail:
238
- buf += escape_latex_entities(ournode.tail)
239
-
240
- return buf
241
-
242
-
243
- class Table2Latex:
244
- def convert_markdown_table(self, instr: str) -> str:
245
- lines = instr.split("\n")
246
- headers = lines[0].strip("|").split("|")
247
- cols = len(headers)
248
- buf = (
249
- "\\begin{table}[h]\n\\centering\n\\begin{tabular}{|"
250
- + "|".join(["l"] * cols)
251
- + "|}\n\\hline\n"
252
- )
253
- buf += (
254
- " & ".join([f"\\textbf{{{header.strip()}}}" for header in headers]) + " \\\\\n\\hline\n"
255
- )
256
- for line in lines[2:]:
257
- cells = line.strip("|").split("|")
258
- buf += " & ".join([cell.strip() for cell in cells]) + " \\\\\n\\hline\n"
259
- buf += "\\end{tabular}\n\\end{table}"
260
- return buf
261
-
262
-
263
- class Img2Latex:
264
- def convert(self, instr: str) -> str:
265
- dom = xml.dom.minidom.parseString(instr)
266
- img = dom.documentElement
267
- assert img is not None
268
- src = img.getAttribute("src")
269
- alt = img.getAttribute("alt")
270
- return IMAGE_TEMPLATE.format(src=src, alt=alt)
271
-
272
-
273
- class Link2Latex:
274
- def convert(self, instr: str) -> str:
275
- dom = xml.dom.minidom.parseString(instr)
276
- link = dom.documentElement
277
- assert link is not None
278
- href = link.getAttribute("href")
279
- matches = re.search(r">([^<]+)", instr)
280
- desc = ""
281
- if matches:
282
- desc = matches.group(1)
283
- return r"\href{%s}{%s}" % (href, desc) if href != desc else r"\url{%s}" % href
284
-
285
-
286
- class ImageTextPostProcessor(markdown.postprocessors.Postprocessor):
287
- def run(self, instr: str) -> str:
288
- converter = Img2Latex()
289
- new_blocks = []
290
- for block in instr.split("\n\n"):
291
- stripped = block.strip()
292
- if stripped.startswith("<img"):
293
- stripped = re.sub(r"<\/?plaintext[^>]*>", "", stripped, flags=re.IGNORECASE)
294
- new_blocks.append(converter.convert(stripped).strip())
295
- else:
296
- new_blocks.append(block)
297
- return "\n\n".join(new_blocks)
298
-
299
-
300
- class LinkTextPostProcessor(markdown.postprocessors.Postprocessor):
301
- def run(self, instr: str) -> str:
302
- converter = Link2Latex()
303
- new_blocks = []
304
- for block in instr.split("\n\n"):
305
- stripped = block.strip()
306
- matches = re.findall(r"<a[^>]*>[^<]+</a>", stripped)
307
- if matches:
308
- for match in matches:
309
- stripped = stripped.replace(match, converter.convert(match).strip())
310
- new_blocks.append(stripped)
311
- else:
312
- new_blocks.append(block)
313
- return "\n\n".join(new_blocks)
314
-
315
-
316
- class UnescapeHtmlTextPostProcessor(markdown.postprocessors.Postprocessor):
317
- def run(self, text: str) -> str:
318
- return unescape_html_entities(inline_html_latex(text))
319
-
320
-
321
- class MathTextPostProcessor(markdown.postprocessors.Postprocessor):
322
- def run(self, instr: str) -> str:
323
- instr = re.sub(r"\$\$([^\$]*)\$\$", r"\\[\1\\]", instr)
324
- instr = re.sub(r"\$([^\$]*)\$", r"\\(\1\\)", instr)
325
- instr = instr.replace("\\lt", "<").replace(" * ", " \\cdot ").replace("\\del", "\\partial")
326
- return instr
327
-
328
-
329
- class TableTextPostProcessor(markdown.postprocessors.Postprocessor):
330
- def run(self, instr: str) -> str:
331
- converter = Table2Latex()
332
- new_blocks = []
333
- for block in instr.split("\n\n"):
334
- stripped = block.strip()
335
- if re.match(r"\|.*\|", stripped): # Check for Markdown table
336
- new_blocks.append(converter.convert_markdown_table(stripped).strip())
337
- else:
338
- new_blocks.append(block)
339
- return "\n\n".join(new_blocks)
340
-
341
-
342
- def convert_md_to_latex(md_content: str) -> str:
343
- md = markdown.Markdown(extensions=[LaTeXExtension()])
344
- latex_content = md.convert(md_content)
345
- latex_content = re.sub(r"<\/?plaintext[^>]*>", "", latex_content, flags=re.IGNORECASE)
346
- return MAIN_TEMPLATE.format(latex_content=latex_content)
347
-
348
-
349
- def md_to_pdf(markdown_text: str, output_filename: str = "output") -> str:
350
- """
351
- Convert Markdown to PDF via LaTeX.
352
-
353
- Args:
354
- markdown_text: Markdown text
355
- output_filename: Output filename (without extension)
356
-
357
- Returns:
358
- Message about the compilation result
359
- """
360
-
361
- latex_code = convert_md_to_latex(markdown_text)
362
-
363
- temp_dir = get_workspace_dir() / "temp_latex"
364
- temp_dir.mkdir(parents=True, exist_ok=True)
365
-
366
- tex_file_path = temp_dir / "temp.tex"
367
- with open(tex_file_path, "w", encoding="utf-8") as f:
368
- f.write(latex_code)
369
-
370
- if shutil.which("pdflatex") is None:
371
- shutil.rmtree(temp_dir, ignore_errors=True)
372
- return "pdflatex is not installed or not found in PATH."
373
-
374
- try:
375
- subprocess.run(
376
- [
377
- "pdflatex",
378
- "-interaction=nonstopmode",
379
- "-output-directory",
380
- temp_dir,
381
- tex_file_path,
382
- ],
383
- check=True,
384
- stdout=subprocess.PIPE,
385
- stderr=subprocess.PIPE,
386
- timeout=30,
387
- )
388
-
389
- except subprocess.TimeoutExpired:
390
- shutil.rmtree(temp_dir, ignore_errors=True)
391
- return "Compilation timed out after 30 seconds"
392
- except subprocess.CalledProcessError as e:
393
- error_msg = e.stdout.decode("utf-8")
394
- error_lines = [
395
- line for line in error_msg.split("\n") if "error" in line.lower() or "!" in line
396
- ]
397
- shutil.rmtree(temp_dir, ignore_errors=True)
398
- if error_lines:
399
- return "Compilation failed. LaTeX errors:\n" + "\n".join(error_lines)
400
- return f"Compilation failed. Full LaTeX output:\n{error_msg}"
401
-
402
- pdf_path = os.path.join(temp_dir, "temp.pdf")
403
- output_pdf_path = os.path.join(get_workspace_dir(), f"{output_filename}.pdf")
404
-
405
- if os.path.exists(pdf_path):
406
- shutil.move(pdf_path, output_pdf_path)
407
- shutil.rmtree(temp_dir, ignore_errors=True)
408
- return f"Compilation successful! PDF file saved as {output_filename}.pdf"
409
-
410
- shutil.rmtree(temp_dir, ignore_errors=True)
411
- return "Compilation completed, but PDF file was not created. Check LaTeX code for errors."
@@ -1,114 +0,0 @@
1
- from academia_mcp.tools.md_to_pdf import convert_md_to_latex, md_to_pdf
2
-
3
-
4
- TEST_MD_BASIC = """
5
- # Markdown syntax guide
6
-
7
- ## Headers
8
-
9
- ### This is a Heading h3
10
- ###### This is a Heading h6
11
-
12
- ## Emphasis
13
-
14
- *This text will be italic*
15
- _This will also be italic_
16
-
17
- **This text will be bold**
18
- __This will also be bold__
19
-
20
- _You **can** combine them_
21
- """
22
-
23
- TEST_MD_TABLES = """
24
- ## Tables
25
-
26
- | Left columns | Right columns |
27
- | ------------- |:-------------:|
28
- | left foo | right foo |
29
- | left bar | right bar |
30
- | left baz | right baz |
31
- """
32
-
33
- TEST_MD_LINKS = """
34
- ## Links
35
-
36
- You may be using [Markdown Live Preview](https://markdownlivepreview.com/).
37
- """
38
-
39
- TEST_MD_IMAGES = """
40
- ## Images
41
-
42
- ![This is an alt text.](/image/sample.webp "This is a sample image.")
43
- """
44
-
45
- TEST_MD_LISTS = """
46
- ## Lists
47
-
48
- ### Unordered
49
-
50
- * Item 1
51
- * Item 2
52
- * Item 2a
53
- * Item 2b
54
- * Item 3a
55
- * Item 3b
56
-
57
- ### Ordered
58
-
59
- 1. Item 1
60
- 2. Item 2
61
- 3. Item 3
62
- 1. Item 3a
63
- 2. Item 3b
64
- """
65
-
66
-
67
- def test_md_to_latex_basic() -> None:
68
- latex = convert_md_to_latex(TEST_MD_BASIC)
69
- assert "\\documentclass{article}" in latex
70
- assert "\\title{Markdown syntax guide}" in latex
71
- assert "\\section{Headers}" in latex
72
- assert "\\section{Emphasis}" in latex
73
- assert "\\subsection{This is a Heading h3}" in latex
74
- assert "\\emph{This text will be italic}" in latex
75
- assert "\\emph{This will also be italic}" in latex
76
- assert "\\textbf{This text will be bold}" in latex
77
- assert "\\textbf{This will also be bold}" in latex
78
- assert "\\emph{You \\textbf{can} combine them}" in latex
79
-
80
-
81
- def test_md_to_latex_tables() -> None:
82
- latex = convert_md_to_latex(TEST_MD_TABLES)
83
- assert "\\begin{table}[h]" in latex
84
- assert "\\begin{tabular}{|l|l|}" in latex
85
- assert "left foo & right foo" in latex
86
-
87
-
88
- def test_md_to_latex_links() -> None:
89
- latex = convert_md_to_latex(TEST_MD_LINKS)
90
- assert (
91
- "You may be using \\href{https://markdownlivepreview.com/}{Markdown Live Preview}." in latex
92
- )
93
-
94
-
95
- def test_md_to_latex_images() -> None:
96
- latex = convert_md_to_latex(TEST_MD_IMAGES)
97
- assert "\\begin{figure}" in latex
98
- assert "\\includegraphics[width=\\linewidth]{/image/sample.webp}" in latex
99
- assert "\\caption{This is an alt text.}" in latex
100
- assert "\\end{figure}" in latex
101
-
102
-
103
- def test_md_to_latex_lists() -> None:
104
- latex = convert_md_to_latex(TEST_MD_LISTS)
105
- assert "\\begin{itemize}" in latex
106
- assert "\\end{itemize}" in latex
107
- assert "\\begin{enumerate}" in latex
108
- assert "\\end{enumerate}" in latex
109
- assert "\\item Item 3a" in latex
110
-
111
-
112
- def test_md_to_pdf() -> None:
113
- result = md_to_pdf(TEST_MD_LISTS, "output")
114
- assert "Compilation successful" in result
File without changes
File without changes
File without changes