academia-mcp 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- academia_mcp/llm.py +38 -0
- academia_mcp/server.py +9 -0
- academia_mcp/tools/__init__.py +4 -0
- academia_mcp/tools/bitflip.py +282 -0
- academia_mcp/tools/document_qa.py +8 -37
- academia_mcp/utils.py +84 -0
- {academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/METADATA +4 -1
- {academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/RECORD +12 -10
- {academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/WHEEL +0 -0
- {academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/entry_points.txt +0 -0
- {academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/top_level.txt +0 -0
academia_mcp/llm.py
ADDED
@@ -0,0 +1,38 @@
+import os
+from typing import List, Dict, Any
+
+from pydantic import BaseModel
+from openai import AsyncOpenAI
+from openai.types.chat.chat_completion_message import ChatCompletionMessage
+
+
+class ChatMessage(BaseModel):  # type: ignore
+    role: str
+    content: str | List[Dict[str, Any]]
+
+
+ChatMessages = List[ChatMessage]
+
+
+async def llm_acall(model_name: str, prompt: str) -> str:
+    key = os.getenv("OPENROUTER_API_KEY", "")
+    assert key, "Please set OPENROUTER_API_KEY in the environment variables"
+    base_url = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
+
+    messages: ChatMessages = [
+        ChatMessage(role="user", content=prompt),
+    ]
+    client = AsyncOpenAI(base_url=base_url, api_key=key)
+    response: ChatCompletionMessage = (
+        (
+            await client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                temperature=0.0,
+            )
+        )
+        .choices[0]
+        .message
+    )
+    assert response.content, "Response content is None"
+    return response.content
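For orientation, a minimal usage sketch of the new llm_acall helper (illustrative only, not part of the diff). It assumes academia-mcp is installed and OPENROUTER_API_KEY is exported; the model name and prompt are arbitrary examples.

# Usage sketch for the new async LLM helper (illustrative, not shipped with the wheel).
import asyncio

from academia_mcp.llm import llm_acall


async def main() -> None:
    # Any OpenRouter-compatible model id works here; this one mirrors the package defaults.
    answer = await llm_acall(
        model_name="deepseek/deepseek-chat-v3-0324",
        prompt="Summarize the Bit-Flip idea in one sentence.",
    )
    print(answer)


asyncio.run(main())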
academia_mcp/server.py
CHANGED
@@ -15,6 +15,12 @@ from .tools.document_qa import document_qa
 from .tools.md_to_pdf import md_to_pdf
 from .tools.web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
 from .tools.visit_webpage import visit_webpage
+from .tools.bitflip import (
+    extract_bitflip_info,
+    generate_research_proposal,
+    score_research_proposals,
+)
+

 load_dotenv()

@@ -52,6 +58,9 @@ def run(
     server.add_tool(anthology_search)
     server.add_tool(md_to_pdf)
     server.add_tool(visit_webpage)
+    server.add_tool(extract_bitflip_info)
+    server.add_tool(generate_research_proposal)
+    server.add_tool(score_research_proposals)

     if os.getenv("TAVILY_API_KEY"):
         server.add_tool(tavily_web_search)
academia_mcp/tools/__init__.py
CHANGED
@@ -7,6 +7,7 @@ from .document_qa import document_qa
 from .md_to_pdf import md_to_pdf
 from .web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
 from .visit_webpage import visit_webpage
+from .bitflip import extract_bitflip_info, generate_research_proposal, score_research_proposals


 __all__ = [
@@ -23,4 +24,7 @@ __all__ = [
     "exa_web_search",
     "brave_web_search",
     "visit_webpage",
+    "extract_bitflip_info",
+    "generate_research_proposal",
+    "score_research_proposals",
 ]
academia_mcp/tools/bitflip.py
ADDED
@@ -0,0 +1,282 @@
+# https://arxiv.org/abs/2504.12976
+# https://web.stanford.edu/class/cs197c/slides/02-literature-search.pdf
+
+import json
+import os
+import random
+from typing import List, Optional, Any
+
+from pydantic import BaseModel
+from datasets import load_dataset  # type: ignore
+
+from academia_mcp.tools.arxiv_download import arxiv_download
+from academia_mcp.utils import extract_json, encode_prompt
+from academia_mcp.llm import llm_acall
+
+
+class ProposalDataset:
+    dataset: Optional[List[Any]] = None
+
+    @classmethod
+    def get_dataset(cls) -> List[Any]:
+        if cls.dataset is None:
+            cls.dataset = list(load_dataset("UniverseTBD/hypogen-dr1")["train"])
+        return cls.dataset
+
+
+EXTRACT_PROMPT = """
+You are a highly advanced research assistant.
+You specialize in reading scientific papers for hypothesis generation and identifying innovative ideas.
+
+
+## Example (BERT in NLP)
+Before you begin, let's revisit the Bit-Flip concept with an example (BERT in NLP):
+- Bit: Traditional NLP models (RNNs, LSTMs) process text sequentially,
+limiting their ability to understand long-range dependencies and fully capture bidirectional context.
+- Flip: Instead, consider entire sentences at once, allowing context from both directions. This helps capture nuanced relationships among words.
+- Spark: Bidirectional context for NLP.
+
+## Framework
+A Bit-Flip inverts a commonly held assumption,
+questioning existing constraints or reapplying techniques to new domains/scales.
+The "Bit" is the prevailing belief, and the "Flip" is the counterargument.
+
+## Guidance for analysis
+1. Bit (Technical Insight):
+- Provide at least two sentences clearly stating the status quo or conventional approach.
+- Highlight the limitation or problem it creates.
+- Include enough detail so it is self-contained and does not rely on additional context from elsewhere.
+2. Flip (Innovation):
+- Provide at least two sentences describing the novel approach or perspective.
+- Explain the method or technique that enables this change.
+- Include enough detail so the Flip is understandable on its own.
+3. Spark (Core Summary):
+- A concise 4-6 word phrase capturing the core idea.
+
+Now, consider this research abstract:
+{{abstract}}
+
+Your task:
+Identify the Bit, Flip, and Spark from the abstract in a detailed manner:
+- Bit: at least two sentences, with sufficient detail about the conventional approach and its limitation.
+- Flip: at least two sentences, describing the new approach or perspective with enough detail to understand the main technique.
+- Spark: a concise 4-6 word summary of the core idea.
+
+Follow these rules:
+- Do not cite the paper itself or its authors.
+- Instead of saying "We/I introduced an idea", just say "An idea was introduced ...".
+
+Return only the JSON object in this exact format (no extra text):
+{
+    "bit": "Technical limitation or conventional approach, in at least two sentences",
+    "flip": "Innovative approach or solution, in at least two sentences",
+    "spark": "4-6 word summary"
+}
+"""
+
+IMPROVEMENT_PROMPT = """
+You are a highly advanced research assistant.
+You specialize in hypothesis generation and identifying innovative ideas.
+
+You are given a Bit, which is a technical limitation or conventional approach of some paper.
+Your task is to propose an improvement idea for the Bit called Flip and summarize it in a Spark.
+Do not propose any human annotations or human-in-the-loop, the idea should be automatically verifiable.
+Try to be as specific as possible.
+
+{% for example in examples %}
+## Example {{loop.index}}
+- Bit: {{example["bit"]}}
+- Chain of reasoning: {{example["chain_of_reasoning"]}}
+- Flip: {{example["flip"]}}
+- Spark: {{example["spark"]}}
+{% endfor %}
+
+Now, please propose a chain of reasoning that leads to an improvement idea for this Bit:
+{{bit}}
+
+{% if additional_context %}Additional context:
+{{additional_context}}{% endif %}
+
+Finalize your idea by providing the idea details:
+- Abstract: An abstract that summarizes the proposal in conference format (approximately 250 words).
+- Experiments: A list of experiments that would be conducted to validate the proposal. Ensure these are simple and feasible. Be specific in exactly how you would test the hypothesis, and detail precise algorithmic changes. Include the evaluation metrics you would use.
+- Risks and limitations: A list of potential risks and limitations of the proposal.
+
+Return only the JSON object in this exact format (no extra text):
+{
+    "chain_of_reasoning": "Chain of reasoning that leads to an improvement idea for this Bit. At least 5 sentences.",
+    "flip": "Innovative approach or solution, in at least two sentences",
+    "spark": "4-6 word summary",
+    "abstract": "An abstract that summarizes the proposal in conference format (approximately 250 words).",
+    "experiments": ["...", "..."],
+    "risks_and_limitations": "A list of potential risks and limitations of the proposal."
+}
+"""
+
+
+SCORE_PROMPT = """
+You are a highly advanced research assistant.
+You are given a list of research proposals.
+Your task is to score the proposals.
+
+Proposals:
+{% for proposal in proposals %}
+- Proposal ID: {{proposal["proposal_id"]}}
+- Spark: {{proposal["spark"]}}
+- Abstract: {{proposal["abstract"]}}
+- Experiments: {{proposal["experiments"]}}
+- Risks and limitations: {{proposal["risks_and_limitations"]}}
+{% endfor %}
+
+Here are the criteria:
+- "Strengths": A list of strengths of the proposal.
+- "Weaknesses": A list of weaknesses of the proposal.
+- "Novelty": Is the proposal novel? A rating from 1 to 4 (low, medium, high, very high).
+- "Clarity": Is the proposal clear? A rating from 1 to 4 (low, medium, high, very high).
+- "Significance": Is the proposal significant? A rating from 1 to 4 (low, medium, high, very high).
+- "Feasibility": Is the proposal feasible and easy to implement? A rating from 1 to 4 (low, medium, high, very high).
+- "Soundness": Is the proposal sound? A rating from 1 to 4 (poor, fair, good, excellent).
+- "Overall": A rating from 1 to 10 (very strong reject to award quality).
+
+Return only scores for all proposals in this exact format (no extra text):
+[
+    {
+        "proposal_id": 0,
+        "spark": "...",
+        "strengths": ["...", "..."],
+        "weaknesses": ["...", "..."],
+        "novelty": 2,
+        "clarity": 2,
+        "significance": 2,
+        "feasibility": 2,
+        "soundness": 2,
+        "overall": 5
+    },
+    ...
+]
+"""
+
+
+class BitFlipInfo(BaseModel):  # type: ignore
+    bit: str
+    flip: str
+    spark: str
+
+
+class Proposal(BaseModel):  # type: ignore
+    proposal_id: Optional[int] = None
+    flip: str
+    spark: str
+    abstract: str
+    experiments: List[str]
+    risks_and_limitations: List[str]
+
+
+class ProposalScores(BaseModel):  # type: ignore
+    proposal_id: int
+    spark: str
+    strengths: List[str]
+    weaknesses: List[str]
+    novelty: int
+    clarity: int
+    significance: int
+    feasibility: int
+    soundness: int
+    overall: int
+
+
+async def extract_bitflip_info(arxiv_id: str) -> str:
+    """
+    Extracts the Bit-Flip information from the arXiv paper.
+
+    A Bit-Flip is a technique that inverts a commonly held assumption,
+    questioning existing constraints or reapplying techniques to new domains/scales.
+    The "Bit" is the prevailing belief, and the "Flip" is the counterargument.
+
+    Returns a JSON object in this format:
+    {
+        "bit": "Technical limitation or conventional approach, in at least two sentences",
+        "flip": "Innovative approach or solution, in at least two sentences",
+        "spark": "4-6 word summary of the core idea"
+    }
+    Use `json.loads` to deserialize the result if you want to get specific fields.
+
+    Args:
+        arxiv_id: The arXiv ID of the paper to extract the Bit-Flip information from.
+    """
+    model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+    paper = arxiv_download(arxiv_id)
+    abstract = json.loads(paper)["abstract"]
+    prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
+    content = await llm_acall(model_name=model_name, prompt=prompt)
+    result = extract_json(content)
+    bitflip_info: BitFlipInfo = BitFlipInfo.model_validate(result)
+    return str(bitflip_info.model_dump_json())
+
+
+async def generate_research_proposal(bit: str, additional_context: str = "") -> str:
+    """
+    Proposes an improvement idea for the Bit.
+
+    Args:
+        bit: The Bit to propose an improvement idea for. The bit is a technical limitation or conventional approach of some paper.
+        additional_context: Additional context to use when proposing the improvement idea.
+
+    Returns a JSON string with a research proposal in this format:
+    {
+        "proposal_id": ...,
+        "flip": "Innovative approach or solution, in at least two sentences",
+        "spark": "4-6 word summary",
+        "abstract": "An abstract that summarizes the proposal in conference format (approximately 250 words).",
+        "experiments": ["...", "..."],
+        "risks_and_limitations": "A list of potential risks and limitations of the proposal."
+    }
+    Use `json.loads` to deserialize the result if you want to get specific fields.
+    """
+    model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+    examples = ProposalDataset.get_dataset()[:]
+    examples = random.choices(examples, k=4)
+
+    prompt = encode_prompt(
+        IMPROVEMENT_PROMPT, bit=bit, examples=examples, additional_context=additional_context
+    )
+    content = await llm_acall(model_name=model_name, prompt=prompt)
+    result = extract_json(content)
+    proposal: Proposal = Proposal.model_validate(result)
+    proposal.proposal_id = random.randint(0, 1000000)
+    return str(proposal.model_dump_json())
+
+
+async def score_research_proposals(proposals: List[str]) -> str:
+    """
+    Scores a list of research proposals.
+    Use proposals obtained with the `generate_research_proposal` tool.
+
+    Returns a JSON string with a list of scores in this format:
+    [
+        {
+            "proposal_id": 0,
+            "spark": "...",
+            "strengths": ["...", "..."],
+            "weaknesses": ["...", "..."],
+            "novelty": 2,
+            "clarity": 2,
+            "significance": 2,
+            "feasibility": 2,
+            "soundness": 2,
+            "overall": 5
+        },
+        ...
+    ]
+    Use `json.loads` to deserialize the result if you want to get specific fields.
+
+    Args:
+        proposals: A list of JSON strings with research proposals.
+    """
+    model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+    proposals = [Proposal.model_validate_json(proposal) for proposal in proposals]
+    prompt = encode_prompt(SCORE_PROMPT, proposals=proposals)
+    content = await llm_acall(model_name=model_name, prompt=prompt)
+    scores = extract_json(content)
+    final_scores = [ProposalScores.model_validate(score) for score in scores]
+    return json.dumps([s.model_dump() for s in final_scores], ensure_ascii=False)
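The three new tools are designed to be chained: extract a Bit from a paper, generate a proposal from it, then score the proposals. A hedged end-to-end sketch follows (not part of the diff; the arXiv ID is simply the paper referenced in the module header, and OPENROUTER_API_KEY plus network access for arXiv, Hugging Face, and the LLM are assumed).

# End-to-end sketch of the bitflip workflow (illustrative only).
import asyncio
import json

from academia_mcp.tools.bitflip import (
    extract_bitflip_info,
    generate_research_proposal,
    score_research_proposals,
)


async def main() -> None:
    # 1. Extract the Bit/Flip/Spark triple from a paper abstract.
    bitflip = json.loads(await extract_bitflip_info("2504.12976"))

    # 2. Generate a research proposal that improves on the extracted Bit.
    proposal = await generate_research_proposal(bitflip["bit"])

    # 3. Score one or more proposals; the tool expects a list of JSON strings.
    scores = json.loads(await score_research_proposals([proposal]))
    print(scores[0]["spark"], scores[0]["overall"])


asyncio.run(main())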
academia_mcp/tools/document_qa.py
CHANGED
@@ -1,18 +1,15 @@
 import os
-from typing import List, Any, Dict
+from typing import List, Any, Dict
 from dotenv import load_dotenv

 from pydantic import BaseModel
-from openai import OpenAI
-from openai.types.chat import ChatCompletionMessageParam, ChatCompletionMessage

+from academia_mcp.llm import llm_acall

 load_dotenv()

-
-
-)
-PROMPT = """Please answer the following questions based solely on the provided document.
+PROMPT = """You are a helpful assistant that answers questions about documents accurately and concisely.
+Please answer the following questions based solely on the provided document.
 If there is no answer in the document, output "There is no answer in the provided document".
 First cite ALL relevant document fragments, then provide a final answer.
 Answer all given questions one by one.
@@ -40,7 +37,7 @@ class ChatMessage(BaseModel): # type: ignore
 ChatMessages = List[ChatMessage]


-def document_qa(
+async def document_qa(
     document: str,
     question: str,
 ) -> str:
@@ -64,33 +61,7 @@ def document_qa(
     assert question and question.strip(), "Please provide non-empty 'question'"
     assert document and document.strip(), "Please provide non-empty 'document'"

-    base_url = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
-    key = os.getenv("OPENROUTER_API_KEY", "")
-    assert key, "Please set OPENROUTER_API_KEY in the environment variables"
     model_name = os.getenv("DOCUMENT_QA_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
-
-
-
-        ChatMessage(
-            role="user",
-            content=PROMPT.format(question=question, document=document),
-        ),
-    ]
-
-    sdk_messages = [
-        cast(ChatCompletionMessageParam, m.model_dump(exclude_none=True)) for m in messages
-    ]
-    client = OpenAI(base_url=base_url, api_key=key)
-    response: ChatCompletionMessage = (
-        client.chat.completions.create(
-            model=model_name,
-            messages=sdk_messages,
-            temperature=0.0,
-        )
-        .choices[0]
-        .message
-    )
-
-    if response.content is None:
-        raise Exception("Response content is None")
-    return response.content.strip()
+    prompt = PROMPT.format(question=question, document=document)
+    content = await llm_acall(model_name=model_name, prompt=prompt)
+    return content.strip()
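Since document_qa is now a coroutine that delegates to llm_acall, callers must await it. A small sketch under the same assumptions as above (package installed, OPENROUTER_API_KEY set; the document and question are made-up examples):

# Calling the now-async document_qa tool (illustrative only).
import asyncio

from academia_mcp.tools.document_qa import document_qa


async def main() -> None:
    answer = await document_qa(
        document="The library exposes an MCP server with tools for arXiv search and download.",
        question="What does the library expose?",
    )
    print(answer)


asyncio.run(main())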
academia_mcp/utils.py
CHANGED
@@ -1,7 +1,10 @@
+import re
+import json
 from urllib3.util.retry import Retry
 from typing import Dict, Any, Optional

 import requests
+from jinja2 import Template


 def post_with_retries(
@@ -61,3 +64,84 @@ def get_with_retries(
     response = session.get(url, headers=headers, timeout=timeout, params=params)
     response.raise_for_status()
     return response
+
+
+def clean_json_string(text: str) -> str:
+    try:
+        return json.dumps(json.loads(text))
+    except json.JSONDecodeError:
+        pass
+    text = text.strip()
+    text = re.sub(r",(\s*[}\]])", r"\1", text)
+    text = re.sub(r"'([^']*)':", r'"\1":', text)
+    text = re.sub(r":\s*'([^']*)'", r': "\1"', text)
+    text = re.sub(r"//.*?$", "", text, flags=re.MULTILINE)
+    text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)
+
+    prefixes_to_remove = [
+        "json:",
+        "JSON:",
+        "Here is the JSON:",
+        "Here's the JSON:",
+        "The JSON is:",
+        "Result:",
+        "Output:",
+        "Response:",
+    ]
+
+    for prefix in prefixes_to_remove:
+        if text.lower().startswith(prefix.lower()):
+            text = text[len(prefix) :].strip()
+
+    return text
+
+
+def extract_json(text: str) -> Any:
+    assert isinstance(text, str), "Input must be a string"
+
+    text = text.strip()
+    assert text, "Input must be a non-empty string"
+
+    json_blocks = re.findall(r"```json\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE)
+    for block in json_blocks:
+        try:
+            return json.loads(block.strip())
+        except json.JSONDecodeError:
+            continue
+
+    code_blocks = re.findall(r"```\s*(.*?)\s*```", text, re.DOTALL)
+    for block in code_blocks:
+        block = block.strip()
+        if block.startswith(("{", "[")):
+            try:
+                return json.loads(block)
+            except json.JSONDecodeError:
+                continue
+
+    try:
+        return json.loads(clean_json_string(text))
+    except json.JSONDecodeError:
+        pass
+
+    json_patterns = [
+        r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}",
+        r"\[[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*\]",
+        r"\{.*\}",
+        r"\[.*\]",
+    ]
+
+    for pattern in json_patterns:
+        matches = re.findall(pattern, text, re.DOTALL)
+        for match in sorted(matches, key=len, reverse=True):
+            try:
+                cleaned = clean_json_string(match.strip())
+                return json.loads(cleaned)
+            except json.JSONDecodeError:
+                continue
+
+    return None
+
+
+def encode_prompt(template: str, **kwargs: Any) -> str:
+    template_obj = Template(template)
+    return template_obj.render(**kwargs).strip()
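The two new helpers back the bitflip tools: encode_prompt renders a Jinja2 template, and extract_json pulls a JSON payload out of noisy LLM output (fenced blocks first, then cleanup heuristics). A quick illustration with made-up inputs (not part of the diff):

# Illustrative use of the new utils helpers.
from academia_mcp.utils import encode_prompt, extract_json

# encode_prompt renders a Jinja2 template string and strips the result.
template = "Bit: {{bit}}\nSpark: {{spark}}"
print(encode_prompt(template, bit="RNNs process text sequentially.", spark="Bidirectional context"))

# extract_json tolerates chatty prefixes and trailing commas.
noisy = 'Here is the JSON: {"spark": "Bidirectional context for NLP", "overall": 5,}'
print(extract_json(noisy))  # {'spark': 'Bidirectional context for NLP', 'overall': 5}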
{academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academia-mcp
-Version: 1.1.3
+Version: 1.2.0
 Summary: MCP server that provides different tools to search for scientific publications
 Author-email: Ilya Gusev <phoenixilya@gmail.com>
 Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -29,6 +29,9 @@ Requires-Dist: huggingface-hub>=0.32.4
 Requires-Dist: fire>=0.7.0
 Requires-Dist: pytest>=8.4.1
 Requires-Dist: openai>=1.97.1
+Requires-Dist: jinja2>=3.1.6
+Requires-Dist: datasets>=4.0.0
+Requires-Dist: pytest-asyncio>=1.1.0
 Dynamic: license-file

 # Academia MCP
{academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/RECORD
CHANGED
@@ -1,23 +1,25 @@
 academia_mcp/__init__.py,sha256=2Ru2I5u4cE7DrkkAsibDUEF1K6sYtqppb9VyFrRoQKI,94
 academia_mcp/__main__.py,sha256=rcmsOtJd3SA82exjrcGBuxuptcoxF8AXI7jNjiVq2BY,59
 academia_mcp/files.py,sha256=hI5dj4h0fX8V3DXKI_C8vs1fte2uc9gsBXC6prLV4o4,745
+academia_mcp/llm.py,sha256=o84FQNSbjjVSk9DlvFXWsUDiz5IOaavYU6kOqnPEG7E,1071
 academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-academia_mcp/server.py,sha256=
-academia_mcp/utils.py,sha256=
-academia_mcp/tools/__init__.py,sha256=
+academia_mcp/server.py,sha256=q9bhacVm-8uuIMc_xSeymmVaIB8pQCqfTQx5GU8hhpM,2492
+academia_mcp/utils.py,sha256=tkHBho-NfzAR8rplFaiRYq4sBmQ9V3JPjDJTDrz58Xs,4041
+academia_mcp/tools/__init__.py,sha256=8_8QWPRYmPiUjdiNrQilEEnCRR-UBU7g-56jT52V3VQ,934
 academia_mcp/tools/anthology_search.py,sha256=_5s8EzdV7NQD_F3bjVH4XlKKHOJlFtWlQVrPbODuc3I,7847
 academia_mcp/tools/arxiv_download.py,sha256=xanzt77TZBQRngzGbKCRz4Hp-Mwfe_q-46eRW23TpVs,11219
 academia_mcp/tools/arxiv_search.py,sha256=pzM18qrF3QL03A53w003kE7hQi3s3QKtjgw0m7K88UY,8355
-academia_mcp/tools/
+academia_mcp/tools/bitflip.py,sha256=u0hSOPWbnCDu2EbA_RkueX496SvTKz9QhZcXugshSfI,10949
+academia_mcp/tools/document_qa.py,sha256=Z51X_rm9p53SltKQJeN0-1CDrCefiYujap1vZZ88uU8,2132
 academia_mcp/tools/hf_datasets_search.py,sha256=KiBkqT4rXjEN4oc1AWZOPnqN_Go90TQogY5-DUm3LQo,2854
 academia_mcp/tools/md_to_pdf.py,sha256=Ovc_-8j7gIZNEM1d0ZDH-8qbtfZLSaNmCm5DQjrtM0k,12810
 academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 academia_mcp/tools/s2_citations.py,sha256=dqrBp76RrX1zH2XzcMAoWBbvbtyhxLeF-xnqOKD_JiM,4852
 academia_mcp/tools/visit_webpage.py,sha256=0zAZYeQxPDu0OjgAAvbMLZh0ttaS5q-_4WhgsEPrbsI,1542
 academia_mcp/tools/web_search.py,sha256=NAkbXdD9mKxsIXhWN32dRd_EiaB3G6ENy-n-bc7HAaQ,5448
-academia_mcp-1.
-academia_mcp-1.
-academia_mcp-1.
-academia_mcp-1.
-academia_mcp-1.
-academia_mcp-1.
+academia_mcp-1.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+academia_mcp-1.2.0.dist-info/METADATA,sha256=bVlYEucMW7togYS7J-nwq609Qx-MFCSx8vADvucSvvo,1899
+academia_mcp-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+academia_mcp-1.2.0.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
+academia_mcp-1.2.0.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
+academia_mcp-1.2.0.dist-info/RECORD,,
{academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/WHEEL
File without changes
{academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/entry_points.txt
File without changes
{academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/licenses/LICENSE
File without changes
{academia_mcp-1.1.3.dist-info → academia_mcp-1.2.0.dist-info}/top_level.txt
File without changes