aiagents4pharma 1.30.2__py3-none-any.whl → 1.30.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/__init__.py +2 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +8 -0
- aiagents4pharma/talk2scholars/configs/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/agents/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/app/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +9 -0
- aiagents4pharma/talk2scholars/state/__init__.py +4 -2
- aiagents4pharma/talk2scholars/tests/test_s2_multi.py +10 -8
- aiagents4pharma/talk2scholars/tests/test_s2_search.py +9 -5
- aiagents4pharma/talk2scholars/tests/test_s2_single.py +7 -7
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py +25 -11
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +49 -35
- aiagents4pharma/talk2scholars/tests/test_zotero_write.py +10 -10
- aiagents4pharma/talk2scholars/tools/__init__.py +3 -0
- aiagents4pharma/talk2scholars/tools/pdf/__init__.py +4 -2
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +9 -0
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +9 -135
- aiagents4pharma/talk2scholars/tools/s2/search.py +8 -114
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +8 -126
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +194 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +175 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +186 -0
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +2 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +167 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +78 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +197 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +1 -1
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +9 -136
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +14 -48
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +22 -147
- {aiagents4pharma-1.30.2.dist-info → aiagents4pharma-1.30.3.dist-info}/METADATA +1 -1
- {aiagents4pharma-1.30.2.dist-info → aiagents4pharma-1.30.3.dist-info}/RECORD +38 -31
- {aiagents4pharma-1.30.2.dist-info → aiagents4pharma-1.30.3.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.30.2.dist-info → aiagents4pharma-1.30.3.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.30.2.dist-info → aiagents4pharma-1.30.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,167 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Utility for zotero read tool.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import Any, Dict, List
|
9
|
+
import hydra
|
10
|
+
from pyzotero import zotero
|
11
|
+
from .zotero_path import get_item_collections
|
12
|
+
|
13
|
+
|
14
|
+
# Configure logging
|
15
|
+
logging.basicConfig(level=logging.INFO)
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
class ZoteroSearchData:
    """Helper class to organize Zotero search-related data.

    Construction loads the hydra config for the read tool, creates the
    pyzotero client and fetches the item-to-collection mapping, so creating
    an instance already performs configuration and network work.  Call
    :meth:`process_search` to run the search and :meth:`get_search_results`
    to read the outcome.
    """

    def __init__(
        self,
        query: str,
        only_articles: bool,
        limit: int,
        tool_call_id: str,
    ):
        """Store the search parameters and set up config, client and mapping.

        Args:
            query: Search string; a blank string means "fetch all items".
            only_articles: Flag selecting the configured item-type filter
                (see the review note in ``_filter_and_format_papers``).
            limit: Maximum number of items requested for non-empty queries.
            tool_call_id: Identifier of the tool call that triggered the search.
        """
        self.query = query
        self.only_articles = only_articles
        self.limit = limit
        self.tool_call_id = tool_call_id
        # Order matters: the client needs the config, the mapping needs the client.
        self.cfg = self._load_config()
        self.zot = self._init_zotero_client()
        self.item_to_collections = get_item_collections(self.zot)
        # Filled by process_search().
        self.filtered_papers = {}
        self.content = ""

    def process_search(self) -> None:
        """Fetch items, convert them to paper records and build the summary.

        Raises:
            RuntimeError: If fetching fails, nothing is returned, or no item
                yields a usable paper record.
        """
        items = self._fetch_items()
        self._filter_and_format_papers(items)
        self._create_content()

    def get_search_results(self) -> Dict[str, Any]:
        """Return the collected papers and the summary message.

        Returns:
            Dict with ``"filtered_papers"`` (item key -> metadata dict) and
            ``"content"`` (human-readable summary string).
        """
        return {
            "filtered_papers": self.filtered_papers,
            "content": self.content,
        }

    def _load_config(self) -> Any:
        """Compose the hydra config and return its ``tools.zotero_read`` node."""
        with hydra.initialize(version_base=None, config_path="../../../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["tools/zotero_read=default"]
            )
            logger.info("Loaded configuration for Zotero search tool")
            return cfg.tools.zotero_read

    def _init_zotero_client(self) -> "zotero.Zotero":
        """Log the search parameters and create the pyzotero client from config."""
        logger.info(
            "Searching Zotero for query: '%s' (only_articles: %s, limit: %d)",
            self.query,
            self.only_articles,
            self.limit,
        )
        return zotero.Zotero(self.cfg.user_id, self.cfg.library_type, self.cfg.api_key)

    def _fetch_items(self) -> List[Dict[str, Any]]:
        """Fetch raw items from Zotero.

        A blank query fetches up to ``cfg.zotero.max_limit`` items; otherwise
        the query is sent with ``min(limit, max_limit)``.

        Raises:
            RuntimeError: If the Zotero call fails or returns no items.
        """
        try:
            if self.query.strip() == "":
                logger.info(
                    "Empty query provided, fetching all items up to max_limit: %d",
                    self.cfg.zotero.max_limit,
                )
                items = self.zot.items(limit=self.cfg.zotero.max_limit)
            else:
                items = self.zot.items(
                    q=self.query, limit=min(self.limit, self.cfg.zotero.max_limit)
                )
        except Exception as e:
            # Boundary with the external service: surface one stable message
            # to the caller while keeping the original cause chained.
            logger.error("Failed to fetch items from Zotero: %s", e)
            raise RuntimeError(
                "Failed to fetch items from Zotero. Please retry the same query."
            ) from e

        logger.info("Received %d items from Zotero", len(items))

        if not items:
            logger.error("No items returned from Zotero for query: '%s'", self.query)
            raise RuntimeError(
                "No items returned from Zotero. Please retry the same query."
            )

        return items

    @staticmethod
    def _format_authors(creators: Any) -> List[str]:
        """Return display names for the creators whose type is ``"author"``.

        Two-field creators are rendered as ``"firstName lastName"``.  When
        both are empty, the single-field ``name`` value (used for e.g.
        institutional authors) is used instead of an empty string.  A missing
        or ``None`` creators value yields an empty list.
        """
        authors = []
        for creator in creators or []:
            if not isinstance(creator, dict):
                continue
            if creator.get("creatorType") != "author":
                continue
            full_name = (
                f"{creator.get('firstName', '')} {creator.get('lastName', '')}".strip()
            )
            if not full_name:
                # Single-field creator: fall back to its "name" value.
                full_name = str(creator.get("name") or "").strip()
            authors.append(full_name)
        return authors

    def _filter_and_format_papers(self, items: List[Dict[str, Any]]) -> None:
        """Convert raw Zotero items into ``self.filtered_papers``.

        Items that are not dicts, have no dict ``"data"`` payload, or have no
        ``"key"`` are skipped.

        Raises:
            RuntimeError: If no item produced a paper record.
        """
        filter_item_types = (
            self.cfg.zotero.filter_item_types if self.only_articles else []
        )
        # NOTE(review): filter_item_types is only logged here; no item is
        # excluded by type, so only_articles does not change the result.
        # Confirm whether that is intended before wiring the filter in.
        logger.debug("Filtering item types: %s", filter_item_types)

        for item in items:
            if not isinstance(item, dict):
                continue

            data = item.get("data")
            if not isinstance(data, dict):
                continue

            item_type = data.get("itemType", "N/A")
            logger.debug("Item type: %s", item_type)

            key = data.get("key")
            if not key:
                continue

            collection_paths = self.item_to_collections.get(key, ["/Unknown"])

            self.filtered_papers[key] = {
                "Title": data.get("title", "N/A"),
                "Abstract": data.get("abstractNote", "N/A"),
                "Publication Date": data.get("date", "N/A"),
                "URL": data.get("url", "N/A"),
                "Type": item_type if isinstance(item_type, str) else "N/A",
                "Collections": collection_paths,
                "Citation Count": data.get("citationCount", "N/A"),
                "Venue": data.get("venue", "N/A"),
                "Publication Venue": data.get("publicationTitle", "N/A"),
                "Journal Name": data.get("journalAbbreviation", "N/A"),
                "Authors": self._format_authors(data.get("creators")),
            }

        if not self.filtered_papers:
            logger.error(
                "No matching papers returned from Zotero for query: '%s'", self.query
            )
            raise RuntimeError(
                "No matching papers returned from Zotero. Please retry the same query."
            )

        logger.info("Filtered %d items", len(self.filtered_papers))

    def _create_content(self) -> None:
        """Build the summary message, listing the first two papers by title and type."""
        top_papers = list(self.filtered_papers.values())[:2]
        top_papers_info = "\n".join(
            f"{i+1}. {paper['Title']} ({paper['Type']})"
            for i, paper in enumerate(top_papers)
        )

        self.content = "".join(
            [
                "Retrieval was successful. Papers are attached as an artifact.",
                " And here is a summary of the retrieval results:\n",
                f"Number of papers found: {len(self.filtered_papers)}\n",
                f"Query: {self.query}\n",
                "Here are a few of these papers:\n" + top_papers_info,
            ]
        )
|
@@ -0,0 +1,78 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Utility for reviewing papers and saving them to Zotero.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import List
|
9
|
+
|
10
|
+
# Configure logging
|
11
|
+
logging.basicConfig(level=logging.INFO)
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
class ReviewData:
    """Helper class to organize review-related data.

    Builds, at construction time, a short preview of the papers that are
    about to be saved to Zotero plus the payload shown to the human reviewer.
    """

    # Number of papers listed in the preview before "... and N more papers".
    _PREVIEW_LIMIT = 5
    # Number of authors shown per paper before " et al." is appended.
    _AUTHOR_LIMIT = 2

    def __init__(
        self,
        collection_path: str,
        fetched_papers: dict,
        tool_call_id: str,
        state: dict,
    ):
        """Store the inputs and derive the preview and review payload.

        Args:
            collection_path: Target Zotero collection path.
            fetched_papers: Mapping of paper id -> paper metadata dict;
                ``None`` is treated as "no papers".
            tool_call_id: Identifier of the tool call requesting the review.
            state: State object passed through unchanged.
        """
        self.collection_path = collection_path
        # Tolerate None so the preview can still be built for an empty result.
        self.fetched_papers = fetched_papers if fetched_papers is not None else {}
        self.tool_call_id = tool_call_id
        self.state = state
        self.total_papers = len(self.fetched_papers)
        self.papers_summary = self._create_papers_summary()
        self.papers_preview = "\n".join(self.papers_summary)
        self.review_info = self._create_review_info()

    def get_approval_message(self) -> str:
        """Return the message recording approval for the default collection path."""
        return (
            f"Human approved saving {self.total_papers} papers to Zotero "
            f"collection '{self.collection_path}'."
        )

    def get_custom_path_approval_message(self, custom_path: str) -> str:
        """Return the message recording approval for a custom collection path."""
        return (
            "Human approved saving papers to custom Zotero "
            f"collection '{custom_path}'."
        )

    @classmethod
    def _format_authors(cls, authors) -> str:
        """Join the first authors into one string, adding " et al." if truncated.

        Each author string is cut at the first ``" (ID: "`` marker.  A missing
        or ``None`` author list yields an empty string.
        """
        authors = authors or []
        shown = ", ".join(
            author.split(" (ID: ")[0] for author in authors[: cls._AUTHOR_LIMIT]
        )
        if len(authors) > cls._AUTHOR_LIMIT:
            shown += " et al."
        return shown

    def _create_papers_summary(self) -> List[str]:
        """Return one preview line per paper for at most the first five papers.

        A trailing line states how many further papers were not listed.
        """
        summary = []
        for paper_id, paper in list(self.fetched_papers.items())[: self._PREVIEW_LIMIT]:
            logger.info("Paper ID: %s", paper_id)
            title = paper.get("Title", "N/A")
            authors = self._format_authors(paper.get("Authors"))
            summary.append(f"- {title} by {authors}")

        if self.total_papers > self._PREVIEW_LIMIT:
            summary.append(
                f"... and {self.total_papers - self._PREVIEW_LIMIT} more papers"
            )
        return summary

    def _create_review_info(self) -> dict:
        """Return the dictionary describing the pending save for the reviewer."""
        return {
            "action": "save_to_zotero",
            "collection_path": self.collection_path,
            "total_papers": self.total_papers,
            "papers_preview": self.papers_preview,
            "message": (
                f"Would you like to save {self.total_papers} papers to Zotero "
                f"collection '{self.collection_path}'? Please respond with a "
                "structured decision using one of the following options: 'approve', "
                "'reject', or 'custom' (with a custom_path)."
            ),
        }
|
@@ -0,0 +1,197 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Utility for zotero write tool.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import Any, Dict
|
9
|
+
import hydra
|
10
|
+
from pyzotero import zotero
|
11
|
+
from .zotero_path import (
|
12
|
+
find_or_create_collection,
|
13
|
+
fetch_papers_for_save,
|
14
|
+
)
|
15
|
+
|
16
|
+
|
17
|
+
# Configure logging
|
18
|
+
logging.basicConfig(level=logging.INFO)
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
class ZoteroWriteData:
    """Helper class to organize Zotero write-related data.

    Construction loads the hydra config for the write tool, creates the
    pyzotero client and collects the papers to save from ``state``.  Call
    :meth:`process_write` to validate, format and upload them.
    """

    def __init__(
        self,
        tool_call_id: str,
        collection_path: str,
        state: dict,
    ):
        """Store the inputs and set up config, client and the papers to save.

        Args:
            tool_call_id: Identifier of the tool call that triggered the save.
            collection_path: Requested Zotero collection path.
            state: State object handed to ``fetch_papers_for_save`` and read
                for the ``"query"`` entry in the summary message.
        """
        self.tool_call_id = tool_call_id
        self.collection_path = collection_path
        self.state = state
        # Order matters: the client needs the config.
        self.cfg = self._load_config()
        self.zot = self._init_zotero_client()
        self.fetched_papers = fetch_papers_for_save(state)
        # Path used for collection lookup: trailing slashes removed, lower-cased.
        self.normalized_path = collection_path.rstrip("/").lower()
        self.zotero_items = []
        self.content = ""

    def _load_config(self) -> Any:
        """Compose the hydra config and return its ``tools.zotero_write`` node."""
        with hydra.initialize(version_base=None, config_path="../../../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["tools/zotero_write=default"]
            )
            logger.info("Loaded configuration for Zotero write tool")
            return cfg.tools.zotero_write

    def _init_zotero_client(self) -> "zotero.Zotero":
        """Log the target path and create the pyzotero client from config."""
        logger.info(
            "Saving fetched papers to Zotero under collection path: %s",
            self.collection_path,
        )
        return zotero.Zotero(self.cfg.user_id, self.cfg.library_type, self.cfg.api_key)

    def _validate_papers(self) -> None:
        """Ensure there is at least one paper to save.

        Raises:
            ValueError: If no fetched papers are available.
        """
        if not self.fetched_papers:
            raise ValueError(
                "No fetched papers were found to save. "
                "Please retrieve papers using Zotero Read or Semantic Scholar first."
            )

    def _find_collection(self) -> str:
        """Look up the key of the target collection without creating it.

        Returns:
            The key returned by ``find_or_create_collection``.

        Raises:
            ValueError: If the collection does not exist; the message lists
                the names of the collections that do.
        """
        matched_collection_key = find_or_create_collection(
            self.zot, self.normalized_path, create_missing=False
        )

        if not matched_collection_key:
            available_collections = self.zot.collections()
            collection_names = [col["data"]["name"] for col in available_collections]
            names_display = ", ".join(collection_names)

            raise ValueError(
                f"Error: The collection path '{self.collection_path}' does "
                "not exist in Zotero. "
                f"Available collections are: {names_display}. "
                "Please try saving to one of these existing collections."
            )

        return matched_collection_key

    @staticmethod
    def _build_creators(author_entries) -> list:
        """Convert author strings into Zotero creator dicts.

        Each entry is cut at the first ``" (ID: "`` marker.  Names containing
        a space are split at the first space into ``firstName``/``lastName``;
        other names are stored as ``lastName`` only.  A missing or ``None``
        author list yields an empty list.
        """
        creators = []
        for entry in author_entries or []:
            name = entry.split(" (ID: ")[0]
            first, sep, rest = name.partition(" ")
            if sep:
                creators.append(
                    {"creatorType": "author", "firstName": first, "lastName": rest}
                )
            else:
                creators.append({"creatorType": "author", "lastName": name})
        return creators

    def _format_papers_for_zotero(self, matched_collection_key: str) -> None:
        """Build ``self.zotero_items`` from the fetched papers.

        The item list is rebuilt from scratch on every call so repeated calls
        do not queue the same papers twice.

        Args:
            matched_collection_key: Key of the collection each item is assigned to.
        """
        self.zotero_items = []
        for paper_id, paper in self.fetched_papers.items():
            citations = paper.get("Citation Count", "N/A")
            venue = paper.get("Venue", "N/A")
            publication_venue = paper.get("Publication Venue", "N/A")
            journal_volume = paper.get("Journal Volume", "N/A")
            journal_pages = paper.get("Journal Pages", "N/A")

            self.zotero_items.append(
                {
                    "itemType": "journalArticle",
                    "title": paper.get("Title", "N/A"),
                    "abstractNote": paper.get("Abstract", "N/A"),
                    "date": paper.get("Publication Date", "N/A"),
                    "url": paper.get("URL", "N/A"),
                    "extra": f"Paper ID: {paper_id}\nCitations: {citations}",
                    "collections": [matched_collection_key],
                    # Prefer the publication venue; fall back to the plain venue.
                    "publicationTitle": (
                        publication_venue if publication_venue != "N/A" else venue
                    ),
                    "journalAbbreviation": paper.get("Journal Name", "N/A"),
                    # NOTE(review): unknown volume/pages are sent as None —
                    # confirm the Zotero API accepts null for these fields.
                    "volume": journal_volume if journal_volume != "N/A" else None,
                    "pages": journal_pages if journal_pages != "N/A" else None,
                    "creators": self._build_creators(paper.get("Authors")),
                }
            )

    def _save_to_zotero(self) -> None:
        """Upload the prepared items to Zotero.

        Raises:
            RuntimeError: If the upload call raises.
        """
        try:
            response = self.zot.create_items(self.zotero_items)
        except Exception as e:
            logger.error("Error saving to Zotero: %s", str(e))
            raise RuntimeError(f"Error saving papers to Zotero: {str(e)}") from e

        # A write can complete without raising while individual items are
        # rejected; make that visible in the log instead of only reporting success.
        failed = response.get("failed") if isinstance(response, dict) else None
        if failed:
            logger.warning(
                "Zotero reported %d item(s) that could not be saved: %s",
                len(failed),
                failed,
            )
        logger.info("Papers successfully saved to Zotero: %s", response)

    def _create_content(self, collection_name: str) -> None:
        """Build the summary message, listing the first two papers by title and URL.

        Args:
            collection_name: Display name of the collection the papers went to.
        """
        top_papers = list(self.fetched_papers.values())[:2]
        top_papers_info = "\n".join(
            f"{i+1}. {paper.get('Title', 'N/A')} ({paper.get('URL', 'N/A')})"
            for i, paper in enumerate(top_papers)
        )
        self.content = "".join(
            [
                "Save was successful. Papers have been saved to Zotero collection ",
                f"'{collection_name}' with the requested path "
                f"'{self.get_collection_path()}'.\n",
                "Summary of saved papers:\n",
                f"Number of articles saved: {self.get_paper_count()}\n",
                f"Query: {self.state.get('query', 'N/A')}\n",
                "Here are a few of these articles:\n" + top_papers_info,
            ]
        )

    def process_write(self) -> Dict[str, Any]:
        """Validate, format and save the papers, then build the summary.

        Returns:
            Dict with ``"content"`` (summary string) and ``"fetched_papers"``.

        Raises:
            ValueError: If there are no papers or the collection is missing.
            RuntimeError: If the upload fails.
        """
        self._validate_papers()
        matched_collection_key = self._find_collection()
        self._format_papers_for_zotero(matched_collection_key)
        self._save_to_zotero()

        # Resolve the collection's display name for the feedback message;
        # it stays empty when the key is not among the listed collections.
        collection_name = next(
            (
                col["data"]["name"]
                for col in self.zot.collections()
                if col["key"] == matched_collection_key
            ),
            "",
        )

        self._create_content(collection_name)

        return {
            "content": self.content,
            "fetched_papers": self.fetched_papers,
        }

    def get_paper_count(self) -> int:
        """Get the number of papers to be saved.

        Returns:
            int: The number of papers in the fetched papers dictionary.
        """
        return len(self.fetched_papers)

    def get_collection_path(self) -> str:
        """Get the collection path exactly as it was requested.

        Returns:
            str: The unmodified ``collection_path`` given at construction;
            the normalized form is kept in ``normalized_path``.
        """
        return self.collection_path
|
@@ -6,16 +6,13 @@ This tool is used to search for papers in Zotero library.
|
|
6
6
|
|
7
7
|
import logging
|
8
8
|
from typing import Annotated, Any
|
9
|
-
import hydra
|
10
|
-
from pyzotero import zotero
|
11
9
|
from langchain_core.messages import ToolMessage
|
12
10
|
from langchain_core.tools import tool
|
13
11
|
from langchain_core.tools.base import InjectedToolCallId
|
14
12
|
from langgraph.types import Command
|
15
13
|
from pydantic import BaseModel, Field
|
16
|
-
from .utils.
|
14
|
+
from .utils.read_helper import ZoteroSearchData
|
17
15
|
|
18
|
-
# pylint: disable=R0914,R0912,R0915
|
19
16
|
|
20
17
|
# Configure logging
|
21
18
|
logging.basicConfig(level=logging.INFO)
|
@@ -56,146 +53,22 @@ def zotero_read(
|
|
56
53
|
Returns:
|
57
54
|
Dict[str, Any]: The search results and related information.
|
58
55
|
"""
|
59
|
-
#
|
60
|
-
|
61
|
-
cfg = hydra.compose(
|
62
|
-
config_name="config", overrides=["tools/zotero_read=default"]
|
63
|
-
)
|
64
|
-
logger.info("Loaded configuration for Zotero search tool")
|
65
|
-
cfg = cfg.tools.zotero_read
|
66
|
-
logger.info(
|
67
|
-
"Searching Zotero for query: '%s' (only_articles: %s, limit: %d)",
|
68
|
-
query,
|
69
|
-
only_articles,
|
70
|
-
limit,
|
71
|
-
)
|
56
|
+
# Create search data object to organize variables
|
57
|
+
search_data = ZoteroSearchData(query, only_articles, limit, tool_call_id)
|
72
58
|
|
73
|
-
#
|
74
|
-
|
75
|
-
|
76
|
-
# Fetch collection mapping once
|
77
|
-
item_to_collections = get_item_collections(zot)
|
78
|
-
|
79
|
-
# If the query is empty, fetch all items (up to max_limit), otherwise use the query
|
80
|
-
try:
|
81
|
-
if query.strip() == "":
|
82
|
-
logger.info(
|
83
|
-
"Empty query provided, fetching all items up to max_limit: %d",
|
84
|
-
cfg.zotero.max_limit,
|
85
|
-
)
|
86
|
-
items = zot.items(limit=cfg.zotero.max_limit)
|
87
|
-
else:
|
88
|
-
items = zot.items(q=query, limit=min(limit, cfg.zotero.max_limit))
|
89
|
-
except Exception as e:
|
90
|
-
logger.error("Failed to fetch items from Zotero: %s", e)
|
91
|
-
raise RuntimeError(
|
92
|
-
"Failed to fetch items from Zotero. Please retry the same query."
|
93
|
-
) from e
|
94
|
-
|
95
|
-
logger.info("Received %d items from Zotero", len(items))
|
96
|
-
|
97
|
-
if not items:
|
98
|
-
logger.error("No items returned from Zotero for query: '%s'", query)
|
99
|
-
raise RuntimeError(
|
100
|
-
"No items returned from Zotero. Please retry the same query."
|
101
|
-
)
|
102
|
-
|
103
|
-
# Define filter criteria
|
104
|
-
filter_item_types = cfg.zotero.filter_item_types if only_articles else []
|
105
|
-
logger.debug("Filtering item types: %s", filter_item_types)
|
106
|
-
# filter_excluded_types = (
|
107
|
-
# cfg.zotero.filter_excluded_types
|
108
|
-
# ) # Exclude non-research items
|
109
|
-
|
110
|
-
# Filter and format papers
|
111
|
-
filtered_papers = {}
|
112
|
-
|
113
|
-
for item in items:
|
114
|
-
if not isinstance(item, dict):
|
115
|
-
continue
|
116
|
-
|
117
|
-
data = item.get("data")
|
118
|
-
if not isinstance(data, dict):
|
119
|
-
continue
|
120
|
-
|
121
|
-
item_type = data.get("itemType", "N/A")
|
122
|
-
logger.debug("Item type: %s", item_type)
|
123
|
-
|
124
|
-
# Exclude attachments, notes, and other unwanted types
|
125
|
-
# if (
|
126
|
-
# not item_type
|
127
|
-
# or not isinstance(item_type, str)
|
128
|
-
# or item_type in filter_excluded_types # Skip attachments & notes
|
129
|
-
# or (
|
130
|
-
# only_articles and item_type not in filter_item_types
|
131
|
-
# ) # Skip non-research types
|
132
|
-
# ):
|
133
|
-
# continue
|
134
|
-
|
135
|
-
key = data.get("key")
|
136
|
-
if not key:
|
137
|
-
continue
|
138
|
-
|
139
|
-
# Use the imported utility function's mapping to get collection paths
|
140
|
-
collection_paths = item_to_collections.get(key, ["/Unknown"])
|
141
|
-
|
142
|
-
# Extract metadata safely
|
143
|
-
filtered_papers[key] = {
|
144
|
-
"Title": data.get("title", "N/A"),
|
145
|
-
"Abstract": data.get("abstractNote", "N/A"),
|
146
|
-
"Publication Date": data.get(
|
147
|
-
"date", "N/A"
|
148
|
-
), # Correct field for publication date
|
149
|
-
"URL": data.get("url", "N/A"),
|
150
|
-
"Type": item_type if isinstance(item_type, str) else "N/A",
|
151
|
-
"Collections": collection_paths, # Displays full collection paths
|
152
|
-
"Citation Count": data.get("citationCount", "N/A"), # Shows citations
|
153
|
-
"Venue": data.get("venue", "N/A"), # Displays venue
|
154
|
-
"Publication Venue": data.get(
|
155
|
-
"publicationTitle", "N/A"
|
156
|
-
), # Matches with Zotero Write
|
157
|
-
"Journal Name": data.get("journalAbbreviation", "N/A"), # Journal Name
|
158
|
-
# "Journal Volume": data.get("volume", "N/A"), # Journal Volume
|
159
|
-
# "Journal Pages": data.get("pages", "N/A"), # Journal Pages
|
160
|
-
"Authors": [
|
161
|
-
f"{creator.get('firstName', '')} {creator.get('lastName', '')}".strip()
|
162
|
-
for creator in data.get("creators", []) # Prevents NoneType error
|
163
|
-
if isinstance(creator, dict) and creator.get("creatorType") == "author"
|
164
|
-
],
|
165
|
-
}
|
166
|
-
|
167
|
-
if not filtered_papers:
|
168
|
-
logger.error("No matching papers returned from Zotero for query: '%s'", query)
|
169
|
-
raise RuntimeError(
|
170
|
-
"No matching papers returned from Zotero. Please retry the same query."
|
171
|
-
)
|
172
|
-
|
173
|
-
logger.info("Filtered %d items", len(filtered_papers))
|
174
|
-
|
175
|
-
# Prepare content with top 2 paper titles and types
|
176
|
-
top_papers = list(filtered_papers.values())[:2]
|
177
|
-
top_papers_info = "\n".join(
|
178
|
-
[
|
179
|
-
f"{i+1}. {paper['Title']} ({paper['Type']})"
|
180
|
-
for i, paper in enumerate(top_papers)
|
181
|
-
]
|
182
|
-
)
|
183
|
-
|
184
|
-
content = "Retrieval was successful. Papers are attached as an artifact."
|
185
|
-
content += " And here is a summary of the retrieval results:\n"
|
186
|
-
content += f"Number of papers found: {len(filtered_papers)}\n"
|
187
|
-
content += f"Query: {query}\n"
|
188
|
-
content += "Here are a few of these papers:\n" + top_papers_info
|
59
|
+
# Process the search
|
60
|
+
search_data.process_search()
|
61
|
+
results = search_data.get_search_results()
|
189
62
|
|
190
63
|
return Command(
|
191
64
|
update={
|
192
|
-
"zotero_read": filtered_papers,
|
65
|
+
"zotero_read": results["filtered_papers"],
|
193
66
|
"last_displayed_papers": "zotero_read",
|
194
67
|
"messages": [
|
195
68
|
ToolMessage(
|
196
|
-
content=content,
|
69
|
+
content=results["content"],
|
197
70
|
tool_call_id=tool_call_id,
|
198
|
-
artifact=filtered_papers,
|
71
|
+
artifact=results["filtered_papers"],
|
199
72
|
)
|
200
73
|
],
|
201
74
|
}
|