aiagents4pharma 1.30.1__py3-none-any.whl → 1.30.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/__init__.py +2 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +8 -0
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +9 -7
- aiagents4pharma/talk2scholars/configs/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/agents/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +9 -15
- aiagents4pharma/talk2scholars/configs/app/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +9 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
- aiagents4pharma/talk2scholars/state/__init__.py +4 -2
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +3 -0
- aiagents4pharma/talk2scholars/tests/test_routing_logic.py +1 -2
- aiagents4pharma/talk2scholars/tests/test_s2_multi.py +10 -8
- aiagents4pharma/talk2scholars/tests/test_s2_search.py +9 -5
- aiagents4pharma/talk2scholars/tests/test_s2_single.py +7 -7
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +3 -2
- aiagents4pharma/talk2scholars/tests/test_zotero_human_in_the_loop.py +273 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py +433 -1
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +57 -43
- aiagents4pharma/talk2scholars/tests/test_zotero_write.py +123 -588
- aiagents4pharma/talk2scholars/tools/__init__.py +3 -0
- aiagents4pharma/talk2scholars/tools/pdf/__init__.py +4 -2
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +9 -0
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +9 -135
- aiagents4pharma/talk2scholars/tools/s2/search.py +8 -114
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +8 -126
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +194 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +175 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +186 -0
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +3 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +167 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +78 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +197 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +126 -1
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +10 -139
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +164 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +40 -229
- {aiagents4pharma-1.30.1.dist-info → aiagents4pharma-1.30.3.dist-info}/METADATA +3 -2
- {aiagents4pharma-1.30.1.dist-info → aiagents4pharma-1.30.3.dist-info}/RECORD +45 -35
- {aiagents4pharma-1.30.1.dist-info → aiagents4pharma-1.30.3.dist-info}/WHEEL +1 -1
- {aiagents4pharma-1.30.1.dist-info → aiagents4pharma-1.30.3.dist-info/licenses}/LICENSE +0 -0
- {aiagents4pharma-1.30.1.dist-info → aiagents4pharma-1.30.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,167 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Utility for zotero read tool.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import Any, Dict, List
|
9
|
+
import hydra
|
10
|
+
from pyzotero import zotero
|
11
|
+
from .zotero_path import get_item_collections
|
12
|
+
|
13
|
+
|
14
|
+
# Configure logging
|
15
|
+
logging.basicConfig(level=logging.INFO)
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
class ZoteroSearchData:
    """Helper class to organize Zotero search-related data.

    Constructing an instance loads the hydra configuration, creates the
    pyzotero client and builds the item-to-collection mapping.
    ``process_search`` then fills ``filtered_papers`` and ``content``, which
    ``get_search_results`` returns.
    """

    def __init__(
        self,
        query: str,
        only_articles: bool,
        limit: int,
        tool_call_id: str,
    ):
        """Store the search parameters and set up config, client and lookups.

        Args:
            query: Search string passed to Zotero; a blank query fetches items
                without a search term.
            only_articles: Selects whether ``cfg.zotero.filter_item_types`` is
                read in ``_filter_and_format_papers``.
            limit: Requested number of items (capped by ``cfg.zotero.max_limit``).
            tool_call_id: Identifier of the tool call; only stored here.
        """
        self.query = query
        self.only_articles = only_articles
        self.limit = limit
        self.tool_call_id = tool_call_id
        self.cfg = self._load_config()
        self.zot = self._init_zotero_client()
        self.item_to_collections = get_item_collections(self.zot)
        self.filtered_papers = {}
        self.content = ""

    def process_search(self) -> None:
        """Fetch items, format them into papers and build the summary text."""
        items = self._fetch_items()
        self._filter_and_format_papers(items)
        self._create_content()

    def get_search_results(self) -> Dict[str, Any]:
        """Return the formatted papers and the summary content."""
        return {
            "filtered_papers": self.filtered_papers,
            "content": self.content,
        }

    def _load_config(self) -> Any:
        """Compose the hydra config and return its ``tools.zotero_read`` node."""
        with hydra.initialize(version_base=None, config_path="../../../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["tools/zotero_read=default"]
            )
            logger.info("Loaded configuration for Zotero search tool")
            return cfg.tools.zotero_read

    def _init_zotero_client(self) -> "zotero.Zotero":
        """Log the search parameters and create the pyzotero client."""
        logger.info(
            "Searching Zotero for query: '%s' (only_articles: %s, limit: %d)",
            self.query,
            self.only_articles,
            self.limit,
        )
        return zotero.Zotero(self.cfg.user_id, self.cfg.library_type, self.cfg.api_key)

    def _fetch_items(self) -> List[Dict[str, Any]]:
        """Fetch items from Zotero.

        Returns:
            The non-empty list of items returned by the client.

        Raises:
            RuntimeError: If the request fails or returns no items.
        """
        max_limit = self.cfg.zotero.max_limit
        try:
            if self.query.strip() == "":
                logger.info(
                    "Empty query provided, fetching all items up to max_limit: %d",
                    max_limit,
                )
                items = self.zot.items(limit=max_limit)
            else:
                items = self.zot.items(q=self.query, limit=min(self.limit, max_limit))
        except Exception as e:
            logger.error("Failed to fetch items from Zotero: %s", e)
            raise RuntimeError(
                "Failed to fetch items from Zotero. Please retry the same query."
            ) from e

        logger.info("Received %d items from Zotero", len(items))

        if not items:
            logger.error("No items returned from Zotero for query: '%s'", self.query)
            raise RuntimeError(
                "No items returned from Zotero. Please retry the same query."
            )

        return items

    @staticmethod
    def _creator_display_name(creator: Dict[str, Any]) -> str:
        """Return a printable name for one Zotero creator entry."""
        full_name = (
            f"{creator.get('firstName', '')} {creator.get('lastName', '')}".strip()
        )
        # Single-field creators (e.g. institutions) carry "name" instead of
        # firstName/lastName; without this fallback they would render as "".
        return full_name or str(creator.get("name", "")).strip()

    def _filter_and_format_papers(self, items: List[Dict[str, Any]]) -> None:
        """Format usable items into ``self.filtered_papers``, keyed by item key.

        Entries that are not dicts, have no dict under ``"data"`` or have no
        ``"key"`` are skipped.

        Raises:
            RuntimeError: If no paper is left after skipping unusable items.
        """
        filter_item_types = (
            self.cfg.zotero.filter_item_types if self.only_articles else []
        )
        # NOTE(review): filter_item_types is only logged, never applied, so
        # only_articles currently has no effect on the result. Confirm whether
        # the configured list is meant as an allow-list before enforcing it.
        logger.debug("Filtering item types: %s", filter_item_types)

        for item in items:
            if not isinstance(item, dict):
                continue

            data = item.get("data")
            if not isinstance(data, dict):
                continue

            item_type = data.get("itemType", "N/A")
            logger.debug("Item type: %s", item_type)

            key = data.get("key")
            if not key:
                continue

            collection_paths = self.item_to_collections.get(key, ["/Unknown"])

            author_names = (
                self._creator_display_name(creator)
                for creator in data.get("creators", [])
                if isinstance(creator, dict)
                and creator.get("creatorType") == "author"
            )

            self.filtered_papers[key] = {
                "Title": data.get("title", "N/A"),
                "Abstract": data.get("abstractNote", "N/A"),
                "Publication Date": data.get("date", "N/A"),
                "URL": data.get("url", "N/A"),
                "Type": item_type if isinstance(item_type, str) else "N/A",
                "Collections": collection_paths,
                "Citation Count": data.get("citationCount", "N/A"),
                "Venue": data.get("venue", "N/A"),
                "Publication Venue": data.get("publicationTitle", "N/A"),
                "Journal Name": data.get("journalAbbreviation", "N/A"),
                # Drop creators for which no name could be derived at all.
                "Authors": [name for name in author_names if name],
            }

        if not self.filtered_papers:
            logger.error(
                "No matching papers returned from Zotero for query: '%s'", self.query
            )
            raise RuntimeError(
                "No matching papers returned from Zotero. Please retry the same query."
            )

        logger.info("Filtered %d items", len(self.filtered_papers))

    def _create_content(self) -> None:
        """Build the summary message, listing the first two papers."""
        top_papers = list(self.filtered_papers.values())[:2]
        top_papers_info = "\n".join(
            f"{i+1}. {paper['Title']} ({paper['Type']})"
            for i, paper in enumerate(top_papers)
        )

        self.content = "Retrieval was successful. Papers are attached as an artifact."
        self.content += " And here is a summary of the retrieval results:\n"
        self.content += f"Number of papers found: {len(self.filtered_papers)}\n"
        self.content += f"Query: {self.query}\n"
        self.content += "Here are a few of these papers:\n" + top_papers_info
|
@@ -0,0 +1,78 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Utility for reviewing papers and saving them to Zotero.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import List
|
9
|
+
|
10
|
+
# Configure logging
|
11
|
+
logging.basicConfig(level=logging.INFO)
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
class ReviewData:
    """Helper class to organize review-related data.

    Builds, at construction time, a short preview of the papers and the
    ``review_info`` dictionary that asks whether to save them to Zotero.
    """

    # Papers listed in the preview, and authors shown per paper before "et al.".
    MAX_PREVIEW_PAPERS = 5
    MAX_PREVIEW_AUTHORS = 2

    def __init__(
        self,
        collection_path: str,
        fetched_papers: dict,
        tool_call_id: str,
        state: dict,
    ):
        """Store the inputs and precompute the preview and review payload.

        Args:
            collection_path: Zotero collection path the papers would go to.
            fetched_papers: Mapping of paper id to paper metadata; ``None`` is
                treated as an empty mapping.
            tool_call_id: Identifier of the tool call; only stored here.
            state: Agent state; only stored here.
        """
        self.collection_path = collection_path
        # A missing mapping would otherwise make len() below raise TypeError.
        self.fetched_papers = {} if fetched_papers is None else fetched_papers
        self.tool_call_id = tool_call_id
        self.state = state
        self.total_papers = len(self.fetched_papers)
        self.papers_summary = self._create_papers_summary()
        self.papers_preview = "\n".join(self.papers_summary)
        self.review_info = self._create_review_info()

    def get_approval_message(self) -> str:
        """Get the formatted approval message for the review."""
        return (
            f"Human approved saving {self.total_papers} papers to Zotero "
            f"collection '{self.collection_path}'."
        )

    def get_custom_path_approval_message(self, custom_path: str) -> str:
        """Get the formatted approval message for a custom collection path."""
        return (
            "Human approved saving papers to custom Zotero "
            f"collection '{custom_path}'."
        )

    def _create_papers_summary(self) -> List[str]:
        """Create one summary line per previewed paper.

        Only the first ``MAX_PREVIEW_PAPERS`` papers are listed; a trailing
        line reports how many more there are.
        """
        summary = []
        preview_items = list(self.fetched_papers.items())[: self.MAX_PREVIEW_PAPERS]
        for paper_id, paper in preview_items:
            logger.info("Paper ID: %s", paper_id)
            title = paper.get("Title", "N/A")
            all_authors = paper.get("Authors", [])
            # Author entries may carry a " (ID: ...)" suffix; show the name only.
            authors = ", ".join(
                author.split(" (ID: ")[0]
                for author in all_authors[: self.MAX_PREVIEW_AUTHORS]
            )
            if len(all_authors) > self.MAX_PREVIEW_AUTHORS:
                authors += " et al."
            summary.append(f"- {title} by {authors}")

        remaining = self.total_papers - self.MAX_PREVIEW_PAPERS
        if remaining > 0:
            summary.append(f"... and {remaining} more papers")
        return summary

    def _create_review_info(self) -> dict:
        """Create the review information dictionary."""
        return {
            "action": "save_to_zotero",
            "collection_path": self.collection_path,
            "total_papers": self.total_papers,
            "papers_preview": self.papers_preview,
            "message": (
                f"Would you like to save {self.total_papers} papers to Zotero "
                f"collection '{self.collection_path}'? Please respond with a "
                "structured decision using one of the following options: 'approve', "
                "'reject', or 'custom' (with a custom_path)."
            ),
        }
|
@@ -0,0 +1,197 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
Utility for zotero write tool.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import Any, Dict
|
9
|
+
import hydra
|
10
|
+
from pyzotero import zotero
|
11
|
+
from .zotero_path import (
|
12
|
+
find_or_create_collection,
|
13
|
+
fetch_papers_for_save,
|
14
|
+
)
|
15
|
+
|
16
|
+
|
17
|
+
# Configure logging
|
18
|
+
logging.basicConfig(level=logging.INFO)
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
class ZoteroWriteData:
    """Helper class to organize Zotero write-related data.

    Constructing an instance loads the hydra configuration, creates the
    pyzotero client and reads the papers to save from the agent state.
    ``process_write`` performs the save and returns the result payload.
    """

    # pyzotero documents create_items as accepting at most 50 items per call.
    MAX_ITEMS_PER_REQUEST = 50

    def __init__(
        self,
        tool_call_id: str,
        collection_path: str,
        state: dict,
    ):
        """Store the inputs and set up config, client and papers to save.

        Args:
            tool_call_id: Identifier of the tool call; only stored here.
            collection_path: Zotero collection path requested by the caller.
            state: Agent state from which the papers to save are read.
        """
        self.tool_call_id = tool_call_id
        self.collection_path = collection_path
        self.state = state
        self.cfg = self._load_config()
        self.zot = self._init_zotero_client()
        self.fetched_papers = fetch_papers_for_save(state)
        # Lower-cased, without trailing slash; used only for collection lookup.
        self.normalized_path = collection_path.rstrip("/").lower()
        self.zotero_items = []
        self.content = ""

    def _load_config(self) -> Any:
        """Compose the hydra config and return its ``tools.zotero_write`` node."""
        with hydra.initialize(version_base=None, config_path="../../../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["tools/zotero_write=default"]
            )
            logger.info("Loaded configuration for Zotero write tool")
            return cfg.tools.zotero_write

    def _init_zotero_client(self) -> "zotero.Zotero":
        """Log the target collection path and create the pyzotero client."""
        logger.info(
            "Saving fetched papers to Zotero under collection path: %s",
            self.collection_path,
        )
        return zotero.Zotero(self.cfg.user_id, self.cfg.library_type, self.cfg.api_key)

    def _validate_papers(self) -> None:
        """Validate that papers exist to save.

        Raises:
            ValueError: If there are no fetched papers.
        """
        if not self.fetched_papers:
            raise ValueError(
                "No fetched papers were found to save. "
                "Please retrieve papers using Zotero Read or Semantic Scholar first."
            )

    def _find_collection(self) -> str:
        """Find the existing target collection (nothing is created here).

        Returns:
            The key of the matched collection.

        Raises:
            ValueError: If no collection matches; the message lists the names
                of the collections returned by the client.
        """
        matched_collection_key = find_or_create_collection(
            self.zot, self.normalized_path, create_missing=False
        )

        if not matched_collection_key:
            available_collections = self.zot.collections()
            collection_names = [col["data"]["name"] for col in available_collections]
            names_display = ", ".join(collection_names)

            raise ValueError(
                f"Error: The collection path '{self.collection_path}' does "
                "not exist in Zotero. "
                f"Available collections are: {names_display}. "
                "Please try saving to one of these existing collections."
            )

        return matched_collection_key

    @staticmethod
    def _author_to_creator(author: str) -> Dict[str, str]:
        """Convert one author string into a Zotero creator dict."""
        # Author entries may carry a " (ID: ...)" suffix; keep the name only.
        name = author.split(" (ID: ")[0]
        if " " not in name:
            return {"creatorType": "author", "lastName": name}
        first_name, _, last_name = name.partition(" ")
        return {
            "creatorType": "author",
            "firstName": first_name,
            "lastName": last_name,
        }

    def _format_papers_for_zotero(self, matched_collection_key: str) -> None:
        """Format papers for Zotero and assign them to the given collection.

        ``self.zotero_items`` is rebuilt from scratch, so calling this method
        more than once does not duplicate items.
        """
        formatted_items = []
        for paper_id, paper in self.fetched_papers.items():
            citations = paper.get("Citation Count", "N/A")
            venue = paper.get("Venue", "N/A")
            publication_venue = paper.get("Publication Venue", "N/A")
            journal_volume = paper.get("Journal Volume", "N/A")
            journal_pages = paper.get("Journal Pages", "N/A")

            formatted_items.append(
                {
                    "itemType": "journalArticle",
                    "title": paper.get("Title", "N/A"),
                    "abstractNote": paper.get("Abstract", "N/A"),
                    "date": paper.get("Publication Date", "N/A"),
                    "url": paper.get("URL", "N/A"),
                    "extra": f"Paper ID: {paper_id}\nCitations: {citations}",
                    "collections": [matched_collection_key],
                    # Fall back to the venue when no publication venue is known.
                    "publicationTitle": (
                        publication_venue if publication_venue != "N/A" else venue
                    ),
                    "journalAbbreviation": paper.get("Journal Name", "N/A"),
                    "volume": journal_volume if journal_volume != "N/A" else None,
                    "pages": journal_pages if journal_pages != "N/A" else None,
                    "creators": [
                        self._author_to_creator(author)
                        for author in paper.get("Authors", [])
                    ],
                }
            )
        self.zotero_items = formatted_items

    def _save_to_zotero(self) -> None:
        """Save the formatted items to Zotero in batches.

        Raises:
            RuntimeError: If any ``create_items`` call fails; batches sent
                before the failing one are not rolled back.
        """
        batch_size = self.MAX_ITEMS_PER_REQUEST
        try:
            for start in range(0, len(self.zotero_items), batch_size):
                batch = self.zotero_items[start : start + batch_size]
                response = self.zot.create_items(batch)
                logger.info("Papers successfully saved to Zotero: %s", response)
                # Surface per-item rejections instead of silently ignoring them.
                failed = response.get("failed") if isinstance(response, dict) else None
                if failed:
                    logger.warning(
                        "Zotero reported %d failed item(s): %s", len(failed), failed
                    )
        except Exception as e:
            logger.error("Error saving to Zotero: %s", str(e))
            raise RuntimeError(f"Error saving papers to Zotero: {str(e)}") from e

    def _create_content(self, collection_name: str) -> None:
        """Build the summary message, listing the first two saved papers."""
        self.content = (
            "Save was successful. Papers have been saved to Zotero collection "
            f"'{collection_name}' with the requested path '{self.get_collection_path()}'.\n"
        )
        self.content += "Summary of saved papers:\n"
        self.content += f"Number of articles saved: {self.get_paper_count()}\n"
        self.content += f"Query: {self.state.get('query', 'N/A')}\n"
        top_papers = list(self.fetched_papers.values())[:2]
        top_papers_info = "\n".join(
            f"{i+1}. {paper.get('Title', 'N/A')} ({paper.get('URL', 'N/A')})"
            for i, paper in enumerate(top_papers)
        )
        self.content += "Here are a few of these articles:\n" + top_papers_info

    def process_write(self) -> Dict[str, Any]:
        """Validate, format and save the papers, then return the results.

        Returns:
            A dict with the summary ``"content"`` and the ``"fetched_papers"``.

        Raises:
            ValueError: If there are no papers or the collection is not found.
            RuntimeError: If saving to Zotero fails.
        """
        self._validate_papers()
        matched_collection_key = self._find_collection()
        self._format_papers_for_zotero(matched_collection_key)
        self._save_to_zotero()

        # Look up the collection's display name for feedback; "" if not listed.
        collection_name = next(
            (
                col["data"]["name"]
                for col in self.zot.collections()
                if col["key"] == matched_collection_key
            ),
            "",
        )

        self._create_content(collection_name)

        return {
            "content": self.content,
            "fetched_papers": self.fetched_papers,
        }

    def get_paper_count(self) -> int:
        """Get the number of papers to be saved.

        Returns:
            int: The number of papers in the fetched papers dictionary.
        """
        return len(self.fetched_papers)

    def get_collection_path(self) -> str:
        """Get the collection path as requested by the caller.

        Returns:
            str: The unmodified ``collection_path`` given at construction
            (not the lower-cased ``normalized_path``).
        """
        return self.collection_path
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
3
|
"""
|
4
|
-
Utility functions for Zotero
|
4
|
+
Utility functions for Zotero path operations.
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
@@ -9,6 +9,7 @@ import logging
|
|
9
9
|
# Configure logging
|
10
10
|
logging.basicConfig(level=logging.INFO)
|
11
11
|
logger = logging.getLogger(__name__)
|
12
|
+
# pylint: disable=broad-exception-caught
|
12
13
|
|
13
14
|
|
14
15
|
def get_item_collections(zot):
|
@@ -34,6 +35,7 @@ def get_item_collections(zot):
|
|
34
35
|
|
35
36
|
# Build full paths for collections
|
36
37
|
def build_collection_path(col_key):
|
38
|
+
"""build collection path from collection key"""
|
37
39
|
path = []
|
38
40
|
while col_key:
|
39
41
|
path.insert(0, collection_map.get(col_key, "Unknown"))
|
@@ -61,3 +63,126 @@ def get_item_collections(zot):
|
|
61
63
|
logger.info("Successfully mapped items to collection paths.")
|
62
64
|
|
63
65
|
return item_to_collections
|
66
|
+
|
67
|
+
|
68
|
+
def find_or_create_collection(zot, path, create_missing=False):
    """Find the collection addressed by ``path``, optionally creating it.

    Names are compared case-insensitively. Only the last path component (the
    target) and, for nested paths, the one before it (the parent) are looked
    at. When several collections share the target name, one whose parent
    matches the requested parent is preferred; otherwise the first name match
    is returned regardless of hierarchy.

    Args:
        zot: Zotero client providing ``collections()`` and
            ``create_collection()``.
        path (str): Collection path such as ``"/parent/child"``.
        create_missing (bool): Create the target collection when no
            collection with that name exists.

    Returns:
        str or None: The collection key, or None for an empty path, when
        nothing matches and ``create_missing`` is false, or when creation
        fails.
    """
    logger.info(
        "Finding collection for path: %s (create_missing=%s)", path, create_missing
    )
    # Strip leading/trailing slashes; keep the caller's spelling for creation
    # and use a lower-cased copy for matching.
    trimmed = path.strip("/")
    if not trimmed:
        logger.warning("Empty path provided")
        return None
    display_parts = trimmed.split("/")
    path_parts = [part.lower() for part in display_parts]

    # Get all collections from Zotero
    all_collections = zot.collections()
    logger.info("Found %d collections in Zotero", len(all_collections))

    target_name = path_parts[-1]

    # Keys of every collection carrying the parent's name (nested paths only).
    parent_keys = []
    if len(path_parts) > 1:
        parent_name = path_parts[-2]
        parent_keys = [
            col["key"]
            for col in all_collections
            if col["data"]["name"].lower() == parent_name
        ]

    name_matches = [
        col for col in all_collections if col["data"]["name"].lower() == target_name
    ]
    if name_matches:
        # Prefer the match that actually sits under the requested parent.
        nested_matches = [
            col
            for col in name_matches
            if col["data"].get("parentCollection") in parent_keys
        ]
        chosen = nested_matches[0] if nested_matches else name_matches[0]
        logger.info("Found direct match for %s: %s", target_name, chosen["key"])
        return chosen["key"]

    if not create_missing:
        logger.warning("No matching collection found for %s", target_name)
        return None

    # No match found: create the collection with its original spelling.
    payload = {"name": display_parts[-1]}
    if parent_keys:
        payload["parentCollection"] = parent_keys[0]
    try:
        # pyzotero expects a list of collection dicts, not a bare dict.
        result = zot.create_collection([payload])
        # Interpret result based on structure
        if "success" in result:
            new_key = result["success"]["0"]
        else:
            new_key = result["successful"]["0"]["data"]["key"]
    except Exception as e:
        logger.error("Failed to create collection: %s", e)
        return None

    logger.info("Created collection %s with key %s", payload["name"], new_key)
    return new_key
|
122
|
+
|
123
|
+
|
124
|
+
def get_all_collection_paths(zot):
    """
    List the full path of every collection returned by the Zotero client.

    Each path starts with "/" and joins the collection's ancestor names,
    outermost first. A parent key that is not among the returned collections
    contributes the segment "Unknown".

    Args:
        zot (Zotero): An initialized Zotero client.

    Returns:
        list: One path string per collection, in the order they were returned.
    """
    logger.info("Getting all collection paths")
    collections = zot.collections()

    # Lookup tables: collection key -> name, and collection key -> parent key
    names_by_key = {}
    parents_by_key = {}
    for entry in collections:
        names_by_key[entry["key"]] = entry["data"]["name"]
    for entry in collections:
        parents_by_key[entry["key"]] = entry["data"].get("parentCollection")

    collection_paths = []
    for key in names_by_key:
        # Walk from the collection up to its root, then flip the order.
        segments = []
        current = key
        while current:
            segments.append(names_by_key.get(current, "Unknown"))
            current = parents_by_key.get(current)
        segments.reverse()
        collection_paths.append("/" + "/".join(segments))

    logger.info("Found %d collection paths", len(collection_paths))
    return collection_paths
|
154
|
+
|
155
|
+
|
156
|
+
def fetch_papers_for_save(state):
    """
    Look up the papers that should be saved to Zotero in the agent state.

    ``state["last_displayed_papers"]`` is either the name of another state
    key holding the papers, or the papers object itself.

    Args:
        state (dict): The state containing previously fetched papers.

    Returns:
        dict: The papers to save, or None when the state has no
        ``last_displayed_papers`` value or the resolved papers are empty.
    """
    logger.info("Fetching papers from state for saving")

    pointer = state.get("last_displayed_papers", "")
    if not pointer:
        logger.warning("No last_displayed_papers key in state")
        return None

    if not isinstance(pointer, str):
        # The state entry already is the papers object
        papers = pointer
        logger.info("Using papers directly from last_displayed_papers")
    else:
        # The state entry names another state key that holds the papers
        papers = state.get(pointer, {})
        logger.info("Using papers from '%s' state key", pointer)

    if papers:
        return papers

    logger.warning("No fetched papers found to save.")
    return None
|