aiagents4pharma-1.28.0-py3-none-any.whl → aiagents4pharma-1.29.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +35 -209
  2. aiagents4pharma/talk2scholars/agents/s2_agent.py +10 -6
  3. aiagents4pharma/talk2scholars/agents/zotero_agent.py +12 -6
  4. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +2 -48
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +5 -28
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +5 -21
  7. aiagents4pharma/talk2scholars/configs/config.yaml +1 -0
  8. aiagents4pharma/talk2scholars/configs/tools/__init__.py +1 -0
  9. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +1 -1
  10. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +1 -1
  11. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +1 -1
  12. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +42 -1
  13. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  14. aiagents4pharma/talk2scholars/tests/test_main_agent.py +186 -111
  15. aiagents4pharma/talk2scholars/tests/test_s2_display.py +74 -0
  16. aiagents4pharma/talk2scholars/tests/test_s2_multi.py +282 -0
  17. aiagents4pharma/talk2scholars/tests/test_s2_query.py +78 -0
  18. aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +65 -0
  19. aiagents4pharma/talk2scholars/tests/test_s2_search.py +266 -0
  20. aiagents4pharma/talk2scholars/tests/test_s2_single.py +274 -0
  21. aiagents4pharma/talk2scholars/tests/test_zotero_path.py +57 -0
  22. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +412 -0
  23. aiagents4pharma/talk2scholars/tests/test_zotero_write.py +626 -0
  24. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +50 -34
  25. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +8 -8
  26. aiagents4pharma/talk2scholars/tools/s2/search.py +36 -23
  27. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +44 -38
  28. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +2 -0
  29. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +5 -0
  30. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +63 -0
  31. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +64 -19
  32. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +247 -0
  33. {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.29.0.dist-info}/METADATA +6 -5
  34. {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.29.0.dist-info}/RECORD +37 -28
  35. aiagents4pharma/talk2scholars/tests/test_call_s2.py +0 -100
  36. aiagents4pharma/talk2scholars/tests/test_call_zotero.py +0 -94
  37. aiagents4pharma/talk2scholars/tests/test_s2_tools.py +0 -355
  38. aiagents4pharma/talk2scholars/tests/test_zotero_tool.py +0 -171
  39. {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.29.0.dist-info}/LICENSE +0 -0
  40. {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.29.0.dist-info}/WHEEL +0 -0
  41. {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.29.0.dist-info}/top_level.txt +0 -0
@@ -16,6 +16,7 @@ from langchain_core.tools.base import InjectedToolCallId
16
16
  from langgraph.types import Command
17
17
  from pydantic import BaseModel, Field
18
18
 
19
+ # pylint: disable=R0914,R0912,R0915
19
20
 
20
21
  # Configure logging
21
22
  logging.basicConfig(level=logging.INFO)
@@ -26,7 +27,7 @@ class MultiPaperRecInput(BaseModel):
26
27
  """Input schema for multiple paper recommendations tool."""
27
28
 
28
29
  paper_ids: List[str] = Field(
29
- description=("List of Semantic Scholar Paper IDs to get recommendations for")
30
+ description="List of Semantic Scholar Paper IDs to get recommendations for"
30
31
  )
31
32
  limit: int = Field(
32
33
  default=2,
@@ -44,14 +45,6 @@ class MultiPaperRecInput(BaseModel):
44
45
  model_config = {"arbitrary_types_allowed": True}
45
46
 
46
47
 
47
- # Load hydra configuration
48
- with hydra.initialize(version_base=None, config_path="../../configs"):
49
- cfg = hydra.compose(
50
- config_name="config", overrides=["tools/multi_paper_recommendation=default"]
51
- )
52
- cfg = cfg.tools.multi_paper_recommendation
53
-
54
-
55
48
  @tool(args_schema=MultiPaperRecInput, parse_docstring=True)
56
49
  def get_multi_paper_recommendations(
57
50
  paper_ids: List[str],
@@ -73,7 +66,14 @@ def get_multi_paper_recommendations(
73
66
  Returns:
74
67
  Dict[str, Any]: The recommendations and related information.
75
68
  """
76
- logging.info(
69
+ # Load hydra configuration
70
+ with hydra.initialize(version_base=None, config_path="../../configs"):
71
+ cfg = hydra.compose(
72
+ config_name="config", overrides=["tools/multi_paper_recommendation=default"]
73
+ )
74
+ cfg = cfg.tools.multi_paper_recommendation
75
+ logger.info("Loaded configuration for multi-paper recommendation tool")
76
+ logger.info(
77
77
  "Starting multi-paper recommendations search with paper IDs: %s", paper_ids
78
78
  )
79
79
 
@@ -89,45 +89,61 @@ def get_multi_paper_recommendations(
89
89
  if year:
90
90
  params["year"] = year
91
91
 
92
- # Getting recommendations
93
- response = requests.post(
94
- endpoint,
95
- headers=headers,
96
- params=params,
97
- data=json.dumps(payload),
98
- timeout=cfg.request_timeout,
99
- )
100
- logging.info(
92
+ # Wrap API call in try/except to catch connectivity issues and validate response format
93
+ try:
94
+ response = requests.post(
95
+ endpoint,
96
+ headers=headers,
97
+ params=params,
98
+ data=json.dumps(payload),
99
+ timeout=cfg.request_timeout,
100
+ )
101
+ response.raise_for_status() # Raises HTTPError for bad responses
102
+ except requests.exceptions.RequestException as e:
103
+ logger.error(
104
+ "Failed to connect to Semantic Scholar API for multi-paper recommendations: %s",
105
+ e,
106
+ )
107
+ raise RuntimeError(
108
+ "Failed to connect to Semantic Scholar API. Please retry the same query."
109
+ ) from e
110
+
111
+ logger.info(
101
112
  "API Response Status for multi-paper recommendations: %s", response.status_code
102
113
  )
114
+ logger.info("Request params: %s", params)
103
115
 
104
116
  data = response.json()
105
- recommendations = data.get("recommendedPapers", [])
106
117
 
118
+ # Check for expected data format
119
+ if "recommendedPapers" not in data:
120
+ logger.error("Unexpected API response format: %s", data)
121
+ raise RuntimeError(
122
+ "Unexpected response from Semantic Scholar API. The results could not be "
123
+ "retrieved due to an unexpected format. "
124
+ "Please modify your search query and try again."
125
+ )
126
+
127
+ recommendations = data.get("recommendedPapers", [])
107
128
  if not recommendations:
108
- return Command(
109
- update={ # Place 'messages' inside 'update'
110
- "messages": [
111
- ToolMessage(
112
- content="No recommendations found based on multiple papers.",
113
- tool_call_id=tool_call_id,
114
- )
115
- ]
116
- }
129
+ logger.error(
130
+ "No recommendations returned from API for paper IDs: %s", paper_ids
131
+ )
132
+ raise RuntimeError(
133
+ "No recommendations were found for your query. Consider refining your search "
134
+ "by using more specific keywords or different terms."
117
135
  )
118
136
 
119
137
  # Create a dictionary to store the papers
120
138
  filtered_papers = {
121
139
  paper["paperId"]: {
122
- # "semantic_scholar_id": paper["paperId"], # Store Semantic Scholar ID
140
+ "paper_id": paper["paperId"],
123
141
  "Title": paper.get("title", "N/A"),
124
142
  "Abstract": paper.get("abstract", "N/A"),
125
143
  "Year": paper.get("year", "N/A"),
126
144
  "Citation Count": paper.get("citationCount", "N/A"),
127
145
  "URL": paper.get("url", "N/A"),
128
- # "arXiv_ID": paper.get("externalIds", {}).get(
129
- # "ArXiv", "N/A"
130
- # ), # Extract arXiv ID
146
+ "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
131
147
  }
132
148
  for paper in recommendations
133
149
  if paper.get("title") and paper.get("authors")
@@ -156,7 +172,7 @@ def get_multi_paper_recommendations(
156
172
 
157
173
  return Command(
158
174
  update={
159
- "multi_papers": filtered_papers, # Now sending the dictionary directly
175
+ "multi_papers": filtered_papers, # Sending the dictionary directly
160
176
  "last_displayed_papers": "multi_papers",
161
177
  "messages": [
162
178
  ToolMessage(
@@ -19,14 +19,6 @@ from pydantic import Field
19
19
  logging.basicConfig(level=logging.INFO)
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
- # Load hydra configuration
23
- with hydra.initialize(version_base=None, config_path="../../configs"):
24
- cfg = hydra.compose(
25
- config_name="config",
26
- overrides=["tools/retrieve_semantic_scholar_paper_id=default"],
27
- )
28
- cfg = cfg.tools.retrieve_semantic_scholar_paper_id
29
-
30
22
 
31
23
  @tool("retrieve_semantic_scholar_paper_id", parse_docstring=True)
32
24
  def retrieve_semantic_scholar_paper_id(
@@ -49,6 +41,14 @@ def retrieve_semantic_scholar_paper_id(
49
41
  Returns:
50
42
  ToolMessage: A message containing the paper ID.
51
43
  """
44
+ # Load hydra configuration
45
+ with hydra.initialize(version_base=None, config_path="../../configs"):
46
+ cfg = hydra.compose(
47
+ config_name="config",
48
+ overrides=["tools/retrieve_semantic_scholar_paper_id=default"],
49
+ )
50
+ cfg = cfg.tools.retrieve_semantic_scholar_paper_id
51
+ logger.info("Loaded configuration for Semantic Scholar paper ID retrieval tool")
52
52
  logger.info("Retrieving ID of paper with title: %s", paper_title)
53
53
  endpoint = cfg.api_endpoint
54
54
  params = {
@@ -37,12 +37,6 @@ class SearchInput(BaseModel):
37
37
  tool_call_id: Annotated[str, InjectedToolCallId]
38
38
 
39
39
 
40
- # Load hydra configuration
41
- with hydra.initialize(version_base=None, config_path="../../configs"):
42
- cfg = hydra.compose(config_name="config", overrides=["tools/search=default"])
43
- cfg = cfg.tools.search
44
-
45
-
46
40
  @tool("search_tool", args_schema=SearchInput, parse_docstring=True)
47
41
  def search_tool(
48
42
  query: str,
@@ -56,13 +50,18 @@ def search_tool(
56
50
  Args:
57
51
  query (str): The search query string to find academic papers.
58
52
  tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
59
- limit (int, optional): The maximum number of results to return. Defaults to 2.
53
+ limit (int, optional): The maximum number of results to return. Defaults to 5.
60
54
  year (str, optional): Year range for papers.
61
55
  Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
62
56
 
63
57
  Returns:
64
58
  The number of papers found on Semantic Scholar.
65
59
  """
60
+ # Load hydra configuration
61
+ with hydra.initialize(version_base=None, config_path="../../configs"):
62
+ cfg = hydra.compose(config_name="config", overrides=["tools/search=default"])
63
+ cfg = cfg.tools.search
64
+ logger.info("Loaded configuration for search tool")
66
65
  logger.info("Searching for papers on %s", query)
67
66
  endpoint = cfg.api_endpoint
68
67
  params = {
@@ -75,33 +74,47 @@ def search_tool(
75
74
  if year:
76
75
  params["year"] = year
77
76
 
78
- response = requests.get(endpoint, params=params, timeout=10)
77
+ # Wrap API call in try/except to catch connectivity issues
78
+ try:
79
+ response = requests.get(endpoint, params=params, timeout=10)
80
+ response.raise_for_status() # Raises HTTPError for bad responses
81
+ except requests.exceptions.RequestException as e:
82
+ logger.error("Failed to connect to Semantic Scholar API: %s", e)
83
+ raise RuntimeError(
84
+ "Failed to connect to Semantic Scholar API. Please retry the same query."
85
+ ) from e
86
+
79
87
  data = response.json()
88
+
89
+ # Check for expected data format
90
+ if "data" not in data:
91
+ logger.error("Unexpected API response format: %s", data)
92
+ raise RuntimeError(
93
+ "Unexpected response from Semantic Scholar API. The results could not be "
94
+ "retrieved due to an unexpected format. "
95
+ "Please modify your search query and try again."
96
+ )
97
+
80
98
  papers = data.get("data", [])
81
- logger.info("Received %d papers", len(papers))
82
99
  if not papers:
83
- return Command(
84
- update={ # Place 'messages' inside 'update'
85
- "messages": [
86
- ToolMessage(
87
- content="No papers found. Please try a different search query.",
88
- tool_call_id=tool_call_id,
89
- )
90
- ]
91
- }
100
+ logger.error(
101
+ "No papers returned from Semantic Scholar API for query: %s", query
92
102
  )
103
+ raise RuntimeError(
104
+ "No papers were found for your query. Consider refining your search "
105
+ "by using more specific keywords or different terms."
106
+ )
107
+
93
108
  # Create a dictionary to store the papers
94
109
  filtered_papers = {
95
110
  paper["paperId"]: {
96
- # "semantic_scholar_id": paper["paperId"], # Store Semantic Scholar ID
111
+ "paper_id": paper["paperId"],
97
112
  "Title": paper.get("title", "N/A"),
98
113
  "Abstract": paper.get("abstract", "N/A"),
99
114
  "Year": paper.get("year", "N/A"),
100
115
  "Citation Count": paper.get("citationCount", "N/A"),
101
116
  "URL": paper.get("url", "N/A"),
102
- # "arXiv_ID": paper.get("externalIds", {}).get(
103
- # "ArXiv", "N/A"
104
- # ), # Extract arXiv ID
117
+ "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
105
118
  }
106
119
  for paper in papers
107
120
  if paper.get("title") and paper.get("authors")
@@ -129,7 +142,7 @@ def search_tool(
129
142
 
130
143
  return Command(
131
144
  update={
132
- "papers": filtered_papers, # Now sending the dictionary directly
145
+ "papers": filtered_papers, # Sending the dictionary directly
133
146
  "last_displayed_papers": "papers",
134
147
  "messages": [
135
148
  ToolMessage(
@@ -40,14 +40,6 @@ class SinglePaperRecInput(BaseModel):
40
40
  model_config = {"arbitrary_types_allowed": True}
41
41
 
42
42
 
43
- # Load hydra configuration
44
- with hydra.initialize(version_base=None, config_path="../../configs"):
45
- cfg = hydra.compose(
46
- config_name="config", overrides=["tools/single_paper_recommendation=default"]
47
- )
48
- cfg = cfg.tools.single_paper_recommendation
49
-
50
-
51
43
  @tool(args_schema=SinglePaperRecInput, parse_docstring=True)
52
44
  def get_single_paper_recommendations(
53
45
  paper_id: str,
@@ -56,19 +48,27 @@ def get_single_paper_recommendations(
56
48
  year: Optional[str] = None,
57
49
  ) -> Command[Any]:
58
50
  """
59
- Get recommendations for on a single paper using its Semantic Scholar ID.
51
+ Get recommendations for a single paper using its Semantic Scholar ID.
60
52
  No other ID types are supported.
61
53
 
62
54
  Args:
63
55
  paper_id (str): The Semantic Scholar Paper ID to get recommendations for.
64
56
  tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
65
- limit (int, optional): The maximum number of recommendations to return. Defaults to 2.
57
+ limit (int, optional): The maximum number of recommendations to return. Defaults to 5.
66
58
  year (str, optional): Year range for papers.
67
59
  Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
68
60
 
69
61
  Returns:
70
62
  Dict[str, Any]: The recommendations and related information.
71
63
  """
64
+ # Load hydra configuration
65
+ with hydra.initialize(version_base=None, config_path="../../configs"):
66
+ cfg = hydra.compose(
67
+ config_name="config",
68
+ overrides=["tools/single_paper_recommendation=default"],
69
+ )
70
+ cfg = cfg.tools.single_paper_recommendation
71
+ logger.info("Loaded configuration for single paper recommendation tool")
72
72
  logger.info(
73
73
  "Starting single paper recommendations search with paper ID: %s", paper_id
74
74
  )
@@ -84,48 +84,54 @@ def get_single_paper_recommendations(
84
84
  if year:
85
85
  params["year"] = year
86
86
 
87
- response = requests.get(endpoint, params=params, timeout=cfg.request_timeout)
88
- data = response.json()
89
- response = requests.get(endpoint, params=params, timeout=10)
90
- # print(f"API Response Status: {response.status_code}")
91
- logging.info(
87
+ # Wrap API call in try/except to catch connectivity issues and check response format
88
+ try:
89
+ response = requests.get(endpoint, params=params, timeout=cfg.request_timeout)
90
+ response.raise_for_status() # Raises HTTPError for bad responses
91
+ except requests.exceptions.RequestException as e:
92
+ logger.error(
93
+ "Failed to connect to Semantic Scholar API for recommendations: %s", e
94
+ )
95
+ raise RuntimeError(
96
+ "Failed to connect to Semantic Scholar API. Please retry the same query."
97
+ ) from e
98
+
99
+ logger.info(
92
100
  "API Response Status for recommendations of paper %s: %s",
93
101
  paper_id,
94
102
  response.status_code,
95
103
  )
96
- if response.status_code != 200:
97
- raise ValueError("Invalid paper ID or API error.")
98
- # print(f"Request params: {params}")
99
- logging.info("Request params: %s", params)
104
+ logger.info("Request params: %s", params)
100
105
 
101
106
  data = response.json()
102
- recommendations = data.get("recommendedPapers", [])
103
107
 
108
+ # Check for expected data format
109
+ if "recommendedPapers" not in data:
110
+ logger.error("Unexpected API response format: %s", data)
111
+ raise RuntimeError(
112
+ "Unexpected response from Semantic Scholar API. The results could not be "
113
+ "retrieved due to an unexpected format. "
114
+ "Please modify your search query and try again."
115
+ )
116
+
117
+ recommendations = data.get("recommendedPapers", [])
104
118
  if not recommendations:
105
- return Command(
106
- update={
107
- "papers": {},
108
- "messages": [
109
- ToolMessage(
110
- content=f"No recommendations found for {paper_id}.",
111
- tool_call_id=tool_call_id,
112
- )
113
- ],
114
- }
119
+ logger.error("No recommendations returned from API for paper: %s", paper_id)
120
+ raise RuntimeError(
121
+ "No recommendations were found for your query. Consider refining your search "
122
+ "by using more specific keywords or different terms."
115
123
  )
116
124
 
117
125
  # Extract paper ID and title from recommendations
118
126
  filtered_papers = {
119
127
  paper["paperId"]: {
120
- # "semantic_scholar_id": paper["paperId"], # Store Semantic Scholar ID
128
+ "paper_id": paper["paperId"],
121
129
  "Title": paper.get("title", "N/A"),
122
130
  "Abstract": paper.get("abstract", "N/A"),
123
131
  "Year": paper.get("year", "N/A"),
124
132
  "Citation Count": paper.get("citationCount", "N/A"),
125
133
  "URL": paper.get("url", "N/A"),
126
- # "arXiv_ID": paper.get("externalIds", {}).get(
127
- # "ArXiv", "N/A"
128
- # ), # Extract arXiv ID
134
+ "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
129
135
  }
130
136
  for paper in recommendations
131
137
  if paper.get("title") and paper.get("authors")
@@ -143,10 +149,10 @@ def get_single_paper_recommendations(
143
149
  logger.info("Filtered %d papers", len(filtered_papers))
144
150
 
145
151
  content = (
146
- "Recommendations based on single paper were successful. "
147
- "Papers are attached as an artifact."
152
+ "Recommendations based on the single paper were successful. "
153
+ "Papers are attached as an artifact. "
154
+ "Here is a summary of the recommendations:\n"
148
155
  )
149
- content += " Here is a summary of the recommendations:\n"
150
156
  content += f"Number of papers found: {len(filtered_papers)}\n"
151
157
  content += f"Query Paper ID: {paper_id}\n"
152
158
  content += f"Year: {year}\n" if year else ""
@@ -154,7 +160,7 @@ def get_single_paper_recommendations(
154
160
 
155
161
  return Command(
156
162
  update={
157
- "papers": filtered_papers, # Now sending the dictionary directly
163
+ "papers": filtered_papers, # Sending the dictionary directly
158
164
  "last_displayed_papers": "papers",
159
165
  "messages": [
160
166
  ToolMessage(
@@ -3,3 +3,5 @@ Import statements
3
3
  """
4
4
 
5
5
  from . import zotero_read
6
+ from . import zotero_write
7
+ from . import utils
@@ -0,0 +1,5 @@
1
+ """
2
+ Import statements
3
+ """
4
+
5
+ from . import zotero_path
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Utility functions for Zotero tools.
5
+ """
6
+
7
+ import logging
8
+
9
+ # Configure logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ def get_item_collections(zot):
15
+ """
16
+ Fetch all Zotero collections and map item keys to their full collection paths.
17
+
18
+ Args:
19
+ zot (Zotero): An initialized Zotero client.
20
+
21
+ Returns:
22
+ dict: A dictionary mapping item keys to a list of full collection paths.
23
+ """
24
+ logger.info("Fetching Zotero collections...")
25
+
26
+ # Fetch all collections
27
+ collections = zot.collections()
28
+
29
+ # Create mappings: collection key → name and collection key → parent key
30
+ collection_map = {col["key"]: col["data"]["name"] for col in collections}
31
+ parent_map = {
32
+ col["key"]: col["data"].get("parentCollection") for col in collections
33
+ }
34
+
35
+ # Build full paths for collections
36
+ def build_collection_path(col_key):
37
+ path = []
38
+ while col_key:
39
+ path.insert(0, collection_map.get(col_key, "Unknown"))
40
+ col_key = parent_map.get(col_key)
41
+ return "/" + "/".join(path) # Convert to "/path/to/collection"
42
+
43
+ collection_paths = {key: build_collection_path(key) for key in collection_map}
44
+
45
+ # Manually create an item-to-collection mapping with full paths
46
+ item_to_collections = {}
47
+
48
+ for collection in collections:
49
+ collection_key = collection["key"]
50
+ collection_items = zot.collection_items(
51
+ collection_key
52
+ ) # Fetch items in the collection
53
+
54
+ for item in collection_items:
55
+ item_key = item["data"]["key"]
56
+ if item_key in item_to_collections:
57
+ item_to_collections[item_key].append(collection_paths[collection_key])
58
+ else:
59
+ item_to_collections[item_key] = [collection_paths[collection_key]]
60
+
61
+ logger.info("Successfully mapped items to collection paths.")
62
+
63
+ return item_to_collections
@@ -13,6 +13,11 @@ from langchain_core.tools import tool
13
13
  from langchain_core.tools.base import InjectedToolCallId
14
14
  from langgraph.types import Command
15
15
  from pydantic import BaseModel, Field
16
+ from aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_path import (
17
+ get_item_collections,
18
+ )
19
+
20
+ # pylint: disable=R0914,R0912,R0915
16
21
 
17
22
  # Configure logging
18
23
  logging.basicConfig(level=logging.INFO)
@@ -27,7 +32,7 @@ class ZoteroSearchInput(BaseModel):
27
32
  )
28
33
  only_articles: bool = Field(
29
34
  default=True,
30
- description="Whether to only search for journal articles/" "conference papers.",
35
+ description="Whether to only search for journal articles/conference papers.",
31
36
  )
32
37
  limit: int = Field(
33
38
  default=2, description="Maximum number of results to return", ge=1, le=100
@@ -35,12 +40,6 @@ class ZoteroSearchInput(BaseModel):
35
40
  tool_call_id: Annotated[str, InjectedToolCallId]
36
41
 
37
42
 
38
- # Load hydra configuration
39
- with hydra.initialize(version_base=None, config_path="../../configs"):
40
- cfg = hydra.compose(config_name="config", overrides=["tools/zotero_read=default"])
41
- cfg = cfg.tools.zotero_read
42
-
43
-
44
43
  @tool(args_schema=ZoteroSearchInput, parse_docstring=True)
45
44
  def zotero_search_tool(
46
45
  query: str,
@@ -59,22 +58,55 @@ def zotero_search_tool(
59
58
  Returns:
60
59
  Dict[str, Any]: The search results and related information.
61
60
  """
62
- logger.info(
63
- "Searching Zotero for query: '%s' (only_articles: %s, limit: %d)",
64
- query,
65
- only_articles,
66
- limit,
67
- )
61
+ # Load hydra configuration
62
+ with hydra.initialize(version_base=None, config_path="../../configs"):
63
+ cfg = hydra.compose(
64
+ config_name="config", overrides=["tools/zotero_read=default"]
65
+ )
66
+ logger.info("Loaded configuration for Zotero search tool")
67
+ cfg = cfg.tools.zotero_read
68
+ logger.info(
69
+ "Searching Zotero for query: '%s' (only_articles: %s, limit: %d)",
70
+ query,
71
+ only_articles,
72
+ limit,
73
+ )
68
74
 
69
75
  # Initialize Zotero client
70
76
  zot = zotero.Zotero(cfg.user_id, cfg.library_type, cfg.api_key)
71
77
 
72
- # Get items matching the query
73
- items = zot.items(q=query, limit=min(limit, cfg.zotero.max_limit))
78
+ # Fetch collection mapping once
79
+ item_to_collections = get_item_collections(zot)
80
+
81
+ # If the query is empty, fetch all items (up to max_limit), otherwise use the query
82
+ try:
83
+ if query.strip() == "":
84
+ logger.info(
85
+ "Empty query provided, fetching all items up to max_limit: %d",
86
+ cfg.zotero.max_limit,
87
+ )
88
+ items = zot.items(limit=cfg.zotero.max_limit)
89
+ else:
90
+ items = zot.items(q=query, limit=min(limit, cfg.zotero.max_limit))
91
+ except Exception as e:
92
+ logger.error("Failed to fetch items from Zotero: %s", e)
93
+ raise RuntimeError(
94
+ "Failed to fetch items from Zotero. Please retry the same query."
95
+ ) from e
96
+
74
97
  logger.info("Received %d items from Zotero", len(items))
75
98
 
99
+ if not items:
100
+ logger.error("No items returned from Zotero for query: '%s'", query)
101
+ raise RuntimeError(
102
+ "No items returned from Zotero. Please retry the same query."
103
+ )
104
+
76
105
  # Define filter criteria
77
106
  filter_item_types = cfg.zotero.filter_item_types if only_articles else []
107
+ filter_excluded_types = (
108
+ cfg.zotero.filter_excluded_types
109
+ ) # Exclude non-research items
78
110
 
79
111
  # Filter and format papers
80
112
  filtered_papers = {}
@@ -88,10 +120,16 @@ def zotero_search_tool(
88
120
  continue
89
121
 
90
122
  item_type = data.get("itemType")
91
- if only_articles and (
123
+ logger.debug("Item type: %s", item_type)
124
+
125
+ # Exclude attachments, notes, and other unwanted types
126
+ if (
92
127
  not item_type
93
128
  or not isinstance(item_type, str)
94
- or item_type not in filter_item_types
129
+ or item_type in filter_excluded_types # Skip attachments & notes
130
+ or (
131
+ only_articles and item_type not in filter_item_types
132
+ ) # Skip non-research types
95
133
  ):
96
134
  continue
97
135
 
@@ -99,20 +137,27 @@ def zotero_search_tool(
99
137
  if not key:
100
138
  continue
101
139
 
140
+ # Use the imported utility function's mapping to get collection paths
141
+ collection_paths = item_to_collections.get(key, ["/Unknown"])
142
+
102
143
  filtered_papers[key] = {
103
144
  "Title": data.get("title", "N/A"),
104
145
  "Abstract": data.get("abstractNote", "N/A"),
105
146
  "Date": data.get("date", "N/A"),
106
147
  "URL": data.get("url", "N/A"),
107
148
  "Type": item_type if isinstance(item_type, str) else "N/A",
149
+ "Collections": collection_paths, # Now displays full paths
108
150
  }
109
151
 
110
152
  if not filtered_papers:
111
- logger.warning("No matching papers found for query: '%s'", query)
153
+ logger.error("No matching papers returned from Zotero for query: '%s'", query)
154
+ raise RuntimeError(
155
+ "No matching papers returned from Zotero. Please retry the same query."
156
+ )
112
157
 
113
158
  logger.info("Filtered %d items", len(filtered_papers))
114
159
 
115
- # Prepare content with top 3 paper titles and types
160
+ # Prepare content with top 2 paper titles and types
116
161
  top_papers = list(filtered_papers.values())[:2]
117
162
  top_papers_info = "\n".join(
118
163
  [