agno 2.3.15__py3-none-any.whl → 2.3.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. agno/agent/__init__.py +2 -0
  2. agno/agent/agent.py +4 -53
  3. agno/agent/remote.py +351 -0
  4. agno/client/__init__.py +3 -0
  5. agno/client/os.py +2669 -0
  6. agno/db/base.py +20 -0
  7. agno/db/mongo/async_mongo.py +11 -0
  8. agno/db/mongo/mongo.py +10 -0
  9. agno/db/mysql/async_mysql.py +9 -0
  10. agno/db/mysql/mysql.py +9 -0
  11. agno/db/postgres/async_postgres.py +9 -0
  12. agno/db/postgres/postgres.py +9 -0
  13. agno/db/postgres/utils.py +3 -2
  14. agno/db/sqlite/async_sqlite.py +9 -0
  15. agno/db/sqlite/sqlite.py +11 -1
  16. agno/exceptions.py +23 -0
  17. agno/knowledge/chunking/semantic.py +123 -46
  18. agno/knowledge/reader/csv_reader.py +9 -14
  19. agno/knowledge/reader/docx_reader.py +2 -4
  20. agno/knowledge/reader/field_labeled_csv_reader.py +11 -17
  21. agno/knowledge/reader/json_reader.py +9 -17
  22. agno/knowledge/reader/markdown_reader.py +6 -6
  23. agno/knowledge/reader/pdf_reader.py +14 -11
  24. agno/knowledge/reader/pptx_reader.py +2 -4
  25. agno/knowledge/reader/s3_reader.py +2 -11
  26. agno/knowledge/reader/text_reader.py +6 -18
  27. agno/knowledge/reader/web_search_reader.py +4 -15
  28. agno/os/app.py +104 -23
  29. agno/os/auth.py +25 -1
  30. agno/os/interfaces/a2a/a2a.py +7 -6
  31. agno/os/interfaces/a2a/router.py +13 -13
  32. agno/os/interfaces/agui/agui.py +5 -3
  33. agno/os/interfaces/agui/router.py +23 -16
  34. agno/os/interfaces/base.py +7 -7
  35. agno/os/interfaces/slack/router.py +6 -6
  36. agno/os/interfaces/slack/slack.py +7 -7
  37. agno/os/interfaces/whatsapp/router.py +29 -6
  38. agno/os/interfaces/whatsapp/whatsapp.py +11 -8
  39. agno/os/managers.py +326 -0
  40. agno/os/mcp.py +651 -79
  41. agno/os/router.py +125 -18
  42. agno/os/routers/agents/router.py +65 -22
  43. agno/os/routers/agents/schema.py +16 -4
  44. agno/os/routers/database.py +5 -0
  45. agno/os/routers/evals/evals.py +93 -11
  46. agno/os/routers/evals/utils.py +6 -6
  47. agno/os/routers/knowledge/knowledge.py +104 -16
  48. agno/os/routers/memory/memory.py +124 -7
  49. agno/os/routers/metrics/metrics.py +21 -4
  50. agno/os/routers/session/session.py +141 -12
  51. agno/os/routers/teams/router.py +40 -14
  52. agno/os/routers/teams/schema.py +12 -4
  53. agno/os/routers/traces/traces.py +54 -4
  54. agno/os/routers/workflows/router.py +223 -117
  55. agno/os/routers/workflows/schema.py +65 -1
  56. agno/os/schema.py +38 -12
  57. agno/os/utils.py +87 -166
  58. agno/remote/__init__.py +3 -0
  59. agno/remote/base.py +484 -0
  60. agno/run/workflow.py +1 -0
  61. agno/team/__init__.py +2 -0
  62. agno/team/remote.py +287 -0
  63. agno/team/team.py +25 -54
  64. agno/tracing/exporter.py +10 -6
  65. agno/tracing/setup.py +2 -1
  66. agno/utils/agent.py +58 -1
  67. agno/utils/http.py +68 -20
  68. agno/utils/os.py +0 -0
  69. agno/utils/remote.py +23 -0
  70. agno/vectordb/chroma/chromadb.py +452 -16
  71. agno/vectordb/pgvector/pgvector.py +7 -0
  72. agno/vectordb/redis/redisdb.py +1 -1
  73. agno/workflow/__init__.py +2 -0
  74. agno/workflow/agent.py +2 -2
  75. agno/workflow/remote.py +222 -0
  76. agno/workflow/types.py +0 -73
  77. agno/workflow/workflow.py +119 -68
  78. {agno-2.3.15.dist-info → agno-2.3.17.dist-info}/METADATA +1 -1
  79. {agno-2.3.15.dist-info → agno-2.3.17.dist-info}/RECORD +82 -72
  80. {agno-2.3.15.dist-info → agno-2.3.17.dist-info}/WHEEL +0 -0
  81. {agno-2.3.15.dist-info → agno-2.3.17.dist-info}/licenses/LICENSE +0 -0
  82. {agno-2.3.15.dist-info → agno-2.3.17.dist-info}/top_level.txt +0 -0
agno/db/base.py CHANGED
@@ -58,6 +58,14 @@ class BaseDb(ABC):
         """Create all tables for this database."""
         pass
 
+    def close(self) -> None:
+        """Close database connections and release resources.
+
+        Override in subclasses to properly dispose of connection pools.
+        Should be called during application shutdown.
+        """
+        pass
+
     # --- Schema Version ---
     @abstractmethod
     def get_latest_schema_version(self, table_name: str):
@@ -517,6 +525,18 @@ class AsyncBaseDb(ABC):
         self.culture_table_name = culture_table or "agno_culture"
         self.versions_table_name = versions_table or "agno_schema_versions"
 
+    async def _create_all_tables(self) -> None:
+        """Create all tables for this database. Override in subclasses."""
+        pass
+
+    async def close(self) -> None:
+        """Close database connections and release resources.
+
+        Override in subclasses to properly dispose of connection pools.
+        Should be called during application shutdown.
+        """
+        pass
+
     @abstractmethod
     async def table_exists(self, table_name: str) -> bool:
         """Check if a table with the given name exists in this database.
agno/db/mongo/async_mongo.py CHANGED
@@ -249,6 +249,17 @@ class AsyncMongoDb(AsyncBaseDb):
         if collection_name and not await self.table_exists(collection_name):
             await self._get_collection(collection_type, create_collection_if_not_found=True)
 
+    async def close(self) -> None:
+        """Close the MongoDB client connection.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self._client is not None:
+            self._client.close()
+            self._client = None
+            self._database = None
+
     def _ensure_client(self) -> AsyncMongoClientType:
         """
         Ensure the MongoDB async client is valid for the current event loop.
agno/db/mongo/mongo.py CHANGED
@@ -98,10 +98,20 @@ class MongoDb(BaseDb):
 
         self.db_url: Optional[str] = db_url
         self.db_client: MongoClient = _client
+
         self.db_name: str = db_name if db_name is not None else "agno"
 
         self._database: Optional[Database] = None
 
+    def close(self) -> None:
+        """Close the MongoDB client connection.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self.db_client is not None:
+            self.db_client.close()
+
     @property
     def database(self) -> Database:
         if self._database is None:
agno/db/mysql/async_mysql.py CHANGED
@@ -123,6 +123,15 @@ class AsyncMySQLDb(AsyncBaseDb):
             expire_on_commit=False,
         )
 
+    async def close(self) -> None:
+        """Close database connections and dispose of the connection pool.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self.db_engine is not None:
+            await self.db_engine.dispose()
+
     # -- DB methods --
     async def table_exists(self, table_name: str) -> bool:
         """Check if a table with the given name exists in the MySQL database.
agno/db/mysql/mysql.py CHANGED
@@ -121,6 +121,15 @@ class MySQLDb(BaseDb):
         # Initialize database session
         self.Session: scoped_session = scoped_session(sessionmaker(bind=self.db_engine))
 
+    def close(self) -> None:
+        """Close database connections and dispose of the connection pool.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self.db_engine is not None:
+            self.db_engine.dispose()
+
     # -- DB methods --
     def table_exists(self, table_name: str) -> bool:
         """Check if a table with the given name exists in the MySQL database.
agno/db/postgres/async_postgres.py CHANGED
@@ -128,6 +128,15 @@ class AsyncPostgresDb(AsyncBaseDb):
             expire_on_commit=False,
         )
 
+    async def close(self) -> None:
+        """Close database connections and dispose of the connection pool.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self.db_engine is not None:
+            await self.db_engine.dispose()
+
     # -- DB methods --
     async def table_exists(self, table_name: str) -> bool:
         """Check if a table with the given name exists in the Postgres database.
agno/db/postgres/postgres.py CHANGED
@@ -124,6 +124,15 @@ class PostgresDb(BaseDb):
         # Initialize database session
         self.Session: scoped_session = scoped_session(sessionmaker(bind=self.db_engine, expire_on_commit=False))
 
+    def close(self) -> None:
+        """Close database connections and dispose of the connection pool.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self.db_engine is not None:
+            self.db_engine.dispose()
+
     # -- DB methods --
     def table_exists(self, table_name: str) -> bool:
         """Check if a table with the given name exists in the Postgres database.
agno/db/postgres/utils.py CHANGED
@@ -298,8 +298,9 @@ def calculate_date_metrics(date_to_process: date, sessions_data: dict) -> dict:
     for session in sessions:
         if session.get("user_id"):
             all_user_ids.add(session["user_id"])
-        metrics[runs_count_key] += len(session.get("runs", []))
-        if runs := session.get("runs", []):
+        runs = session.get("runs", []) or []
+        metrics[runs_count_key] += len(runs)
+        if runs:
             for run in runs:
                 if model_id := run.get("model"):
                     model_provider = run.get("model_provider", "")
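The `or []` guard matters because `dict.get` only falls back to its default when the key is missing, not when the stored value is an explicit `None`. A quick illustration:

session = {"user_id": "u1", "runs": None}

session.get("runs", [])        # -> None; len(None) raises TypeError (the old bug)
session.get("runs", []) or []  # -> []; safe to count and iterate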
agno/db/sqlite/async_sqlite.py CHANGED
@@ -124,6 +124,15 @@ class AsyncSqliteDb(AsyncBaseDb):
         # Initialize database session factory
         self.async_session_factory = async_sessionmaker(bind=self.db_engine, expire_on_commit=False)
 
+    async def close(self) -> None:
+        """Close database connections and dispose of the connection pool.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self.db_engine is not None:
+            await self.db_engine.dispose()
+
     # -- DB methods --
     async def table_exists(self, table_name: str) -> bool:
         """Check if a table with the given name exists in the SQLite database.
agno/db/sqlite/sqlite.py CHANGED
@@ -125,6 +125,15 @@ class SqliteDb(BaseDb):
         # Initialize database session
         self.Session: scoped_session = scoped_session(sessionmaker(bind=self.db_engine))
 
+    def close(self) -> None:
+        """Close database connections and dispose of the connection pool.
+
+        Should be called during application shutdown to properly release
+        all database connections.
+        """
+        if self.db_engine is not None:
+            self.db_engine.dispose()
+
     # -- DB methods --
     def table_exists(self, table_name: str) -> bool:
         """Check if a table with the given name exists in the SQLite database.
@@ -1110,7 +1119,8 @@ class SqliteDb(BaseDb):
             # Select topics from all results
             stmt = select(table.c.topics)
             result = sess.execute(stmt).fetchall()
-            return list(set([record[0] for record in result]))
+            result = result[0][0]
+            return list(set(result))
 
         except Exception as e:
             log_debug(f"Exception reading from memory table: {e}")
agno/exceptions.py CHANGED
@@ -178,3 +178,26 @@ class RetryableModelProviderError(Exception):
     original_error: Optional[str] = None
     # Guidance message to retry a model invocation after an error
     retry_guidance_message: Optional[str] = None
+
+
+class RemoteServerUnavailableError(AgnoError):
+    """Exception raised when a remote server is unavailable.
+
+    This can happen due to:
+    - Connection refused (server not running)
+    - Connection timeout
+    - Network errors
+    - DNS resolution failures
+    """
+
+    def __init__(
+        self,
+        message: str,
+        base_url: Optional[str] = None,
+        original_error: Optional[Exception] = None,
+    ):
+        super().__init__(message, status_code=503)
+        self.base_url = base_url
+        self.original_error = original_error
+        self.type = "remote_server_unavailable_error"
+        self.error_id = "remote_server_unavailable_error"
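A hypothetical sketch of how a caller might surface the new exception; `ping_remote`, the use of `httpx`, and the `/health` endpoint are assumptions, but the constructor arguments match the definition above:

import httpx

from agno.exceptions import RemoteServerUnavailableError


def ping_remote(base_url: str) -> None:
    # Translate transport-level failures (connection refused, timeout, DNS)
    # into the new 503-style exception.
    try:
        httpx.get(f"{base_url}/health", timeout=5.0).raise_for_status()
    except httpx.TransportError as e:
        raise RemoteServerUnavailableError(
            f"Remote server at {base_url} is unavailable",
            base_url=base_url,
            original_error=e,
        ) from e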
agno/knowledge/chunking/semantic.py CHANGED
@@ -1,5 +1,18 @@
-import inspect
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Literal, Optional, Union
+
+try:
+    import numpy as np
+except ImportError:
+    raise ImportError("`numpy` not installed. Please install using `pip install numpy`")
+
+try:
+    from chonkie import SemanticChunker
+    from chonkie.embeddings.base import BaseEmbeddings
+except ImportError:
+    raise ImportError(
+        "`chonkie` is required for semantic chunking. "
+        "Please install it using `pip install chonkie` to use SemanticChunking."
+    )
 
 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
@@ -7,10 +20,69 @@ from agno.knowledge.embedder.base import Embedder
 from agno.utils.log import log_info
 
 
-class SemanticChunking(ChunkingStrategy):
-    """Chunking strategy that splits text into semantic chunks using chonkie"""
+def _get_chonkie_embedder_wrapper(embedder: Embedder):
+    """Create a wrapper that adapts Agno Embedder to chonkie's BaseEmbeddings interface."""
+
+    class _ChonkieEmbedderWrapper(BaseEmbeddings):
+        """Wrapper to make Agno Embedders compatible with chonkie."""
+
+        def __init__(self, agno_embedder: Embedder):
+            super().__init__()
+            self._embedder = agno_embedder
+
+        def embed(self, text: str):
+            embedding = self._embedder.get_embedding(text)  # type: ignore[attr-defined]
+            return np.array(embedding, dtype=np.float32)
+
+        def get_tokenizer(self):
+            """Return a simple token counter function."""
+            return lambda text: len(text.split())
 
-    def __init__(self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: float = 0.5):
+        @property
+        def dimension(self) -> int:
+            return getattr(self._embedder, "dimensions")
+
+    return _ChonkieEmbedderWrapper(embedder)
+
+
+class SemanticChunking(ChunkingStrategy):
+    """Chunking strategy that splits text into semantic chunks using chonkie.
+
+    Args:
+        embedder: The embedder to use for generating embeddings. Can be:
+            - A string model identifier (e.g., "minishlab/potion-base-32M") for chonkie's built-in models
+            - A chonkie BaseEmbeddings instance (used directly)
+            - An Agno Embedder (wrapped for chonkie compatibility)
+        chunk_size: Maximum tokens allowed per chunk.
+        similarity_threshold: Threshold for semantic similarity (0-1).
+        similarity_window: Number of sentences to consider for similarity calculation.
+        min_sentences_per_chunk: Minimum number of sentences per chunk.
+        min_characters_per_sentence: Minimum number of characters per sentence.
+        delimiters: Delimiters to use for sentence splitting.
+        include_delimiters: Whether to include delimiter in prev/next sentence or None.
+        skip_window: Number of groups to skip when merging (0=disabled).
+        filter_window: Window length for the Savitzky-Golay filter.
+        filter_polyorder: Polynomial order for the Savitzky-Golay filter.
+        filter_tolerance: Tolerance for the Savitzky-Golay filter.
+        chunker_params: Additional parameters to pass to chonkie's SemanticChunker.
+    """
+
+    def __init__(
+        self,
+        embedder: Optional[Union[str, Embedder, BaseEmbeddings]] = None,
+        chunk_size: int = 5000,
+        similarity_threshold: float = 0.5,
+        similarity_window: int = 3,
+        min_sentences_per_chunk: int = 1,
+        min_characters_per_sentence: int = 24,
+        delimiters: Optional[List[str]] = None,
+        include_delimiters: Literal["prev", "next", None] = "prev",
+        skip_window: int = 0,
+        filter_window: int = 5,
+        filter_polyorder: int = 3,
+        filter_tolerance: float = 0.2,
+        chunker_params: Optional[Dict[str, Any]] = None,
+    ):
         if embedder is None:
             from agno.knowledge.embedder.openai import OpenAIEmbedder
 
@@ -19,50 +91,55 @@ class SemanticChunking(ChunkingStrategy):
         self.embedder = embedder
         self.chunk_size = chunk_size
         self.similarity_threshold = similarity_threshold
-        self.chunker = None  # Will be initialized lazily when needed
+        self.similarity_window = similarity_window
+        self.min_sentences_per_chunk = min_sentences_per_chunk
+        self.min_characters_per_sentence = min_characters_per_sentence
+        self.delimiters = delimiters if delimiters is not None else [". ", "! ", "? ", "\n"]
+        self.include_delimiters = include_delimiters
+        self.skip_window = skip_window
+        self.filter_window = filter_window
+        self.filter_polyorder = filter_polyorder
+        self.filter_tolerance = filter_tolerance
+        self.chunker_params = chunker_params
+        self.chunker: Optional[SemanticChunker] = None
 
     def _initialize_chunker(self):
         """Lazily initialize the chunker with chonkie dependency."""
-        if self.chunker is None:
-            try:
-                from chonkie import SemanticChunker
-            except ImportError:
-                raise ImportError(
-                    "`chonkie` is required for semantic chunking. "
-                    "Please install it using `pip install chonkie` to use SemanticChunking."
-                )
-
-            # Build arguments dynamically based on chonkie's supported signature
-            params: Dict[str, Any] = {
-                "chunk_size": self.chunk_size,
-                "threshold": self.similarity_threshold,
-            }
-
-            try:
-                sig = inspect.signature(SemanticChunker)
-                param_names = set(sig.parameters.keys())
-
-                # Prefer passing a callable to avoid Chonkie initializing its own client
-                if "embedding_fn" in param_names:
-                    params["embedding_fn"] = self.embedder.get_embedding  # type: ignore[attr-defined]
-                    # If chonkie allows specifying dimensions, provide them
-                    if "embedding_dimensions" in param_names and getattr(self.embedder, "dimensions", None):
-                        params["embedding_dimensions"] = self.embedder.dimensions  # type: ignore[attr-defined]
-                elif "embedder" in param_names:
-                    # Some versions may accept an embedder object directly
-                    params["embedder"] = self.embedder
-                else:
-                    # Fallback to model id
-                    params["embedding_model"] = getattr(self.embedder, "id", None) or "text-embedding-3-small"
-
-                self.chunker = SemanticChunker(**params)  # type: ignore
-            except Exception:
-                # As a final fallback, use the original behavior
-                self.chunker = SemanticChunker(  # type: ignore
-                    embedding_model=getattr(self.embedder, "id", None) or "text-embedding-3-small",
-                    chunk_size=self.chunk_size,
-                    threshold=self.similarity_threshold,
-                )
+        if self.chunker is not None:
+            return
+
+        # Determine embedding model based on type:
+        # - str: pass directly to chonkie (uses chonkie's built-in models)
+        # - BaseEmbeddings: pass directly to chonkie
+        # - Agno Embedder: wrap for chonkie compatibility
+        embedding_model: Union[str, BaseEmbeddings]
+        if isinstance(self.embedder, str):
+            embedding_model = self.embedder
+        elif isinstance(self.embedder, BaseEmbeddings):
+            embedding_model = self.embedder
+        elif isinstance(self.embedder, Embedder):
+            embedding_model = _get_chonkie_embedder_wrapper(self.embedder)
+        else:
+            raise ValueError("Invalid embedder type. Must be a string, BaseEmbeddings, or Embedder instance.")
+
+        _chunker_params: Dict[str, Any] = {
+            "embedding_model": embedding_model,
+            "chunk_size": self.chunk_size,
+            "threshold": self.similarity_threshold,
+            "similarity_window": self.similarity_window,
+            "min_sentences_per_chunk": self.min_sentences_per_chunk,
+            "min_characters_per_sentence": self.min_characters_per_sentence,
+            "delim": self.delimiters,
+            "include_delim": self.include_delimiters,
+            "skip_window": self.skip_window,
+            "filter_window": self.filter_window,
+            "filter_polyorder": self.filter_polyorder,
+            "filter_tolerance": self.filter_tolerance,
+        }
+        if self.chunker_params:
+            _chunker_params.update(self.chunker_params)
+
+        self.chunker = SemanticChunker(**_chunker_params)
 
     def chunk(self, document: Document) -> List[Document]:
         """Split document into semantic chunks using chonkie"""
agno/knowledge/reader/csv_reader.py CHANGED
@@ -47,17 +47,16 @@ class CSVReader(Reader):
             if not file.exists():
                 raise FileNotFoundError(f"Could not find file: {file}")
             log_debug(f"Reading: {file}")
-            file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
+            csv_name = name or file.stem
+            file_content: Union[io.TextIOWrapper, io.StringIO] = file.open(
+                newline="", mode="r", encoding=self.encoding or "utf-8"
+            )
         else:
-            log_debug(f"Reading retrieved file: {name or file.name}")
+            log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
+            csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
             file.seek(0)
-            file_content = io.StringIO(file.read().decode("utf-8"))  # type: ignore
+            file_content = io.StringIO(file.read().decode("utf-8"))
 
-        csv_name = name or (
-            Path(file.name).stem
-            if isinstance(file, Path)
-            else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
-        )
         csv_content = ""
         with file_content as csvfile:
             csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
@@ -109,16 +108,12 @@ class CSVReader(Reader):
             async with aiofiles.open(file, mode="r", encoding="utf-8", newline="") as file_content:
                 content = await file_content.read()
                 file_content_io = io.StringIO(content)
+            csv_name = name or file.stem
         else:
             log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
             file.seek(0)
             file_content_io = io.StringIO(file.read().decode("utf-8"))
-
-        csv_name = name or (
-            Path(file.name).stem
-            if isinstance(file, Path)
-            else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
-        )
+            csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
 
         file_content_io.seek(0)
         csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
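The same name-derivation pattern now repeats across the readers: `Path.stem` for on-disk files, `getattr` with a fallback for file-like objects. An illustrative helper (not part of the package) showing the behavior:

import io
from pathlib import Path


def derive_name(file, fallback="csv_file"):
    # Mirrors the inline pattern the readers now use.
    if isinstance(file, Path):
        return file.stem
    return getattr(file, "name", fallback).split(".")[0]


derive_name(Path("data/users.csv"))     # -> "users"
derive_name(io.BytesIO(b"a,b\n1,2\n"))  # -> "csv_file" (BytesIO has no .name)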
agno/knowledge/reader/docx_reader.py CHANGED
@@ -47,11 +47,9 @@ class DocxReader(Reader):
             docx_document = DocxDocument(str(file))
             doc_name = name or file.stem
         else:
-            log_debug(f"Reading uploaded file: {getattr(file, 'name', 'docx_file')}")
+            log_debug(f"Reading uploaded file: {getattr(file, 'name', 'BytesIO')}")
             docx_document = DocxDocument(file)
-            doc_name = name or (
-                getattr(file, "name", "docx_file").split(".")[0] if hasattr(file, "name") else "docx_file"
-            )
+            doc_name = name or getattr(file, "name", "docx_file").split(".")[0]
 
         doc_content = "\n\n".join([para.text for para in docx_document.paragraphs])
 
agno/knowledge/reader/field_labeled_csv_reader.py CHANGED
@@ -106,17 +106,15 @@ class FieldLabeledCSVReader(Reader):
             if not file.exists():
                 raise FileNotFoundError(f"Could not find file: {file}")
             log_debug(f"Reading: {file}")
-            file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
+            csv_name = name or file.stem
+            file_content: Union[io.TextIOWrapper, io.StringIO] = file.open(
+                newline="", mode="r", encoding=self.encoding or "utf-8"
+            )
         else:
-            log_debug(f"Reading retrieved file: {name or file.name}")
+            log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
+            csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
             file.seek(0)
-            file_content = io.StringIO(file.read().decode("utf-8"))  # type: ignore
-
-        csv_name = name or (
-            Path(file.name).stem
-            if isinstance(file, Path)
-            else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
-        )
+            file_content = io.StringIO(file.read().decode("utf-8"))
 
         documents = []
 
@@ -189,16 +187,12 @@ class FieldLabeledCSVReader(Reader):
             async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
                 content = await file_content.read()
                 file_content_io = io.StringIO(content)
+            csv_name = name or file.stem
         else:
-            log_debug(f"Reading retrieved file async: {name or file.name}")
+            log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
+            csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
             file.seek(0)
-            file_content_io = io.StringIO(file.read().decode("utf-8"))  # type: ignore
-
-        csv_name = name or (
-            Path(file.name).stem
-            if isinstance(file, Path)
-            else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
-        )
+            file_content_io = io.StringIO(file.read().decode("utf-8"))
 
         file_content_io.seek(0)
         csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
agno/knowledge/reader/json_reader.py CHANGED
@@ -1,6 +1,5 @@
 import asyncio
 import json
-from io import BytesIO
 from pathlib import Path
 from typing import IO, Any, List, Optional, Union
 from uuid import uuid4
@@ -42,17 +41,15 @@ class JSONReader(Reader):
             if not path.exists():
                 raise FileNotFoundError(f"Could not find file: {path}")
             log_debug(f"Reading: {path}")
-            json_name = name or path.name.split(".")[0]
-            json_contents = json.loads(path.read_text(self.encoding or "utf-8"))
-
-        elif isinstance(path, BytesIO):
-            json_name = name or path.name.split(".")[0]
-            log_debug(f"Reading uploaded file: {json_name}")
+            json_name = name or path.stem
+            json_contents = json.loads(path.read_text(encoding=self.encoding or "utf-8"))
+        elif hasattr(path, "seek") and hasattr(path, "read"):
+            log_debug(f"Reading uploaded file: {getattr(path, 'name', 'BytesIO')}")
+            json_name = name or getattr(path, "name", "json_file").split(".")[0]
             path.seek(0)
             json_contents = json.load(path)
-
         else:
-            raise ValueError("Unsupported file type. Must be Path or BytesIO.")
+            raise ValueError("Unsupported file type. Must be Path or file-like object.")
 
         if isinstance(json_contents, dict):
             json_contents = [json_contents]
@@ -72,17 +69,12 @@ class JSONReader(Reader):
                     chunked_documents.extend(self.chunk_document(document))
                 return chunked_documents
             return documents
+        except (FileNotFoundError, ValueError, json.JSONDecodeError):
+            raise
         except Exception as e:
            log_error(f"Error reading: {path}: {e}")
            raise
 
     async def async_read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
-        """Asynchronously read JSON files.
-
-        Args:
-            path (Union[Path, IO[Any]]): Path to a JSON file or a file-like object
-
-        Returns:
-            List[Document]: List of documents from the JSON file
-        """
+        """Asynchronously read JSON files."""
         return await asyncio.to_thread(self.read, path, name)
agno/knowledge/reader/markdown_reader.py CHANGED
@@ -69,8 +69,8 @@ class MarkdownReader(Reader):
             file_name = name or file.stem
             file_contents = file.read_text(encoding=self.encoding or "utf-8")
         else:
-            log_debug(f"Reading uploaded file: {file.name}")
-            file_name = name or file.name.split(".")[0]
+            log_debug(f"Reading uploaded file: {getattr(file, 'name', 'BytesIO')}")
+            file_name = name or getattr(file, "name", "file").split(".")[0]
             file.seek(0)
             file_contents = file.read().decode(self.encoding or "utf-8")
 
@@ -101,16 +101,16 @@ class MarkdownReader(Reader):
                     file_contents = await f.read()
             except ImportError:
                 log_warning("aiofiles not installed, using synchronous file I/O")
-                file_contents = file.read_text(self.encoding or "utf-8")
+                file_contents = file.read_text(encoding=self.encoding or "utf-8")
         else:
-            log_debug(f"Reading uploaded file asynchronously: {file.name}")
-            file_name = name or file.name.split(".")[0]
+            log_debug(f"Reading uploaded file asynchronously: {getattr(file, 'name', 'BytesIO')}")
+            file_name = name or getattr(file, "name", "file").split(".")[0]
             file.seek(0)
             file_contents = file.read().decode(self.encoding or "utf-8")
 
         document = Document(
             name=file_name,
-            id=str(uuid.uuid4()),  # Fixed an issue with the id creation
+            id=str(uuid.uuid4()),
             content=file_contents,
         )
 
agno/knowledge/reader/pdf_reader.py CHANGED
@@ -218,16 +218,13 @@ class BasePDFReader(Reader):
 
     def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
         """Determines the document name from the source or a provided name."""
-        try:
-            if name:
-                return name
-            if isinstance(pdf_source, str):
-                return pdf_source.split("/")[-1].split(".")[0].replace(" ", "_")
-            # Assumes a file-like object with a .name attribute
-            return pdf_source.name.split(".")[0]
-        except Exception:
-            # The original code had a bug here, it should check `name` first.
-            return name or "pdf"
+        if name:
+            return name
+        if isinstance(pdf_source, str):
+            return Path(pdf_source).stem.replace(" ", "_")
+        if isinstance(pdf_source, Path):
+            return pdf_source.stem.replace(" ", "_")
+        return getattr(pdf_source, "name", "pdf_file").split(".")[0].replace(" ", "_")
 
     def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
         if not doc_reader.is_encrypted:
@@ -341,8 +338,14 @@ class PDFReader(BasePDFReader):
         return [ContentType.PDF]
 
     def read(
-        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
+        self,
+        pdf: Optional[Union[str, Path, IO[Any]]] = None,
+        name: Optional[str] = None,
+        password: Optional[str] = None,
     ) -> List[Document]:
+        if pdf is None:
+            log_error("No pdf provided")
+            return []
         doc_name = self._get_doc_name(pdf, name)
         log_debug(f"Reading: {doc_name}")
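Expected behavior of the rewritten name resolution, derived from the code above; constructing `PDFReader` with no arguments is an assumption here:

from io import BytesIO
from pathlib import Path

from agno.knowledge.reader.pdf_reader import PDFReader

reader = PDFReader()  # assumed default construction; illustrative only

reader._get_doc_name("reports/Q3 Summary.pdf")             # -> "Q3_Summary"
reader._get_doc_name(Path("reports/Q3 Summary.pdf"))       # -> "Q3_Summary"
reader._get_doc_name(BytesIO(b"%PDF-1.7"))                 # -> "pdf_file" (no .name)
reader._get_doc_name(BytesIO(b"%PDF-1.7"), name="manual")  # -> "manual"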