meshagent-agents 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meshagent-agents might be problematic.
- meshagent/agents/__init__.py +27 -2
- meshagent/agents/adapter.py +18 -9
- meshagent/agents/agent.py +317 -214
- meshagent/agents/chat.py +392 -267
- meshagent/agents/context.py +58 -30
- meshagent/agents/development.py +11 -13
- meshagent/agents/hosting.py +109 -46
- meshagent/agents/indexer.py +241 -224
- meshagent/agents/listener.py +55 -52
- meshagent/agents/mail.py +145 -109
- meshagent/agents/planning.py +294 -199
- meshagent/agents/prompt.py +14 -12
- meshagent/agents/pydantic.py +98 -61
- meshagent/agents/schemas/__init__.py +11 -0
- meshagent/agents/schemas/document.py +32 -21
- meshagent/agents/schemas/gallery.py +23 -14
- meshagent/agents/schemas/presentation.py +33 -17
- meshagent/agents/schemas/schema.py +99 -45
- meshagent/agents/schemas/super_editor_document.py +52 -46
- meshagent/agents/single_shot_writer.py +37 -31
- meshagent/agents/thread_schema.py +74 -32
- meshagent/agents/utils.py +20 -12
- meshagent/agents/version.py +1 -1
- meshagent/agents/worker.py +48 -28
- meshagent/agents/writer.py +36 -23
- meshagent_agents-0.0.39.dist-info/METADATA +64 -0
- meshagent_agents-0.0.39.dist-info/RECORD +30 -0
- meshagent_agents-0.0.37.dist-info/METADATA +0 -36
- meshagent_agents-0.0.37.dist-info/RECORD +0 -30
- {meshagent_agents-0.0.37.dist-info → meshagent_agents-0.0.39.dist-info}/WHEEL +0 -0
- {meshagent_agents-0.0.37.dist-info → meshagent_agents-0.0.39.dist-info}/licenses/LICENSE +0 -0
- {meshagent_agents-0.0.37.dist-info → meshagent_agents-0.0.39.dist-info}/top_level.txt +0 -0
meshagent/agents/indexer.py
CHANGED
@@ -1,6 +1,11 @@
 from meshagent.agents import TaskRunner, RequiredToolkit, SingleRoomAgent
 from meshagent.tools import Toolkit, Tool, ToolContext
-from meshagent.api.room_server_client import
+from meshagent.api.room_server_client import (
+    TextDataType,
+    VectorDataType,
+    FloatDataType,
+    IntDataType,
+)
 from openai import AsyncOpenAI
 from typing import Optional
 from meshagent.api.chan import Chan
@@ -16,7 +21,6 @@ import os

 # TODO: install chonkie, chonkie[semantic], openai

-from concurrent.futures import ThreadPoolExecutor, as_completed

 def _async_debounce(wait):
     def decorator(func):
@@ -40,6 +44,7 @@ def _async_debounce(wait):

     return decorator

+
 logger = logging.getLogger("indexer")


@@ -49,124 +54,141 @@ class Chunk:
         self.start = start
         self.end = end

-class Chunker:

-    async def chunk(self, *, text: str, max_length: Optional[int] = None) -> list[Chunk]:
+class Chunker:
+    async def chunk(
+        self, *, text: str, max_length: Optional[int] = None
+    ) -> list[Chunk]:
         pass

+
 class ChonkieChunker(Chunker):
     def __init__(self, chunker: Optional[chonkie.BaseChunker] = None):
         super().__init__()

-        if chunker == None:
+        if chunker is None:
             chunker = chonkie.SemanticChunker()

         self._chunker = chunker

-    async def chunk(self, *, text: str, max_length: Optional[int] = None) -> list[Chunk]:
+    async def chunk(
+        self, *, text: str, max_length: Optional[int] = None
+    ) -> list[Chunk]:
         chunks = await asyncio.to_thread(self._chunker.chunk, text=text)
         mapped = []
         for chunk in chunks:
-            mapped.append(Chunk(text=chunk.text, start=chunk.start_index, end=chunk.end_index))
+            mapped.append(
+                Chunk(text=chunk.text, start=chunk.start_index, end=chunk.end_index)
+            )
         return mapped
-
+

 class Embedder:
     def __init__(self, *, size: int, max_length: int):
         self.size = size
         self.max_length = max_length
-
+
     async def embed(self, *, text: str) -> list[float]:
         pass

+
 class OpenAIEmbedder(Embedder):
-    def __init__(self, *, size: int, max_length: int, model: str, openai: Optional[AsyncOpenAI] = None):
-        if openai == None:
+    def __init__(
+        self,
+        *,
+        size: int,
+        max_length: int,
+        model: str,
+        openai: Optional[AsyncOpenAI] = None,
+    ):
+        if openai is None:
             openai = AsyncOpenAI()
-
+
         self._openai = openai
         self._model = model

         super().__init__(size=size, max_length=max_length)

-
     async def embed(self, *, text):
-        return (
-            await self._openai.embeddings.create(input=text, model=self._model, encoding_format="float")).data[0].embedding
+        return (
+            (
+                await self._openai.embeddings.create(
+                    input=text, model=self._model, encoding_format="float"
+                )
+            )
+            .data[0]
+            .embedding
+        )


 class RagTool(Tool):
-    def __init__(
-
+    def __init__(
+        self,
+        *,
+        name="rag_search",
+        table: str,
+        title="RAG search",
+        description="perform a RAG search",
+        rules=None,
+        thumbnail_url=None,
+        embedder: Optional[Embedder] = None,
+    ):
         self.table = table

         super().__init__(
             name=name,
             input_schema={
-                "type":"object",
-                "additionalProperties" : False,
-                "required" : [
-                    "query"
-                ],
-                "properties" : {
-                    "query" : {
-                        "type" : "string"
-                    }
-                }
+                "type": "object",
+                "additionalProperties": False,
+                "required": ["query"],
+                "properties": {"query": {"type": "string"}},
             },
             title=title,
             description=description,
-            rules=rules,
-        )
+            rules=rules,
+            thumbnail_url=thumbnail_url,
+        )
+
         self._embedder = embedder

     async def execute(self, context: ToolContext, query: str):
-
-        if self._embedder == None:
+        if self._embedder is None:
             results = await context.room.database.search(
-                table=self.table,
-                text=query,
-                limit=10
+                table=self.table, text=query, limit=10
             )
         else:
             embedding = await self._embedder.embed(text=query)
             results = await context.room.database.search(
-                table=self.table,
-                text=query,
-                vector=embedding,
-                limit=10
+                table=self.table, text=query, vector=embedding, limit=10
             )

-        results = list(map(lambda r: f"from {r[
+        results = list(map(lambda r: f"from {r['url']}: {r['text']}", results))
+
+        return {"results": results}

-        return {
-            "results" : results
-        }
-

 def open_ai_embedding_3_small():
     return OpenAIEmbedder(model="text-embedding-3-small", max_length=8191, size=1536)

+
 def open_ai_embedding_3_large():
     return OpenAIEmbedder(model="text-embedding-3-large", max_length=8191, size=3072)

+
 def open_ai_embedding_ada_2():
     return OpenAIEmbedder(model="text-embedding-ada-002", max_length=8191, size=1536)


 class RagToolkit(Toolkit):
-    def __init__(self, table: str, embedder:Optional[Embedder] = None):
-
-        if embedder == None:
+    def __init__(self, table: str, embedder: Optional[Embedder] = None):
+        if embedder is None:
             embedder = open_ai_embedding_3_large()

         super().__init__(
             name="meshagent.rag",
             title="RAG",
             description="Searches against an index",
-            tools=[
-                RagTool(table=table, embedder=embedder)
-            ]
+            tools=[RagTool(table=table, embedder=embedder)],
         )

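Note on the hunk above: `Chunker` and `Embedder` are plain hook classes (their `chunk` and `embed` methods just `pass`), so a custom implementation only needs to match the new keyword-only signatures. A minimal sketch, not part of the package (the `FixedSizeChunker` class and its `window` parameter are hypothetical):

# Sketch only -- not from the diff; FixedSizeChunker and `window` are hypothetical.
from typing import Optional


class Chunk:
    def __init__(self, *, text: str, start: int, end: int):
        self.text = text
        self.start = start
        self.end = end


class Chunker:
    async def chunk(
        self, *, text: str, max_length: Optional[int] = None
    ) -> list[Chunk]:
        pass


class FixedSizeChunker(Chunker):
    """Splits text into fixed-size windows."""

    def __init__(self, *, window: int = 512):
        self.window = window

    async def chunk(
        self, *, text: str, max_length: Optional[int] = None
    ) -> list[Chunk]:
        # never emit a chunk longer than the embedder can accept
        size = min(self.window, max_length) if max_length else self.window
        return [
            Chunk(text=text[i : i + size], start=i, end=min(i + size, len(text)))
            for i in range(0, len(text), size)
        ]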
@@ -175,8 +197,8 @@ class FileIndexEvent:
         self.path = path
         self.deleted = deleted

-class StorageIndexer(SingleRoomAgent):

+class StorageIndexer(SingleRoomAgent):
     def __init__(
         self,
         *,
@@ -184,19 +206,25 @@ class StorageIndexer(SingleRoomAgent):
         title=None,
         description=None,
         requires=None,
-        labels = None,
+        labels=None,
         chunker: Optional[Chunker] = None,
-        embedder:Optional[Embedder] = None,
+        embedder: Optional[Embedder] = None,
         table: str = "storage_index",
-    ):
-        super().__init__(name=name, title=title, description=description, requires=requires, labels=labels)
+    ):
+        super().__init__(
+            name=name,
+            title=title,
+            description=description,
+            requires=requires,
+            labels=labels,
+        )

         self._chan = Chan[FileIndexEvent]()
-
-        if chunker == None:
+
+        if chunker is None:
             chunker = ChonkieChunker()

-        if embedder == None:
+        if embedder is None:
             embedder = open_ai_embedding_3_large()

         self.chunker = chunker
@@ -207,46 +235,47 @@ class StorageIndexer(SingleRoomAgent):

     async def read_file(self, *, path: str) -> str | None:
         pass
-
+
     @_async_debounce(10)
     async def refresh_index(self):
-
         self.room.developer.log_nowait(type="indexer.rebuild", data={})

         indexes = await self.room.database.list_indexes(table=self.table)

         logger.info(f"existing indexes {indexes}")
-
-        for index in indexes:

+        for index in indexes:
             if "embedding" in index["columns"]:
                 self._vector_index_created = True
-
+
             if "text" in index["columns"]:
-                self._fts_created
+                self._fts_created = True

-        if self._vector_index_created == False:
+        if not self._vector_index_created:
             try:
                 logger.info("attempting to create embedding index")
-                await self.room.database.create_vector_index(table=self.table, column="embedding", replace=False)
+                await self.room.database.create_vector_index(
+                    table=self.table, column="embedding", replace=False
+                )
                 self._vector_index_created = True
-            except Exception as e:
+            except Exception:
                 # Will fail if there aren't enough rows
                 pass

-        if self._fts_created == False:
+        if not self._fts_created:
             try:
                 logger.info("attempting to create fts index")
-                await self.room.database.create_full_text_search_index(table=self.table, column="text", replace=False)
+                await self.room.database.create_full_text_search_index(
+                    table=self.table, column="text", replace=False
+                )
                 self._fts_created = True
-            except Exception as e:
+            except Exception:
                 # Will fail if there aren't enough rows
                 pass

-        if self._fts_created:
+        if self._fts_created or self._vector_index_created:
             logger.info("optimizing existing index")
             await self.room.database.optimize(table=self.table)
-

     async def start(self, *, room):
         await super().start(room=room)
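`refresh_index` stays wrapped in `@_async_debounce(10)`, so a burst of file events triggers at most one index rebuild roughly every ten seconds. The decorator's body sits outside this diff; a common shape for such a decorator, sketched here under that assumption:

# Sketch only -- the real _async_debounce body is outside this diff. A typical
# async debounce: each call cancels the previously scheduled run and
# reschedules the wrapped coroutine `wait` seconds in the future.
import asyncio
import functools


def _async_debounce(wait):
    def decorator(func):
        pending: asyncio.Task | None = None

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            nonlocal pending
            if pending is not None and not pending.done():
                pending.cancel()

            async def delayed():
                await asyncio.sleep(wait)
                await func(*args, **kwargs)

            pending = asyncio.create_task(delayed())

        return wrapper

    return decorator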
@@ -257,74 +286,72 @@ class StorageIndexer(SingleRoomAgent):
         await room.database.create_table_with_schema(
             name=self.table,
             schema={
-                "url" : TextDataType(),
-                "text" : TextDataType(),
-                "embedding" : VectorDataType(
-                    size=self.embedder.size,
-                    element_type=FloatDataType()
+                "url": TextDataType(),
+                "text": TextDataType(),
+                "embedding": VectorDataType(
+                    size=self.embedder.size, element_type=FloatDataType()
                 ),
-                "sha" : TextDataType(),
+                "sha": TextDataType(),
             },
             mode="create_if_not_exists",
-            data=None
+            data=None,
         )

-
         def index_task(task: asyncio.Task):
-
             try:
-                task.result()
+                task.result()
             except Exception as e:
                 logger.error("Index task failed", exc_info=e)
-

         self._index_task = asyncio.create_task(self._indexer())
         self._index_task.add_done_callback(index_task)
-
+
     async def stop(self):
         await super().stop()
         await self._chan.close()
-
-
-    async def _indexer(self):

+    async def _indexer(self):
         async for e in self._chan:
-
             try:
                 if e.deleted:
-
                     # todo: consider using sql_alchemy or a library to do the escaping
                     def escape_sql_string(value):
                         if not isinstance(value, str):
                             raise TypeError("Input must be a string")
                         return value.replace("'", "''")

-                    self.room.developer.log_nowait(type="indexer.delete", data={"path": e.path})
-
-                    await self.room.database.delete(table=self.table, where=f"url='{escape_sql_string(e.path)}'")
+                    self.room.developer.log_nowait(
+                        type="indexer.delete", data={"path": e.path}
+                    )
+                    await self.room.database.delete(
+                        table=self.table, where=f"url='{escape_sql_string(e.path)}'"
+                    )

                 else:
-
-                    self.room.developer.log_nowait(type="indexer.index", data={"path": e.path})
-
+                    self.room.developer.log_nowait(
+                        type="indexer.index", data={"path": e.path}
+                    )

                     async def lookup_or_embed(*, sha: str, text: str) -> list[float]:
-
                         # if we already indexed this chunk, lets use the existing embedding instead of generating a new one
                         results = await self.room.database.search(
                             table=self.table,
                             where={
-                                "sha" : sha,
+                                "sha": sha,
                             },
-                            limit=1
+                            limit=1,
                         )

                         if len(results) != 0:
-                            logger.info(f"chunk found from {e.path} {sha}, reusing embedding")
+                            logger.info(
+                                f"chunk found from {e.path} {sha}, reusing embedding"
+                            )
                             return results[0]["embedding"]
-
-                        logger.info(f"chunk not found from {e.path} {sha}, generating embedding")
-
+
+                        logger.info(
+                            f"chunk not found from {e.path} {sha}, generating embedding"
+                        )
+
                         return await self.embedder.embed(text=text)

                     basename = os.path.basename(e.path)
@@ -335,67 +362,69 @@ class StorageIndexer(SingleRoomAgent):
                     # let's make the filename it's own chunk
                     rows.append(
                         {
-                            "url" : e.path,
-                            "text" : basename,
-                            "sha" : chunk_sha,
-                            "embedding" : await lookup_or_embed(sha=chunk_sha, text=basename)
+                            "url": e.path,
+                            "text": basename,
+                            "sha": chunk_sha,
+                            "embedding": await lookup_or_embed(
+                                sha=chunk_sha, text=basename
+                            ),
                         }
                     )
-
-
+
                     text = await self.read_file(path=e.path)
-                    if text != None:
-
+                    if text is not None:
                         # the content will be transformed into additional chunks
-                        for chunk in await self.chunker.chunk(text=text, max_length=self.embedder.max_length):
-                            logger.info(f"processing chunk from {e.path}: {chunk.start}")
-                            chunk_sha = hashlib.sha256(chunk.text.encode("utf-8")).hexdigest()
+                        for chunk in await self.chunker.chunk(
+                            text=text, max_length=self.embedder.max_length
+                        ):
+                            logger.info(
+                                f"processing chunk from {e.path}: {chunk.start}"
+                            )
+                            chunk_sha = hashlib.sha256(
+                                chunk.text.encode("utf-8")
+                            ).hexdigest()
                             rows.append(
                                 {
-                                    "url" : e.path,
-                                    "text" : chunk.text,
-                                    "embedding" : await lookup_or_embed(sha=chunk_sha, text=chunk.text),
-                                    "sha" : chunk_sha,
+                                    "url": e.path,
+                                    "text": chunk.text,
+                                    "embedding": await lookup_or_embed(
+                                        sha=chunk_sha, text=chunk.text
+                                    ),
+                                    "sha": chunk_sha,
                                 }
                             )
-                    await self.room.database.merge(table=self.table, on="sha", records=rows)
+                    await self.room.database.merge(
+                        table=self.table, on="sha", records=rows
+                    )
                     await self.refresh_index()

-
-
             except Exception as e:
                 logger.error("error while indexing", exc_info=e)

-
     def _on_file_deleted(self, path: str, participant_id: str):
         self._chan.send_nowait(FileIndexEvent(path=path, deleted=True))
-
+
     def _on_file_updated(self, path: str, participant_id: str):
         self._chan.send_nowait(FileIndexEvent(path=path, deleted=False))

-
-
-

 class SiteIndexer(TaskRunner):
-
-
-
+    def __init__(
+        self,
+        *,
         name,
         chunker: Optional[Chunker] = None,
-        embedder:Optional[Embedder] = None,
+        embedder: Optional[Embedder] = None,
         title=None,
         description=None,
         requires=None,
-        supports_tools = None,
-        labels: Optional[list[str]] = None
-
+        supports_tools=None,
+        labels: Optional[list[str]] = None,
     ):
-
-        if chunker == None:
+        if chunker is None:
             chunker = ChonkieChunker()

-        if embedder == None:
+        if embedder is None:
             embedder = open_ai_embedding_3_large()

         self.chunker = chunker
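`StorageIndexer.read_file` remains an empty hook in the hunks above, so concrete indexers subclass it and decide how to fetch file contents. A hypothetical subclass that serves UTF-8 text from a local directory (the `LocalStorageIndexer` name and `root` parameter are illustrative, not part of the package; a real subclass would read from room storage):

# Hypothetical subclass -- illustrative only, not part of meshagent.
import os

from meshagent.agents.indexer import StorageIndexer


class LocalStorageIndexer(StorageIndexer):
    def __init__(self, *, name: str, root: str, **kwargs):
        super().__init__(name=name, **kwargs)
        self._root = root

    async def read_file(self, *, path: str) -> str | None:
        full = os.path.join(self._root, path)
        if not os.path.isfile(full):
            return None
        try:
            with open(full, encoding="utf-8") as f:
                return f.read()
        except UnicodeDecodeError:
            # binary content is skipped; only the filename chunk gets indexed
            return None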
@@ -406,47 +435,29 @@ class SiteIndexer(TaskRunner):
             title=title,
             description=description,
             requires=[
-                RequiredToolkit(
-                    name="meshagent.firecrawl",
-                    tools=[
-                        "firecrawl_queue"
-                    ]
-                ),
+                RequiredToolkit(name="meshagent.firecrawl", tools=["firecrawl_queue"]),
             ],
             supports_tools=supports_tools,
             input_schema={
-                "type"
-                "required"
-
-
-
-
-                    "
-
-                        "description" : "default: firecrawl"
-                    },
-                    "table" : {
-                        "type" : "string",
-                        "description" : "default: index"
-                    },
-                    "url" : {
-                        "type" : "string",
-                        "description" : "default: index"
-                    }
-                }
+                "type": "object",
+                "required": ["queue", "table", "url"],
+                "additionalProperties": False,
+                "properties": {
+                    "queue": {"type": "string", "description": "default: firecrawl"},
+                    "table": {"type": "string", "description": "default: index"},
+                    "url": {"type": "string", "description": "default: index"},
+                },
             },
             output_schema={
-                "type" : "object",
-                "required" : [],
-                "additionalProperties" : False,
-                "properties" : {}
+                "type": "object",
+                "required": [],
+                "additionalProperties": False,
+                "properties": {},
             },
-            labels=labels
+            labels=labels,
         )

-
     async def ask(self, *, context, arguments):
-
         queue = arguments["queue"]
         table = arguments["table"]
         url = arguments["url"]
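The tightened `input_schema` above now requires all three arguments and rejects extra keys, so a task request for `SiteIndexer.ask` would carry a payload shaped like this (the values are examples only):

# Example payload matching the new input_schema; values are illustrative.
arguments = {
    "queue": "firecrawl",               # queue the crawl results arrive on
    "table": "index",                   # table the indexed chunks are written to
    "url": "https://example.com/docs",  # site to crawl and index
}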
@@ -459,129 +470,135 @@ class SiteIndexer(TaskRunner):
         except ValueError:
             pass

-
         async def lookup_or_embed(*, sha: str, text: str) -> list[float]:
-
             # if we already indexed this chunk, lets use the existing embedding instead of generating a new one
             if exists:
-
                 results = await self.room.database.search(
                     table=self.table,
                     where={
-                        "sha" : sha,
+                        "sha": sha,
                     },
-                    limit=1
+                    limit=1,
                 )

-
             if len(results) != 0:
                 logger.info(f"chunk found from {url} {sha}, reusing embedding")
                 return results[0]["embedding"]
-
+
             logger.info(f"chunk not found from {url} {sha}, generating embedding")
-
+
             return await self.embedder.embed(text=text)
-
-
+
         async def crawl():
             logger.info(f"starting to crawl: {url}")
             await context.room.agents.invoke_tool(
                 toolkit="meshagent.firecrawl",
                 tool="firecrawl_queue",
-                arguments={
-                    "url" : url,
-                    "queue" : queue,
-                    "limit" : 100
-                })
-
+                arguments={"url": url, "queue": queue, "limit": 100},
+            )
+
             logger.info(f"done with crawl: {url}")
-            await context.room.queues.send(name=queue, message={
-                "done" : True})
+            await context.room.queues.send(name=queue, message={"done": True})
+
         def crawl_done(task: asyncio.Task):
             try:
                 task.result()
             except Exception as e:
                 logger.error("crawl failed", exc_info=e)

-
         crawl_task = asyncio.create_task(crawl())
         crawl_task.add_done_callback(crawl_done)
-
+
         rows = []

         id = 0
-
+
         while True:
-            message = await context.room.queues.receive(name=queue, create=True, wait=True)
-
-            if message == None:
+            message = await context.room.queues.receive(
+                name=queue, create=True, wait=True
+            )
+
+            if message is None:
                 break

             if message.get("type", None) == "crawl.completed":
                 break
-
+
             if "data" in message:
                 for data in message["data"]:
                     try:
-                        url = data["metadata"]["url"]
-                        text = data["markdown"]
-                        title = data["metadata"]["title"]
-                        title_sha = hashlib.sha256(text.encode("utf-8")).hexdigest()
+                        url: str = data["metadata"]["url"]
+                        text: str = data["markdown"]
+                        title: str = data["metadata"]["title"]
+                        title_sha: str = hashlib.sha256(
+                            text.encode("utf-8")
+                        ).hexdigest()

                         logger.info(f"processing crawled page: {url}")
-
+
                         # let's make the title it's own chunk
                         rows.append(
-                            {
-                                "id" : id,
-                                "url" : url,
-                                "text" : title,
-                                "sha" : title_sha,
-                                "embedding" : await lookup_or_embed(sha=title_sha, text=title)
-                            }
-                        )
-
+                            {
+                                "id": id,
+                                "url": url,
+                                "text": title,
+                                "sha": title_sha,
+                                "embedding": await lookup_or_embed(
+                                    sha=title_sha, text=title
+                                ),
+                            }
+                        )
+
                         id = id + 1
-
+
                         # the content will be transformed into additional chunks
-                        for chunk in await self.chunker.chunk(text=text, max_length=self.embedder.max_length):
+                        for chunk in await self.chunker.chunk(
+                            text=text, max_length=self.embedder.max_length
+                        ):
                             logger.info(f"processing chunk from {url}: {chunk.text}")
-                            chunk_sha = hashlib.sha256(chunk.text.encode("utf-8")).hexdigest()
+                            chunk_sha = hashlib.sha256(
+                                chunk.text.encode("utf-8")
+                            ).hexdigest()
                             rows.append(
                                 {
-                                    "id" : id,
-                                    "url" : url,
-                                    "text" : chunk.text,
-                                    "embedding" : await lookup_or_embed(sha=chunk_sha, text=chunk.text)
+                                    "id": id,
+                                    "url": url,
+                                    "text": chunk.text,
+                                    "embedding": await lookup_or_embed(
+                                        sha=chunk_sha, text=chunk.text
+                                    ),
                                 }
                             )
-
+
                             id = id + 1

                     except Exception as e:
                         logger.error(f"failed to process: {url}", exc_info=e)

         logger.info(f"saving crawl: {url}")
-
+
         await context.room.database.create_table_with_schema(
             name=table,
             schema={
-                "id" : IntDataType(),
-                "url" : TextDataType(),
-                "text" : TextDataType(),
-                "embedding" : VectorDataType(
-                    size=self.embedder.size,
-                    element_type=FloatDataType()
+                "id": IntDataType(),
+                "url": TextDataType(),
+                "text": TextDataType(),
+                "embedding": VectorDataType(
+                    size=self.embedder.size, element_type=FloatDataType()
                 ),
-                "sha" : TextDataType(),
+                "sha": TextDataType(),
             },
             mode="overwrite",
-            data=rows
+            data=rows,
        )

         if len(rows) > 255:
-            await context.room.database.create_vector_index(table=table, column="embedding")
+            await context.room.database.create_vector_index(
+                table=table, column="embedding"
+            )

-        await context.room.database.create_full_text_search_index(table=table, column="text")
+        await context.room.database.create_full_text_search_index(
+            table=table, column="text"
+        )

         return {}
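Both indexers key chunks by the SHA-256 of their text so that re-indexing unchanged content reuses stored embeddings instead of re-calling the embedding API. The pattern, reduced to a standalone sketch (the `db` and `embedder` parameters stand in for the room database client and an `Embedder` instance):

# Sketch of the lookup_or_embed pattern used by both indexers; `db` and
# `embedder` are stand-ins, not meshagent APIs.
import hashlib


async def lookup_or_embed(db, embedder, *, table: str, text: str) -> list[float]:
    # chunks are keyed by the SHA-256 of their text, so unchanged content
    # already present in the table reuses its stored embedding
    sha = hashlib.sha256(text.encode("utf-8")).hexdigest()
    rows = await db.search(table=table, where={"sha": sha}, limit=1)
    if rows:
        return rows[0]["embedding"]
    # otherwise pay for exactly one embedding call for this chunk
    return await embedder.embed(text=text)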