modaic 0.2.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modaic might be problematic. Click here for more details.
- {modaic-0.2.0/src/modaic.egg-info → modaic-0.4.0}/PKG-INFO +2 -2
- {modaic-0.2.0 → modaic-0.4.0}/pyproject.toml +3 -2
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/agents/rag_agent.py +5 -2
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/sql_database.py +49 -19
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/exceptions.py +9 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/module_utils.py +64 -26
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/precompiled.py +98 -5
- {modaic-0.2.0 → modaic-0.4.0/src/modaic.egg-info}/PKG-INFO +2 -2
- modaic-0.4.0/tests/test_auto.py +341 -0
- {modaic-0.2.0 → modaic-0.4.0}/tests/test_precompiled.py +40 -0
- modaic-0.2.0/tests/test_auto.py +0 -180
- {modaic-0.2.0 → modaic-0.4.0}/LICENSE +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/README.md +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/setup.cfg +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/__init__.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/agents/registry.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/auto.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/context/__init__.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/context/base.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/context/dtype_mapping.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/context/table.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/context/text.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/__init__.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/graph_database.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/__init__.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/benchmarks/baseline.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/benchmarks/common.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/benchmarks/fork.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/benchmarks/threaded.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/vector_database.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/vendors/milvus.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/vendors/mongodb.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/vendors/pinecone.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/databases/vector_database/vendors/qdrant.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/datasets.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/hub.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/indexing.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/observability.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/query_language.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/storage/__init__.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/storage/file_store.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/storage/pickle_store.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/types.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic/utils.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic.egg-info/SOURCES.txt +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic.egg-info/dependency_links.txt +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic.egg-info/requires.txt +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/src/modaic.egg-info/top_level.txt +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/tests/test_observability.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/tests/test_query_language.py +0 -0
- {modaic-0.2.0 → modaic-0.4.0}/tests/test_types.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modaic
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: Modular Agent Infrastructure
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Modular Agent Infrastructure Collection, a python framework for managing and sharing DSPy agents
|
|
5
5
|
Author-email: Tyrin <tytodd@mit.edu>, Farouk <farouk@modaic.dev>
|
|
6
6
|
License: MIT License
|
|
7
7
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "modaic"
|
|
3
|
-
version = "0.
|
|
4
|
-
description = "Modular Agent Infrastructure
|
|
3
|
+
version = "0.4.0"
|
|
4
|
+
description = "Modular Agent Infrastructure Collection, a python framework for managing and sharing DSPy agents"
|
|
5
5
|
authors = [{ name = "Tyrin", email = "tytodd@mit.edu" }, {name = "Farouk", email = "farouk@modaic.dev"}]
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
license = {file = "LICENSE"}
|
|
@@ -87,4 +87,5 @@ members = [
|
|
|
87
87
|
"tests/artifacts/test_repos/nested_repo_2",
|
|
88
88
|
"tests/artifacts/test_repos/nested_repo_3",
|
|
89
89
|
"tests/artifacts/test_repos/multi_module_repo",
|
|
90
|
+
"tests/artifacts/test_repos/failing_repo",
|
|
90
91
|
]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
from modaic import Indexer, PrecompiledAgent, PrecompiledConfig
|
|
4
4
|
from modaic.context import Context
|
|
@@ -19,7 +19,10 @@ class RAGAgentConfig(PrecompiledConfig):
|
|
|
19
19
|
|
|
20
20
|
@builtin_indexer(agent_name)
class RAGIndexer(Indexer):
    """Indexer stub for the RAG agent, decorated with ``builtin_indexer(agent_name)``.

    Indexing is not implemented yet; ``index`` is a no-op.
    """

    def __init__(self, config: RAGAgentConfig):
        """Forward ``config`` to the ``Indexer`` initializer."""
        super().__init__(config)

    def index(self, contents: Any, **kwargs):
        """Accept the contents to index and do nothing (placeholder).

        ``**kwargs`` is accepted so this override matches the abstract
        ``Indexer.index(self, contents, **kwargs)`` signature.
        """
        pass
|
|
24
27
|
|
|
25
28
|
|
|
@@ -5,7 +5,19 @@ from typing import Any, Callable, Iterable, List, Literal, Optional, Tuple
|
|
|
5
5
|
from urllib.parse import urlencode
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
|
-
from sqlalchemy import
|
|
8
|
+
from sqlalchemy import (
|
|
9
|
+
JSON,
|
|
10
|
+
Column,
|
|
11
|
+
CursorResult,
|
|
12
|
+
Index,
|
|
13
|
+
MetaData,
|
|
14
|
+
PrimaryKeyConstraint,
|
|
15
|
+
String,
|
|
16
|
+
Text,
|
|
17
|
+
create_engine,
|
|
18
|
+
inspect,
|
|
19
|
+
text,
|
|
20
|
+
)
|
|
9
21
|
from sqlalchemy import Table as SQLTable
|
|
10
22
|
from sqlalchemy.dialects import sqlite
|
|
11
23
|
from sqlalchemy.orm import sessionmaker
|
|
@@ -81,30 +93,43 @@ class SQLiteBackend(SQLDatabaseBackend):
|
|
|
81
93
|
|
|
82
94
|
|
|
83
95
|
class SQLDatabase:
|
|
96
|
+
METADATA_TABLE_NAME = "modaic_metadata"
|
|
97
|
+
|
|
84
98
|
def __init__(
|
|
85
99
|
self,
|
|
86
100
|
backend: SQLDatabaseBackend | str,
|
|
87
101
|
engine_kwargs: dict = None, # TODO: This may not be a smart idea, may want to enforce specific kwargs
|
|
88
102
|
session_kwargs: dict = None, # TODO: This may not be a smart idea, may want to enforce specific kwargs
|
|
103
|
+
track_metadata: bool = False,
|
|
89
104
|
):
|
|
90
105
|
self.url = backend.url if isinstance(backend, SQLDatabaseBackend) else backend
|
|
91
106
|
self.engine = create_engine(self.url, **(engine_kwargs or {}))
|
|
92
107
|
self.metadata = MetaData()
|
|
93
108
|
self.session = sessionmaker(bind=self.engine, **(session_kwargs or {}))
|
|
94
109
|
self.inspector = inspect(self.engine)
|
|
95
|
-
self.preparer =
|
|
110
|
+
self.preparer = self.engine.dialect.identifier_preparer
|
|
96
111
|
|
|
97
112
|
# Create metadata table to store table metadata
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
113
|
+
if track_metadata:
|
|
114
|
+
self._ensure_metadata_table()
|
|
115
|
+
self.metadata.reflect(bind=self.engine)
|
|
116
|
+
self.metadata_table: Optional[Table] = (
|
|
117
|
+
self.metadata.tables[self.METADATA_TABLE_NAME] if track_metadata else None
|
|
103
118
|
)
|
|
104
|
-
self.metadata.create_all(self.engine)
|
|
105
119
|
self.connection = None
|
|
106
120
|
self._in_transaction = False
|
|
107
121
|
|
|
122
|
+
def _ensure_metadata_table(self) -> None:
|
|
123
|
+
"""Create the metadata table if missing."""
|
|
124
|
+
if not self.inspector.has_table(self.METADATA_TABLE_NAME):
|
|
125
|
+
SQLTable(
|
|
126
|
+
self.METADATA_TABLE_NAME,
|
|
127
|
+
self.metadata,
|
|
128
|
+
Column("table_name", String(255), primary_key=True),
|
|
129
|
+
Column("metadata_json", Text),
|
|
130
|
+
)
|
|
131
|
+
self.metadata.create_all(self.engine)
|
|
132
|
+
|
|
108
133
|
def add_table(
|
|
109
134
|
self,
|
|
110
135
|
table: BaseTable,
|
|
@@ -115,17 +140,17 @@ class SQLDatabase:
|
|
|
115
140
|
with self.connect() as connection:
|
|
116
141
|
# Use the connection for to_sql to respect transaction context
|
|
117
142
|
table._df.to_sql(table.name, connection, if_exists=if_exists, index=False)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
143
|
+
if self.metadata_table is not None:
|
|
144
|
+
# Remove existing metadata for this table if it exists
|
|
145
|
+
connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == table.name))
|
|
146
|
+
|
|
147
|
+
# Insert new metadata
|
|
148
|
+
connection.execute(
|
|
149
|
+
self.metadata_table.insert().values(
|
|
150
|
+
table_name=table.name,
|
|
151
|
+
metadata_json=json.dumps(table.metadata),
|
|
152
|
+
)
|
|
127
153
|
)
|
|
128
|
-
)
|
|
129
154
|
if self._should_commit():
|
|
130
155
|
connection.commit()
|
|
131
156
|
|
|
@@ -151,7 +176,8 @@ class SQLDatabase:
|
|
|
151
176
|
command = text(f"DROP TABLE {if_exists} {safe_name}")
|
|
152
177
|
connection.execute(command)
|
|
153
178
|
# Also remove metadata for this table
|
|
154
|
-
|
|
179
|
+
if self.metadata_table is not None:
|
|
180
|
+
connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == name))
|
|
155
181
|
if self._should_commit():
|
|
156
182
|
connection.commit()
|
|
157
183
|
|
|
@@ -197,6 +223,10 @@ class SQLDatabase:
|
|
|
197
223
|
Returns:
|
|
198
224
|
Dictionary containing the table's metadata, or empty dict if not found.
|
|
199
225
|
"""
|
|
226
|
+
if self.metadata_table is None:
|
|
227
|
+
raise ValueError(
|
|
228
|
+
"Metadata table is not enabled. Please enable metadata tracking when initializing the SQLDatabase. with track_metadata=True."
|
|
229
|
+
)
|
|
200
230
|
with self.connect() as connection:
|
|
201
231
|
result = connection.execute(
|
|
202
232
|
self.metadata_table.select().where(self.metadata_table.c.table_name == name)
|
|
@@ -36,3 +36,12 @@ class BackendCompatibilityError(ModaicError):
|
|
|
36
36
|
"""Raised when a feature is not supported by a backend"""
|
|
37
37
|
|
|
38
38
|
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class MissingSecretError(AuthenticationError):
    """Raised when a secret is missing.

    Args:
        message: Human-readable description, also passed to the base exception.
        secret_name: Name of the secret that was not provided.
    """

    def __init__(self, message: str, secret_name: str):
        # Attributes are stored before delegating so they are always present
        # on the instance, matching the original assignment order.
        self.message = message
        self.secret_name = secret_name
        super().__init__(message)
|
|
@@ -243,6 +243,12 @@ def init_agent_repo(repo_path: str, with_code: bool = True) -> Path:
|
|
|
243
243
|
if src_init.exists() and not dest_init.exists():
|
|
244
244
|
shutil.copy2(src_init, dest_init)
|
|
245
245
|
|
|
246
|
+
for extra_file in get_extra_files():
|
|
247
|
+
if extra_file.is_dir():
|
|
248
|
+
shutil.copytree(extra_file, repo_dir / extra_file.relative_to(project_root))
|
|
249
|
+
else:
|
|
250
|
+
shutil.copy2(extra_file, repo_dir / extra_file.relative_to(project_root))
|
|
251
|
+
|
|
246
252
|
return repo_dir
|
|
247
253
|
|
|
248
254
|
|
|
@@ -272,23 +278,52 @@ def get_ignored_files() -> list[Path]:
|
|
|
272
278
|
pyproject_path = Path("pyproject.toml")
|
|
273
279
|
doc = tomlk.parse(pyproject_path.read_text(encoding="utf-8"))
|
|
274
280
|
|
|
275
|
-
# Safely get [tool.modaic.
|
|
276
|
-
|
|
281
|
+
# Safely get [tool.modaic.exclude]
|
|
282
|
+
files = (
|
|
277
283
|
doc.get("tool", {}) # [tool]
|
|
278
284
|
.get("modaic", {}) # [tool.modaic]
|
|
279
|
-
.get("
|
|
285
|
+
.get("exclude", {}) # [tool.modaic.exclude]
|
|
286
|
+
.get("files", []) # [tool.modaic.exclude] files = ["file1", "file2"]
|
|
280
287
|
)
|
|
281
288
|
|
|
282
|
-
|
|
283
|
-
|
|
289
|
+
excluded: list[Path] = []
|
|
290
|
+
for entry in files:
|
|
291
|
+
entry = Path(entry)
|
|
292
|
+
if not entry.is_absolute():
|
|
293
|
+
entry = project_root / entry
|
|
294
|
+
if entry.exists():
|
|
295
|
+
excluded.append(entry)
|
|
296
|
+
return excluded
|
|
284
297
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
298
|
+
|
|
299
|
+
def get_extra_files() -> list[Path]:
    """Return the extra files/directories listed in ``[tool.modaic.include] files``.

    Relative entries are interpreted relative to the project root. Absolute
    entries are kept only when they resolve to a location inside the project
    root; otherwise a warning is emitted and the entry is skipped. Skipping is
    required because callers compute ``entry.relative_to(project_root)``,
    which raises ``ValueError`` for a path outside the root. Entries that do
    not exist on disk are dropped silently.

    Returns:
        Existing paths, anchored at the project root, that should be bundled.
    """
    project_root = resolve_project_root()
    pyproject_path = Path("pyproject.toml")
    doc = tomlk.parse(pyproject_path.read_text(encoding="utf-8"))
    files = (
        doc.get("tool", {})  # [tool]
        .get("modaic", {})  # [tool.modaic]
        .get("include", {})  # [tool.modaic.include]
        .get("files", [])  # [tool.modaic.include] files = ["file1", "file2"]
    )
    resolved_root = project_root.resolve()
    included: list[Path] = []
    for entry in files:
        entry = Path(entry)
        if entry.is_absolute():
            resolved_entry = entry.resolve()
            try:
                relative = resolved_entry.relative_to(resolved_root)
            except ValueError:
                warnings.warn(
                    f"{resolved_entry} will not be bundled because it is not inside the current working directory",
                    stacklevel=4,
                )
                # Previously the entry was still appended after the warning.
                continue
            # Re-anchor on project_root so relative_to(project_root) works
            # even when project_root itself is not a resolved path.
            entry = project_root / relative
        else:
            entry = project_root / entry
        if entry.resolve().exists():
            included.append(entry)

    return included
|
|
292
327
|
|
|
293
328
|
|
|
294
329
|
def create_pyproject_toml(repo_dir: Path, package_name: str):
|
|
@@ -304,7 +339,7 @@ def create_pyproject_toml(repo_dir: Path, package_name: str):
|
|
|
304
339
|
if "project" not in doc_old:
|
|
305
340
|
raise KeyError("No [project] table in old TOML")
|
|
306
341
|
doc_new["project"] = doc_old["project"]
|
|
307
|
-
doc_new["project"]["dependencies"] =
|
|
342
|
+
doc_new["project"]["dependencies"] = get_final_dependencies(doc_old["project"]["dependencies"])
|
|
308
343
|
if "tool" in doc_old and "uv" in doc_old["tool"] and "sources" in doc_old["tool"]["uv"]:
|
|
309
344
|
doc_new["tool"] = {"uv": {"sources": doc_old["tool"]["uv"]["sources"]}}
|
|
310
345
|
warn_if_local(doc_new["tool"]["uv"]["sources"])
|
|
@@ -315,29 +350,32 @@ def create_pyproject_toml(repo_dir: Path, package_name: str):
|
|
|
315
350
|
tomlk.dump(doc_new, fp)
|
|
316
351
|
|
|
317
352
|
|
|
318
|
-
def
|
|
353
|
+
def get_final_dependencies(dependencies: list[str]) -> list[str]:
    """
    Get the dependencies that should be included in the bundled agent.

    Filters out ``[tool.modaic.exclude] dependencies`` and appends
    ``[tool.modaic.include] dependencies``, both read from ``pyproject.toml``
    in the current working directory.

    Exclusion compares normalized distribution names (the leading name of each
    requirement string, lower-cased, with runs of ``-``, ``_`` and ``.``
    collapsed). The previous word-boundary regex search also removed unrelated
    packages that merely contained an excluded name as a word, e.g. excluding
    ``torch`` dropped ``torch-geometric``.

    Args:
        dependencies: Requirement strings from the project's ``dependencies``.

    Returns:
        The remaining requirement strings followed by the included ones.
    """
    pyproject_path = Path("pyproject.toml")
    doc = tomlk.parse(pyproject_path.read_text(encoding="utf-8"))

    modaic_cfg = doc.get("tool", {}).get("modaic", {})  # [tool.modaic]
    # [tool.modaic.exclude] dependencies = ["praw", "sagemaker"]
    exclude_deps = modaic_cfg.get("exclude", {}).get("dependencies", [])
    # [tool.modaic.include] dependencies = ["praw", "sagemaker"]
    include_deps = modaic_cfg.get("include", {}).get("dependencies", [])

    def _normalized_name(requirement: str) -> str:
        """Return the normalized distribution name at the start of ``requirement``."""
        found = re.match(r"\s*([A-Za-z0-9][A-Za-z0-9._-]*)", requirement)
        name = found.group(1) if found else requirement.strip()
        return re.sub(r"[-_.]+", "-", name).lower()

    excluded_names = {_normalized_name(dep) for dep in exclude_deps}
    kept = [pkg for pkg in dependencies if _normalized_name(pkg) not in excluded_names]
    return kept + list(include_deps)
|
|
341
379
|
|
|
342
380
|
|
|
343
381
|
def warn_if_local(sources: dict[str, dict]):
|
|
@@ -2,12 +2,13 @@ import inspect
|
|
|
2
2
|
import json
|
|
3
3
|
import os
|
|
4
4
|
import pathlib
|
|
5
|
+
import warnings
|
|
5
6
|
from abc import ABC, abstractmethod
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import (
|
|
8
9
|
TYPE_CHECKING,
|
|
10
|
+
Any,
|
|
9
11
|
Dict,
|
|
10
|
-
List,
|
|
11
12
|
Optional,
|
|
12
13
|
Type,
|
|
13
14
|
TypeVar,
|
|
@@ -15,11 +16,13 @@ from typing import (
|
|
|
15
16
|
)
|
|
16
17
|
|
|
17
18
|
import dspy
|
|
19
|
+
from git import config
|
|
18
20
|
from pydantic import BaseModel
|
|
19
21
|
|
|
20
22
|
from modaic.module_utils import create_agent_repo
|
|
21
23
|
from modaic.observability import Trackable, track_modaic_obj
|
|
22
24
|
|
|
25
|
+
from .exceptions import MissingSecretError
|
|
23
26
|
from .hub import load_repo, push_folder_to_hub
|
|
24
27
|
from .module_utils import _module_path
|
|
25
28
|
|
|
@@ -128,6 +131,7 @@ class PrecompiledConfig(BaseModel):
|
|
|
128
131
|
return self.model_dump_json()
|
|
129
132
|
|
|
130
133
|
|
|
134
|
+
# Use a metaclass to enforce super().__init__() with config
|
|
131
135
|
class PrecompiledAgent(dspy.Module):
|
|
132
136
|
"""
|
|
133
137
|
Bases: `dspy.Module`
|
|
@@ -234,15 +238,25 @@ class PrecompiledAgent(dspy.Module):
|
|
|
234
238
|
extra_auto_classes["AutoRetriever"] = self.retriever
|
|
235
239
|
self.config.save_precompiled(path, extra_auto_classes)
|
|
236
240
|
self.save(path / "agent.json")
|
|
241
|
+
_clean_secrets(path / "agent.json")
|
|
237
242
|
|
|
238
243
|
@classmethod
|
|
239
|
-
def from_precompiled(
|
|
244
|
+
def from_precompiled(
|
|
245
|
+
cls: Type[A],
|
|
246
|
+
path: str | Path,
|
|
247
|
+
config_options: Optional[dict] = None,
|
|
248
|
+
api_key: Optional[str | dict[str, str]] = None,
|
|
249
|
+
hf_token: Optional[str | dict[str, str]] = None,
|
|
250
|
+
**kwargs,
|
|
251
|
+
) -> A:
|
|
240
252
|
"""
|
|
241
253
|
Loads the agent and the config from the given path.
|
|
242
254
|
|
|
243
255
|
Args:
|
|
244
256
|
path: The path to load the agent and config from. Can be a local path or a path on Modaic Hub.
|
|
245
257
|
config_options: A dictionary containg key-value pairs used to override the default config.
|
|
258
|
+
api_key: Your API key.
|
|
259
|
+
hf_token: Your Hugging Face token.
|
|
246
260
|
**kwargs: Additional keyword arguments forwarded to the PrecompiledAgent's constructor.
|
|
247
261
|
|
|
248
262
|
Returns:
|
|
@@ -260,7 +274,9 @@ class PrecompiledAgent(dspy.Module):
|
|
|
260
274
|
agent = cls(config, **kwargs)
|
|
261
275
|
agent_state_path = local_dir / "agent.json"
|
|
262
276
|
if agent_state_path.exists():
|
|
263
|
-
|
|
277
|
+
secrets = {"api_key": api_key, "hf_token": hf_token}
|
|
278
|
+
state = _get_state_with_secrets(agent_state_path, secrets)
|
|
279
|
+
agent.load_state(state)
|
|
264
280
|
return agent
|
|
265
281
|
|
|
266
282
|
def push_to_hub(
|
|
@@ -374,7 +390,7 @@ class Indexer(Retriever):
|
|
|
374
390
|
config: PrecompiledConfig
|
|
375
391
|
|
|
376
392
|
@abstractmethod
def index(self, contents: Any, **kwargs):
    """Abstract hook for indexing ``contents``; subclasses are expected to override it.

    Extra keyword arguments are accepted and ignored by this base stub.
    """
    return None
|
|
379
395
|
|
|
380
396
|
|
|
@@ -404,11 +420,88 @@ def _push_to_hub(
|
|
|
404
420
|
|
|
405
421
|
def is_local_path(s: str | Path) -> bool:
|
|
406
422
|
# absolute or relative filesystem path
|
|
423
|
+
if isinstance(s, Path):
|
|
424
|
+
return True
|
|
407
425
|
s = str(s)
|
|
426
|
+
|
|
427
|
+
print("SSSS", s)
|
|
408
428
|
if os.path.isabs(s) or s.startswith((".", "/", "\\")):
|
|
409
429
|
return True
|
|
410
430
|
parts = s.split("/")
|
|
411
431
|
# hub IDs: "repo" or "user/repo"
|
|
412
|
-
if len(parts) == 1
|
|
432
|
+
if len(parts) == 1:
|
|
433
|
+
raise ValueError(
|
|
434
|
+
f"Invalid repo: '{s}'. Please prefix local paths with './', '/', or '../' . And use 'user/repo' format for hub paths."
|
|
435
|
+
)
|
|
436
|
+
elif len(parts) == 2 and all(parts):
|
|
413
437
|
return False
|
|
414
438
|
return True
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
SECRET_MASK = "********"
|
|
442
|
+
COMMON_SECRETS = ["api_key", "hf_token"]
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def _clean_secrets(path: Path, extra_secrets: Optional[list[str]] = None):
|
|
446
|
+
"""
|
|
447
|
+
Removes all secret keys from `lm` dict in agent.json file
|
|
448
|
+
"""
|
|
449
|
+
secret_keys = COMMON_SECRETS + (extra_secrets or [])
|
|
450
|
+
|
|
451
|
+
with open(path, "r") as f:
|
|
452
|
+
d = json.load(f)
|
|
453
|
+
|
|
454
|
+
for predictor in d.values():
|
|
455
|
+
lm = predictor.get("lm", None)
|
|
456
|
+
if lm is None:
|
|
457
|
+
continue
|
|
458
|
+
for k in lm.keys():
|
|
459
|
+
if k in secret_keys:
|
|
460
|
+
lm[k] = SECRET_MASK
|
|
461
|
+
|
|
462
|
+
with open(path, "w") as f:
|
|
463
|
+
json.dump(d, f, indent=2)
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def _get_state_with_secrets(path: Path, secrets: dict[str, str | dict[str, str] | None]):
    """
    Load an agent.json file and fill the secret keys in each predictor's `lm` dict.

    Args:
        path: The path to the agent.json file.
        secrets: A dictionary containing the secrets to fill in the `lm` dict.
            - Dict[k,v] where k is the name of a secret (e.g. "api_key") and v is the value of the secret
            - If v is a string, every lm will use v for k
            - if v is a dict, each key of v should be the name of a named predictor
            (e.g. "my_module.predict", "my_module.summarizer") mapping to the secret value for that predictor
    Returns:
        A dictionary containing the state of the agent.json file with the secrets filled in.

    Raises:
        ValueError: If a secret was supplied for a key that already holds a real value.
        MissingSecretError: If a key from ``COMMON_SECRETS`` is present in an `lm`
            dict but no value was supplied for it.
    """
    with open(path, "r", encoding="utf-8") as f:
        named_predictors = json.load(f)

    def _get_secret(predictor_name: str, secret_name: str) -> Optional[str]:
        """Return the secret for this predictor, or None when none was supplied."""
        if secret_val := secrets.get(secret_name):
            if isinstance(secret_val, str):
                return secret_val
            elif isinstance(secret_val, dict):
                return secret_val.get(predictor_name)
        return None

    for predictor_name, predictor in named_predictors.items():
        if not isinstance(predictor, dict):
            continue
        # "lm" may be present but null (the sibling _clean_secrets tolerates
        # this too); `.get("lm", {})` would return None and crash on .items().
        lm = predictor.get("lm") or {}
        for kw, arg in lm.items():
            # None, empty and masked values all count as "not set".
            already_set = arg is not None and arg != "" and arg != SECRET_MASK
            if kw in COMMON_SECRETS and already_set:
                warnings.warn(
                    f"{str(path)} exposes the secret key {kw}. Please remove it or ensure this file is not made public."
                )
            secret = _get_secret(predictor_name, kw)
            if secret is not None and already_set:
                # The stored value is deliberately not echoed: it may itself be a secret.
                raise ValueError(
                    f"Failed to insert secret value for {predictor_name}['lm']['{kw}']. It is already set."
                )
            elif secret is None and kw in COMMON_SECRETS:
                raise MissingSecretError(f"Please specify a value for {kw} in the secrets dictionary", kw)
            elif secret is not None:
                lm[kw] = secret
    return named_predictors
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modaic
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: Modular Agent Infrastructure
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Modular Agent Infrastructure Collection, a python framework for managing and sharing DSPy agents
|
|
5
5
|
Author-email: Tyrin <tytodd@mit.edu>, Farouk <farouk@modaic.dev>
|
|
6
6
|
License: MIT License
|
|
7
7
|
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
import shutil
|
|
4
|
+
import subprocess
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Union
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
import tomlkit as tomlk
|
|
10
|
+
|
|
11
|
+
from modaic import AutoAgent, AutoConfig, AutoRetriever
|
|
12
|
+
from modaic.hub import MODAIC_CACHE, get_user_info
|
|
13
|
+
from tests.testing_utils import delete_agent_repo
|
|
14
|
+
|
|
15
|
+
MODAIC_TOKEN = os.getenv("MODAIC_TOKEN")
INSTALL_TEST_REPO_DEPS = os.getenv("INSTALL_TEST_REPO_DEPS", "True").lower() == "true"
# Resolve the hub username only when a token is available. Indexing
# os.environ["MODAIC_TOKEN"] directly raised KeyError at import time, so the
# "MODAIC_TOKEN is not set" skip in prepare_repo could never be reached.
USERNAME = get_user_info(MODAIC_TOKEN)["login"] if MODAIC_TOKEN else None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_cached_agent_dir(repo_name: str) -> Path:
    """Return the path ``MODAIC_CACHE / "agents" / repo_name``."""
    agents_root = MODAIC_CACHE / "agents"
    return agents_root / repo_name
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def clean_modaic_cache() -> None:
    """Delete the ``MODAIC_CACHE`` directory tree.

    Errors are ignored, so the call also succeeds when the directory does not exist.
    """
    shutil.rmtree(path=MODAIC_CACHE, ignore_errors=True)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def prepare_repo(repo_name: str) -> None:
    """Reset local and remote state before a test run.

    The local cache is always cleared first. Without ``MODAIC_TOKEN`` the
    calling test is skipped; otherwise the remote hub repo is deleted.

    Params:
        repo_name (str): The name of the test repository in artifacts/test_repos.

    Returns:
        None
    """
    clean_modaic_cache()
    if MODAIC_TOKEN:
        delete_agent_repo(username=USERNAME, agent_name=repo_name)
        return
    # pytest.skip raises, so nothing runs after this call.
    pytest.skip("Skipping because MODAIC_TOKEN is not set")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def run_script(repo_name: str, run_path: str = "compile.py") -> None:
    """Run a script from a test repository inside its own uv environment.

    Params:
        repo_name (str): The name of the test repository directory under
            tests/artifacts/test_repos.
        run_path (str): What to run. A value ending in ".py" is run as a file
            (``uv run <file>``); anything else is run as a module
            (``uv run -m <module>``). ``USERNAME`` is passed as the only argument.

    Returns:
        None

    Raises:
        subprocess.CalledProcessError: If ``uv sync`` or the script exits non-zero.
    """
    # The subprocess uses a throwaway cache, relative to the repo directory.
    env = {**os.environ, "MODAIC_CACHE": "../../temp/modaic_cache"}
    repo_dir = pathlib.Path("tests/artifacts/test_repos") / repo_name
    try:
        if INSTALL_TEST_REPO_DEPS:
            # Ensure the root package is available in the subproject env
            subprocess.run(["uv", "sync"], cwd=repo_dir, check=True, env=env)
        if run_path.endswith(".py"):
            command = ["uv", "run", run_path, USERNAME]  # run as file
        else:
            command = ["uv", "run", "-m", run_path, USERNAME]  # run as module
        subprocess.run(command, cwd=repo_dir, check=True, env=env)
    finally:
        # Clean the throwaway cache even when a subprocess fails, so a failed
        # run cannot leak cached state into later tests.
        shutil.rmtree("tests/artifacts/temp/modaic_cache", ignore_errors=True)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# recursive dict/list of dicts/lists of strs representing a folder structure
|
|
81
|
+
FolderLayout = dict[str, Union[str, "FolderLayout"]] | list[Union[str, "FolderLayout"]]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def assert_expected_files(cache_dir: Path, extra_expected_files: FolderLayout):
|
|
85
|
+
default_expected = ["agent.json", "auto_classes.json", "config.json", "pyproject.toml", "README.md", ".git"]
|
|
86
|
+
if isinstance(extra_expected_files, list):
|
|
87
|
+
expected = extra_expected_files + default_expected
|
|
88
|
+
elif isinstance(extra_expected_files, dict):
|
|
89
|
+
expected = [extra_expected_files] + default_expected
|
|
90
|
+
else:
|
|
91
|
+
raise ValueError(f"Invalid folder layout: {extra_expected_files}")
|
|
92
|
+
assert_folder_layout(cache_dir, expected)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def assert_top_level_names(dir: Path, expected_files: FolderLayout | str, root: bool = True):
|
|
96
|
+
if isinstance(expected_files, list):
|
|
97
|
+
expected_names = []
|
|
98
|
+
for obj in expected_files:
|
|
99
|
+
if isinstance(obj, str):
|
|
100
|
+
expected_names.append(obj)
|
|
101
|
+
elif isinstance(obj, dict):
|
|
102
|
+
expected_names.extend(list(obj.keys()))
|
|
103
|
+
else:
|
|
104
|
+
raise ValueError(f"Invalid folder layout: {expected_files}")
|
|
105
|
+
elif isinstance(expected_files, dict):
|
|
106
|
+
expected_names = list(expected_files.keys())
|
|
107
|
+
elif isinstance(expected_files, str):
|
|
108
|
+
expected_names = [expected_files]
|
|
109
|
+
else:
|
|
110
|
+
raise ValueError(f"Invalid folder layout: {expected_files}")
|
|
111
|
+
expected_names = expected_names if root else expected_names + ["__init__.py"]
|
|
112
|
+
missing = set(expected_names) - set(os.listdir(dir))
|
|
113
|
+
assert missing == set(), f"Missing files, in {dir}, {missing}"
|
|
114
|
+
unexpected = set(os.listdir(dir)) - set(expected_names)
|
|
115
|
+
assert unexpected.issubset(set(["__pycache__", "__init__.py"])), (
|
|
116
|
+
f"Unexpected files in {dir}, {unexpected - set(['__pycache__', '__init__.py'])}"
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def assert_folder_layout(
|
|
121
|
+
dir: Path, expected_files: FolderLayout | str, root: bool = True, assert_top_level: bool = True
|
|
122
|
+
):
|
|
123
|
+
"""
|
|
124
|
+
Asserts that the files in the directory match the expected folder structure.
|
|
125
|
+
Checking that only expected files are included. Will raise assertion error if unexpected files are included.
|
|
126
|
+
Args:
|
|
127
|
+
dir: The directory to assert the files in.
|
|
128
|
+
expected_files: The expected folder structure.
|
|
129
|
+
|
|
130
|
+
Raises:
|
|
131
|
+
Assertion error if expected file not found in path or if unexpected file found in path
|
|
132
|
+
"""
|
|
133
|
+
# dir is a single file folder
|
|
134
|
+
if isinstance(expected_files, str):
|
|
135
|
+
assert_top_level_names(dir, expected_files, root)
|
|
136
|
+
# dir is a folder containg multiples files or subfolders
|
|
137
|
+
elif isinstance(expected_files, list):
|
|
138
|
+
assert_top_level_names(dir, expected_files, root)
|
|
139
|
+
for file in expected_files:
|
|
140
|
+
if isinstance(file, dict):
|
|
141
|
+
assert_folder_layout(dir, file, root=False, assert_top_level=False)
|
|
142
|
+
elif not isinstance(file, str):
|
|
143
|
+
raise ValueError(f"Invalid folder layout: {expected_files}")
|
|
144
|
+
# dir contains subfolders, however don't check top level because we don't know if this is the entirety of dir or a subset
|
|
145
|
+
elif isinstance(expected_files, dict):
|
|
146
|
+
for key, value in expected_files.items():
|
|
147
|
+
assert_folder_layout(dir / key, value, root=False)
|
|
148
|
+
else:
|
|
149
|
+
raise ValueError(f"Invalid folder layout: {expected_files}")
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def assert_dependencies(cache_dir: Path, extra_expected_dependencies: list[str]):
    """Assert that ``cache_dir/pyproject.toml`` lists exactly the expected dependencies.

    The expected set is ``extra_expected_dependencies`` plus ``"dspy"`` and
    ``"modaic"``. Dependency strings are compared verbatim, as sets.

    Raises:
        AssertionError: If a dependency is missing or an unexpected one is present.
    """
    expected = set(extra_expected_dependencies + ["dspy", "modaic"])

    manifest_text = (cache_dir / "pyproject.toml").read_text(encoding="utf-8")
    manifest = tomlk.parse(manifest_text)
    actual = set(manifest.get("project", {}).get("dependencies", []))

    missing = expected - actual
    assert missing == set(), f"Missing dependencies, {missing}"
    unexpected = actual - expected
    assert unexpected == set(), f"Unexpected dependencies, {unexpected}"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_simple_repo() -> None:
    """Run ``simple_repo``'s ``agent.py`` and reload the result through the Auto classes.

    Three loads are checked, each after wiping the local cache: the config on
    its own (together with the cached files and declared dependencies), the
    agent with a runtime parameter, and the agent with ``lm`` overridden via
    ``config_options``.
    """
    repo_name = "simple_repo"
    repo_id = f"{USERNAME}/{repo_name}"

    def check_config(cfg, expected_lm: str) -> None:
        # Only ``lm`` varies between the loads below.
        assert cfg.lm == expected_lm
        assert cfg.output_type == "str"
        assert cfg.number == 1

    prepare_repo(repo_name)
    run_script(repo_name, run_path="agent.py")

    clean_modaic_cache()
    check_config(AutoConfig.from_precompiled(repo_id), "openai/gpt-4o")
    cache_dir = get_cached_agent_dir(repo_id)
    assert_expected_files(cache_dir, ["agent.py"])
    assert_dependencies(cache_dir, ["dspy", "modaic", "praw"])

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello")
    check_config(agent.config, "openai/gpt-4o")
    assert agent.runtime_param == "Hello"

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello", config_options={"lm": "openai/gpt-4o-mini"})
    check_config(agent.config, "openai/gpt-4o-mini")
    assert agent.runtime_param == "Hello"
    # TODO: test third party deps installation
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# Extra entries expected in the cached copy of ``simple_repo_with_compile``,
# in the layout format consumed by ``assert_expected_files``.
simple_repo_with_compile_extra_files = [{"agent": ["agent.py", "mod.py"]}, "compile.py", "include_me_too.txt"]


def test_simple_repo_with_compile():
    """Run ``simple_repo_with_compile``'s ``compile.py`` and reload the result.

    Checks the config loaded into a clean cache, the individual files present
    in the cache directory, the expected folder layout and dependencies, and
    then two agent loads (default config and an ``lm`` override).
    """
    repo_name = "simple_repo_with_compile"
    repo_id = f"{USERNAME}/{repo_name}"

    prepare_repo(repo_name)
    run_script(repo_name, run_path="compile.py")

    clean_modaic_cache()
    config = AutoConfig.from_precompiled(repo_id)
    assert config.lm == "openai/gpt-4o"
    assert config.output_type == "str"
    assert config.number == 1

    cache_dir = get_cached_agent_dir(repo_id)
    expected_paths = (
        "config.json",
        "agent.json",
        "auto_classes.json",
        "README.md",
        "agent/agent.py",
        "agent/mod.py",
        "pyproject.toml",
        "include_me_too.txt",
    )
    for relative_path in expected_paths:
        assert os.path.exists(cache_dir / relative_path), f"Missing cached file: {relative_path}"
    # Use the module-level layout rather than a second copy of the same literal,
    # so the expected layout is defined in exactly one place.
    assert_expected_files(cache_dir, simple_repo_with_compile_extra_files)
    assert_dependencies(cache_dir, ["dspy", "modaic"])

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello")
    assert agent.config.lm == "openai/gpt-4o"
    assert agent.config.output_type == "str"
    assert agent.config.number == 1
    assert agent.runtime_param == "Hello"

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello", config_options={"lm": "openai/gpt-4o-mini"})
    assert agent.config.lm == "openai/gpt-4o-mini"
    assert agent.config.output_type == "str"
    assert agent.config.number == 1
    assert agent.runtime_param == "Hello"
    # TODO: test third party deps installation
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# Expected extra files for the nested test repos, passed to test_nested_repo
# through its parametrization. Going by the comments in assert_folder_layout:
# a dict describes subfolders by name, a list enumerates a folder's entries,
# and a bare string is a single file name.

# nested_repo: everything lives under "agent/", including compile.py.
nested_repo_extra_files = {
    "agent": [
        {
            "tools": {"google": "google_search.py", "jira": "jira_api_tools.py"},
            "utils": ["second_degree_import.py", "used.py"],
        },
        "agent.py", "compile.py", "config.py", "retriever.py",
    ]
}

# nested_repo_2: compile.py sits at the top level next to "agent/" and an
# additional folder.
nested_repo_2_extra_files = [
    {
        "agent": [
            {
                "tools": {"google": "google_search.py", "jira": "jira_api_tools.py"},
                "utils": ["second_degree_import.py", "unused_but_included.py", "used.py"],
            },
            "agent.py", "config.py", "retriever.py",
        ]
    },
    {"unused_but_included_folder": [".env", "folder_content1.py", "folder_content2.txt"]},
    "compile.py",
]

# nested_repo_3: "tools" holds one extra loose file next to its subfolders.
nested_repo_3_extra_files = {
    "agent": [
        {
            "tools": [
                {"google": "google_search.py", "jira": "jira_api_tools.py"},
                "unused_but_included2.py",
            ],
            "utils": ["second_degree_import.py", "unused_but_included.py", "used.py"],
        },
        "agent.py", "config.py", "retriever.py",
    ],
}
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@pytest.mark.parametrize(
    "repo_name, run_path, extra_expected_files, extra_expected_dependencies",
    [
        ("nested_repo", "agent.compile", nested_repo_extra_files, []),
        ("nested_repo_2", "compile.py", nested_repo_2_extra_files, ["dspy", "modaic", "praw", "sagemaker"]),
        ("nested_repo_3", "agent.agent", nested_repo_3_extra_files, ["dspy", "modaic"]),
    ],
)
def test_nested_repo(
    repo_name: str, run_path: str, extra_expected_files: FolderLayout, extra_expected_dependencies: list[str]
):
    """Run a nested test repo's script and reload its config, retriever and agent.

    Args:
        repo_name: Name of the test repository.
        run_path: Script path or module name handed to ``run_script``.
        extra_expected_files: Layout passed to ``assert_expected_files``.
        extra_expected_dependencies: Entries passed to ``assert_dependencies``.
    """
    repo_id = f"{USERNAME}/{repo_name}"

    def load_and_check(expected_lm: str, **load_kwargs) -> None:
        # The retriever and the agent are loaded with the same extra keyword arguments.
        retriever = AutoRetriever.from_precompiled(repo_id, needed_param="hello", **load_kwargs)
        agent = AutoAgent.from_precompiled(repo_id, retriever=retriever, **load_kwargs)
        assert agent.config.num_fetch == 1
        assert agent.config.lm == expected_lm
        assert agent.config.embedder == "openai/text-embedding-3-small"
        assert agent.config.clients == {"mit": ["csail", "mit-media-lab"], "berkeley": ["bear"]}
        assert retriever.needed_param == "hello"
        assert agent.forward("my query") == "Retrieved 1 results for my query"

    prepare_repo(repo_name)
    run_script(repo_name, run_path=run_path)

    # Config load with ``clients`` replaced by a keyword argument.
    clean_modaic_cache()
    config = AutoConfig.from_precompiled(repo_id, clients={"get_replaced": "noob"})
    assert config.num_fetch == 1
    assert config.lm == "openai/gpt-4o-mini"
    assert config.embedder == "openai/text-embedding-3-small"
    assert config.clients == {"get_replaced": "noob"}

    cache_dir = get_cached_agent_dir(repo_id)
    assert_expected_files(cache_dir, extra_expected_files)
    assert_dependencies(cache_dir, extra_expected_dependencies)

    # Default load, then a load with ``lm`` overridden through config_options.
    clean_modaic_cache()
    load_and_check("openai/gpt-4o-mini")
    clean_modaic_cache()
    load_and_check("openai/gpt-4o", config_options={"lm": "openai/gpt-4o"})
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def test_auth():
    """Placeholder: no authentication behaviour is exercised yet."""
    return None
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import os
|
|
2
3
|
import shutil
|
|
3
4
|
from pathlib import Path
|
|
@@ -415,5 +416,44 @@ def test_precompiled_agent_with_retriever_hub(hub_repo: str):
|
|
|
415
416
|
loaded_agent3.push_to_hub(hub_repo, with_code=False)
|
|
416
417
|
|
|
417
418
|
|
|
419
|
+
class InnerSecretAgent(dspy.Module):
    """Nested dspy module whose predictor LM is created with secret-looking kwargs.

    The LM receives placeholder ``api_key`` and ``hf_token`` values, which
    ``test_precompiled_agent_with_secret`` expects to be masked in the saved
    agent state.
    """

    def __init__(self):
        # Run the base initialiser explicitly, as SecretAgent.__init__ does,
        # rather than relying on dspy to set up module state implicitly.
        super().__init__()
        self.predictor = dspy.Predict(Summarize)
        secret_lm = dspy.LM("openai/gpt-4o-mini", api_key="sk-proj-1234567890", hf_token="hf_1234567890")
        self.predictor.set_lm(lm=secret_lm)

    def forward(self, query: str) -> str:
        """Run the predictor on *query* and return its result."""
        # NOTE(review): annotated as ``str``, but the value is whatever
        # ``dspy.Predict`` returns - confirm the intended return type.
        return self.predictor(query=query)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
class SecretAgentConfig(PrecompiledConfig):
    """Config for ``SecretAgent``; adds no fields to ``PrecompiledConfig``."""
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class SecretAgent(PrecompiledAgent):
    """Precompiled agent holding its own predictor plus a nested ``InnerSecretAgent``.

    Both predictors get an LM built with a placeholder ``api_key``.
    """

    config: SecretAgentConfig

    def __init__(self, config: SecretAgentConfig, **kwargs):
        super().__init__(config, **kwargs)
        outer_lm = dspy.LM("openai/gpt-4o-mini", api_key="sk-proj-1234567890")
        self.predictor = dspy.Predict(Summarize)
        self.predictor.set_lm(lm=outer_lm)
        self.inner = InnerSecretAgent()

    def forward(self, query: str) -> str:
        """Delegate *query* to the nested inner agent and return its result."""
        result = self.inner(query=query)
        return result
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def test_precompiled_agent_with_secret(clean_folder: Path):
    """Secrets are masked in the saved ``agent.json`` and supplied again on load.

    Saves a ``SecretAgent``, checks that ``api_key`` / ``hf_token`` appear as
    ``"********"`` in the stored state, then reloads the agent with explicit
    secrets and checks they end up in each LM's ``kwargs``.
    """
    masked = "********"
    SecretAgent(SecretAgentConfig()).save_precompiled(clean_folder)

    saved_state = json.loads((clean_folder / "agent.json").read_text())
    inner_lm_state = saved_state["inner.predictor"]["lm"]
    assert inner_lm_state["api_key"] == masked
    assert inner_lm_state["hf_token"] == masked
    assert saved_state["predictor"]["lm"]["api_key"] == masked

    loaded_agent = SecretAgent.from_precompiled(clean_folder, api_key="set-api-key", hf_token="set-hf-token")
    inner_lm_kwargs = loaded_agent.inner.predictor.lm.kwargs
    assert inner_lm_kwargs["api_key"] == "set-api-key"
    assert inner_lm_kwargs["hf_token"] == "set-hf-token"
    assert loaded_agent.predictor.lm.kwargs["api_key"] == "set-api-key"
|
|
456
|
+
|
|
457
|
+
|
|
418
458
|
def test_unauthorized_push_to_hub():
    """Placeholder: the unauthorized push case is not exercised yet."""
    return None
|
modaic-0.2.0/tests/test_auto.py
DELETED
|
@@ -1,180 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import pathlib
|
|
3
|
-
import shutil
|
|
4
|
-
import subprocess
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import pytest
|
|
8
|
-
|
|
9
|
-
from modaic import AutoAgent, AutoConfig, AutoRetriever
|
|
10
|
-
from modaic.hub import MODAIC_CACHE, get_user_info
|
|
11
|
-
from tests.testing_utils import delete_agent_repo
|
|
12
|
-
|
|
13
|
-
# Hub token used by the tests; None when the environment variable is unset.
MODAIC_TOKEN = os.getenv("MODAIC_TOKEN")
# Whether run_script should run "uv sync" in the test repo first (default: yes).
INSTALL_TEST_REPO_DEPS = os.getenv("INSTALL_TEST_REPO_DEPS", "True").lower() == "true"
# Look the username up only when a token is present. Indexing os.environ
# directly raised KeyError at import time without a token, so the pytest.skip
# in prepare_repo could never be reached.
USERNAME = get_user_info(MODAIC_TOKEN)["login"] if MODAIC_TOKEN else None
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def get_cached_agent_dir(repo_name: str) -> Path:
    """Return the path ``MODAIC_CACHE / "agents" / repo_name``.

    Params:
        repo_name (str): Repository identifier appended below the "agents" folder.

    Returns:
        Path: The resulting path; its existence is not checked.
    """
    agents_root = MODAIC_CACHE / "agents"
    return agents_root / repo_name
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def clean_modaic_cache() -> None:
    """Delete the entire ``MODAIC_CACHE`` directory tree.

    Deletion errors are ignored, so calling this when the directory does not
    exist is not an error.

    Params:
        None

    Returns:
        None
    """
    shutil.rmtree(path=MODAIC_CACHE, ignore_errors=True)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def prepare_repo(repo_name: str) -> None:
    """Reset state before a test: wipe the cache and delete the agent repo.

    The calling test is skipped when ``MODAIC_TOKEN`` is not set; the cache is
    wiped before that check.

    Params:
        repo_name (str): Name of the agent repository to delete for ``USERNAME``.

    Returns:
        None
    """
    clean_modaic_cache()
    token_available = bool(MODAIC_TOKEN)
    if not token_available:
        pytest.skip("Skipping because MODAIC_TOKEN is not set")
    delete_agent_repo(username=USERNAME, agent_name=repo_name)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def run_script(repo_name: str, run_path: str = "compile.py", module_mode: bool = False) -> None:
    """Run a script from a test repository with ``uv run``, passing ``USERNAME``.

    Params:
        repo_name (str): Directory name under ``tests/artifacts/test_repos``.
        run_path (str): Script path, or module name when ``module_mode`` is set.
        module_mode (bool): Run ``run_path`` as a module (``uv run -m``).

    Returns:
        None
    """
    repo_dir = pathlib.Path("tests/artifacts/test_repos") / repo_name
    # The subprocesses get a relative MODAIC_CACHE and run with cwd=repo_dir.
    env = {**os.environ, "MODAIC_CACHE": "../../temp/modaic_cache"}

    if INSTALL_TEST_REPO_DEPS:
        # Ensure the root package is available in the subproject env
        subprocess.run(["uv", "sync"], cwd=repo_dir, check=True, env=env)

    launcher = ["uv", "run", "-m"] if module_mode else ["uv", "run"]
    subprocess.run([*launcher, run_path, USERNAME], cwd=repo_dir, check=True, env=env)

    # clean cache
    shutil.rmtree("tests/artifacts/temp/modaic_cache", ignore_errors=True)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def test_simple_repo() -> None:
    """Run ``simple_repo``'s ``agent.py`` and reload the result through the Auto classes.

    Checks the config and the cached files, then two agent loads (default
    config and an ``lm`` override), wiping the cache before each load.
    """
    repo_name = "simple_repo"
    repo_id = f"{USERNAME}/{repo_name}"

    def check_config(cfg, expected_lm: str) -> None:
        # Only ``lm`` varies between the loads below.
        assert cfg.lm == expected_lm
        assert cfg.output_type == "str"
        assert cfg.number == 1

    prepare_repo(repo_name)
    run_script(repo_name, run_path="agent.py")

    clean_modaic_cache()
    check_config(AutoConfig.from_precompiled(repo_id), "openai/gpt-4o")
    cache_dir = get_cached_agent_dir(repo_id)
    for file_name in ("config.json", "agent.json", "auto_classes.json", "README.md", "agent.py", "pyproject.toml"):
        assert os.path.exists(cache_dir / file_name)

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello")
    check_config(agent.config, "openai/gpt-4o")
    assert agent.runtime_param == "Hello"

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello", config_options={"lm": "openai/gpt-4o-mini"})
    check_config(agent.config, "openai/gpt-4o-mini")
    assert agent.runtime_param == "Hello"
    # TODO: test third party deps installation
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def test_simple_repo_with_compile():
    """Run ``simple_repo_with_compile``'s ``compile.py`` and reload the result.

    Checks the config and the cached files (including the ``agent`` folder),
    then two agent loads (default config and an ``lm`` override).
    """
    repo_name = "simple_repo_with_compile"
    repo_id = f"{USERNAME}/{repo_name}"

    def check_config(cfg, expected_lm: str) -> None:
        # Only ``lm`` varies between the loads below.
        assert cfg.lm == expected_lm
        assert cfg.output_type == "str"
        assert cfg.number == 1

    prepare_repo(repo_name)
    run_script(repo_name, run_path="compile.py")

    clean_modaic_cache()
    check_config(AutoConfig.from_precompiled(repo_id), "openai/gpt-4o")
    cache_dir = get_cached_agent_dir(repo_id)
    expected_parts = (
        ("config.json",),
        ("agent.json",),
        ("auto_classes.json",),
        ("README.md",),
        ("agent", "agent.py"),
        ("agent", "mod.py"),
        ("pyproject.toml",),
    )
    for parts in expected_parts:
        assert os.path.exists(cache_dir.joinpath(*parts))

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello")
    check_config(agent.config, "openai/gpt-4o")
    assert agent.runtime_param == "Hello"

    clean_modaic_cache()
    agent = AutoAgent.from_precompiled(repo_id, runtime_param="Hello", config_options={"lm": "openai/gpt-4o-mini"})
    check_config(agent.config, "openai/gpt-4o-mini")
    assert agent.runtime_param == "Hello"
    # TODO: test third party deps installation
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
@pytest.mark.parametrize("repo_name", ["nested_repo", "nested_repo_2", "nested_repo_3"])
def test_nested_repo(repo_name: str):
    """Run a nested test repo's script and reload its config, retriever and agent.

    ``nested_repo_2`` runs ``compile.py`` as a script; the other repos run a
    module (``agent.compile`` for ``nested_repo``, ``agent.agent`` otherwise).
    """
    repo_id = f"{USERNAME}/{repo_name}"
    script_args = {
        "nested_repo": {"run_path": "agent.compile", "module_mode": True},
        "nested_repo_2": {"run_path": "compile.py"},
    }.get(repo_name, {"run_path": "agent.agent", "module_mode": True})

    def load_and_check(expected_lm: str, **load_kwargs) -> None:
        # The retriever and the agent are loaded with the same extra keyword arguments.
        retriever = AutoRetriever.from_precompiled(repo_id, needed_param="hello", **load_kwargs)
        agent = AutoAgent.from_precompiled(repo_id, retriever=retriever, **load_kwargs)
        assert agent.config.num_fetch == 1
        assert agent.config.lm == expected_lm
        assert agent.config.embedder == "openai/text-embedding-3-small"
        assert agent.config.clients == {"mit": ["csail", "mit-media-lab"], "berkeley": ["bear"]}
        assert retriever.needed_param == "hello"
        assert agent.forward("my query") == "Retrieved 1 results for my query"

    prepare_repo(repo_name)
    run_script(repo_name, **script_args)

    # Config load with ``clients`` replaced by a keyword argument.
    clean_modaic_cache()
    config = AutoConfig.from_precompiled(repo_id, clients={"get_replaced": "noob"})
    assert config.num_fetch == 1
    assert config.lm == "openai/gpt-4o-mini"
    assert config.embedder == "openai/text-embedding-3-small"
    assert config.clients == {"get_replaced": "noob"}

    # Default load, then a load with ``lm`` overridden through config_options.
    clean_modaic_cache()
    load_and_check("openai/gpt-4o-mini")
    clean_modaic_cache()
    load_and_check("openai/gpt-4o", config_options={"lm": "openai/gpt-4o"})
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
def test_auth():
    """Placeholder: no authentication behaviour is exercised yet."""
    return None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|