modaic 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modaic might be problematic. Click here for more details.
- modaic/agents/rag_agent.py +5 -2
- modaic/databases/sql_database.py +49 -19
- modaic/exceptions.py +9 -0
- modaic/precompiled.py +97 -5
- {modaic-0.3.0.dist-info → modaic-0.4.0.dist-info}/METADATA +1 -1
- {modaic-0.3.0.dist-info → modaic-0.4.0.dist-info}/RECORD +9 -9
- {modaic-0.3.0.dist-info → modaic-0.4.0.dist-info}/WHEEL +0 -0
- {modaic-0.3.0.dist-info → modaic-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {modaic-0.3.0.dist-info → modaic-0.4.0.dist-info}/top_level.txt +0 -0
modaic/agents/rag_agent.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
from modaic import Indexer, PrecompiledAgent, PrecompiledConfig
|
|
4
4
|
from modaic.context import Context
|
|
@@ -19,7 +19,10 @@ class RAGAgentConfig(PrecompiledConfig):
|
|
|
19
19
|
|
|
20
20
|
@builtin_indexer(agent_name)
class RAGIndexer(Indexer):
    """Indexer registered through ``builtin_indexer(agent_name)``.

    ``index`` is a placeholder: it currently has no implementation.
    """

    def __init__(self, config: RAGAgentConfig):
        """Forward ``config`` to the ``Indexer`` base class.

        Args:
            config: Configuration object handed to the parent constructor.
        """
        super().__init__(config)

    def index(self, contents: Any, **kwargs):
        """Placeholder indexing hook; does nothing yet.

        Args:
            contents: The content to index.
            **kwargs: Extra options, accepted so the signature matches the
                abstract ``Indexer.index(self, contents, **kwargs)``.
        """
        pass
|
|
24
27
|
|
|
25
28
|
|
modaic/databases/sql_database.py
CHANGED
|
@@ -5,7 +5,19 @@ from typing import Any, Callable, Iterable, List, Literal, Optional, Tuple
|
|
|
5
5
|
from urllib.parse import urlencode
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
|
-
from sqlalchemy import
|
|
8
|
+
from sqlalchemy import (
|
|
9
|
+
JSON,
|
|
10
|
+
Column,
|
|
11
|
+
CursorResult,
|
|
12
|
+
Index,
|
|
13
|
+
MetaData,
|
|
14
|
+
PrimaryKeyConstraint,
|
|
15
|
+
String,
|
|
16
|
+
Text,
|
|
17
|
+
create_engine,
|
|
18
|
+
inspect,
|
|
19
|
+
text,
|
|
20
|
+
)
|
|
9
21
|
from sqlalchemy import Table as SQLTable
|
|
10
22
|
from sqlalchemy.dialects import sqlite
|
|
11
23
|
from sqlalchemy.orm import sessionmaker
|
|
@@ -81,30 +93,43 @@ class SQLiteBackend(SQLDatabaseBackend):
|
|
|
81
93
|
|
|
82
94
|
|
|
83
95
|
class SQLDatabase:
|
|
96
|
+
METADATA_TABLE_NAME = "modaic_metadata"
|
|
97
|
+
|
|
84
98
|
def __init__(
|
|
85
99
|
self,
|
|
86
100
|
backend: SQLDatabaseBackend | str,
|
|
87
101
|
engine_kwargs: dict = None, # TODO: This may not be a smart idea, may want to enforce specific kwargs
|
|
88
102
|
session_kwargs: dict = None, # TODO: This may not be a smart idea, may want to enforce specific kwargs
|
|
103
|
+
track_metadata: bool = False,
|
|
89
104
|
):
|
|
90
105
|
self.url = backend.url if isinstance(backend, SQLDatabaseBackend) else backend
|
|
91
106
|
self.engine = create_engine(self.url, **(engine_kwargs or {}))
|
|
92
107
|
self.metadata = MetaData()
|
|
93
108
|
self.session = sessionmaker(bind=self.engine, **(session_kwargs or {}))
|
|
94
109
|
self.inspector = inspect(self.engine)
|
|
95
|
-
self.preparer =
|
|
110
|
+
self.preparer = self.engine.dialect.identifier_preparer
|
|
96
111
|
|
|
97
112
|
# Create metadata table to store table metadata
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
113
|
+
if track_metadata:
|
|
114
|
+
self._ensure_metadata_table()
|
|
115
|
+
self.metadata.reflect(bind=self.engine)
|
|
116
|
+
self.metadata_table: Optional[Table] = (
|
|
117
|
+
self.metadata.tables[self.METADATA_TABLE_NAME] if track_metadata else None
|
|
103
118
|
)
|
|
104
|
-
self.metadata.create_all(self.engine)
|
|
105
119
|
self.connection = None
|
|
106
120
|
self._in_transaction = False
|
|
107
121
|
|
|
122
|
+
def _ensure_metadata_table(self) -> None:
|
|
123
|
+
"""Create the metadata table if missing."""
|
|
124
|
+
if not self.inspector.has_table(self.METADATA_TABLE_NAME):
|
|
125
|
+
SQLTable(
|
|
126
|
+
self.METADATA_TABLE_NAME,
|
|
127
|
+
self.metadata,
|
|
128
|
+
Column("table_name", String(255), primary_key=True),
|
|
129
|
+
Column("metadata_json", Text),
|
|
130
|
+
)
|
|
131
|
+
self.metadata.create_all(self.engine)
|
|
132
|
+
|
|
108
133
|
def add_table(
|
|
109
134
|
self,
|
|
110
135
|
table: BaseTable,
|
|
@@ -115,17 +140,17 @@ class SQLDatabase:
|
|
|
115
140
|
with self.connect() as connection:
|
|
116
141
|
# Use the connection for to_sql to respect transaction context
|
|
117
142
|
table._df.to_sql(table.name, connection, if_exists=if_exists, index=False)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
143
|
+
if self.metadata_table is not None:
|
|
144
|
+
# Remove existing metadata for this table if it exists
|
|
145
|
+
connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == table.name))
|
|
146
|
+
|
|
147
|
+
# Insert new metadata
|
|
148
|
+
connection.execute(
|
|
149
|
+
self.metadata_table.insert().values(
|
|
150
|
+
table_name=table.name,
|
|
151
|
+
metadata_json=json.dumps(table.metadata),
|
|
152
|
+
)
|
|
127
153
|
)
|
|
128
|
-
)
|
|
129
154
|
if self._should_commit():
|
|
130
155
|
connection.commit()
|
|
131
156
|
|
|
@@ -151,7 +176,8 @@ class SQLDatabase:
|
|
|
151
176
|
command = text(f"DROP TABLE {if_exists} {safe_name}")
|
|
152
177
|
connection.execute(command)
|
|
153
178
|
# Also remove metadata for this table
|
|
154
|
-
|
|
179
|
+
if self.metadata_table is not None:
|
|
180
|
+
connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == name))
|
|
155
181
|
if self._should_commit():
|
|
156
182
|
connection.commit()
|
|
157
183
|
|
|
@@ -197,6 +223,10 @@ class SQLDatabase:
|
|
|
197
223
|
Returns:
|
|
198
224
|
Dictionary containing the table's metadata, or empty dict if not found.
|
|
199
225
|
"""
|
|
226
|
+
if self.metadata_table is None:
|
|
227
|
+
raise ValueError(
|
|
228
|
+
"Metadata table is not enabled. Please enable metadata tracking when initializing the SQLDatabase. with track_metadata=True."
|
|
229
|
+
)
|
|
200
230
|
with self.connect() as connection:
|
|
201
231
|
result = connection.execute(
|
|
202
232
|
self.metadata_table.select().where(self.metadata_table.c.table_name == name)
|
modaic/exceptions.py
CHANGED
|
@@ -36,3 +36,12 @@ class BackendCompatibilityError(ModaicError):
|
|
|
36
36
|
"""Raised when a feature is not supported by a backend"""
|
|
37
37
|
|
|
38
38
|
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class MissingSecretError(AuthenticationError):
    """Raised when a secret is missing

    Args:
        message: Human-readable description; stored on the instance and also
            passed to the parent exception.
        secret_name: Name of the secret that was not provided.
    """

    def __init__(self, message: str, secret_name: str):
        # Both values are kept as attributes so handlers can inspect them.
        self.message = message
        self.secret_name = secret_name
        super().__init__(message)
|
modaic/precompiled.py
CHANGED
|
@@ -2,12 +2,13 @@ import inspect
|
|
|
2
2
|
import json
|
|
3
3
|
import os
|
|
4
4
|
import pathlib
|
|
5
|
+
import warnings
|
|
5
6
|
from abc import ABC, abstractmethod
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import (
|
|
8
9
|
TYPE_CHECKING,
|
|
10
|
+
Any,
|
|
9
11
|
Dict,
|
|
10
|
-
List,
|
|
11
12
|
Optional,
|
|
12
13
|
Type,
|
|
13
14
|
TypeVar,
|
|
@@ -15,11 +16,13 @@ from typing import (
|
|
|
15
16
|
)
|
|
16
17
|
|
|
17
18
|
import dspy
|
|
19
|
+
from git import config
|
|
18
20
|
from pydantic import BaseModel
|
|
19
21
|
|
|
20
22
|
from modaic.module_utils import create_agent_repo
|
|
21
23
|
from modaic.observability import Trackable, track_modaic_obj
|
|
22
24
|
|
|
25
|
+
from .exceptions import MissingSecretError
|
|
23
26
|
from .hub import load_repo, push_folder_to_hub
|
|
24
27
|
from .module_utils import _module_path
|
|
25
28
|
|
|
@@ -235,15 +238,25 @@ class PrecompiledAgent(dspy.Module):
|
|
|
235
238
|
extra_auto_classes["AutoRetriever"] = self.retriever
|
|
236
239
|
self.config.save_precompiled(path, extra_auto_classes)
|
|
237
240
|
self.save(path / "agent.json")
|
|
241
|
+
_clean_secrets(path / "agent.json")
|
|
238
242
|
|
|
239
243
|
@classmethod
|
|
240
|
-
def from_precompiled(
|
|
244
|
+
def from_precompiled(
|
|
245
|
+
cls: Type[A],
|
|
246
|
+
path: str | Path,
|
|
247
|
+
config_options: Optional[dict] = None,
|
|
248
|
+
api_key: Optional[str | dict[str, str]] = None,
|
|
249
|
+
hf_token: Optional[str | dict[str, str]] = None,
|
|
250
|
+
**kwargs,
|
|
251
|
+
) -> A:
|
|
241
252
|
"""
|
|
242
253
|
Loads the agent and the config from the given path.
|
|
243
254
|
|
|
244
255
|
Args:
|
|
245
256
|
path: The path to load the agent and config from. Can be a local path or a path on Modaic Hub.
|
|
246
257
|
config_options: A dictionary containg key-value pairs used to override the default config.
|
|
258
|
+
api_key: Your API key.
|
|
259
|
+
hf_token: Your Hugging Face token.
|
|
247
260
|
**kwargs: Additional keyword arguments forwarded to the PrecompiledAgent's constructor.
|
|
248
261
|
|
|
249
262
|
Returns:
|
|
@@ -261,7 +274,9 @@ class PrecompiledAgent(dspy.Module):
|
|
|
261
274
|
agent = cls(config, **kwargs)
|
|
262
275
|
agent_state_path = local_dir / "agent.json"
|
|
263
276
|
if agent_state_path.exists():
|
|
264
|
-
|
|
277
|
+
secrets = {"api_key": api_key, "hf_token": hf_token}
|
|
278
|
+
state = _get_state_with_secrets(agent_state_path, secrets)
|
|
279
|
+
agent.load_state(state)
|
|
265
280
|
return agent
|
|
266
281
|
|
|
267
282
|
def push_to_hub(
|
|
@@ -375,7 +390,7 @@ class Indexer(Retriever):
|
|
|
375
390
|
config: PrecompiledConfig
|
|
376
391
|
|
|
377
392
|
@abstractmethod
def index(self, contents: Any, **kwargs):
    """Abstract hook that concrete indexers must implement.

    Args:
        contents: The content to index; its type is not constrained here.
        **kwargs: Additional implementation-specific options.
    """
    pass
|
|
380
395
|
|
|
381
396
|
|
|
@@ -405,11 +420,88 @@ def _push_to_hub(
|
|
|
405
420
|
|
|
406
421
|
def is_local_path(s: str | Path) -> bool:
|
|
407
422
|
# absolute or relative filesystem path
|
|
423
|
+
if isinstance(s, Path):
|
|
424
|
+
return True
|
|
408
425
|
s = str(s)
|
|
426
|
+
|
|
427
|
+
print("SSSS", s)
|
|
409
428
|
if os.path.isabs(s) or s.startswith((".", "/", "\\")):
|
|
410
429
|
return True
|
|
411
430
|
parts = s.split("/")
|
|
412
431
|
# hub IDs: "repo" or "user/repo"
|
|
413
|
-
if len(parts) == 1
|
|
432
|
+
if len(parts) == 1:
|
|
433
|
+
raise ValueError(
|
|
434
|
+
f"Invalid repo: '{s}'. Please prefix local paths with './', '/', or '../' . And use 'user/repo' format for hub paths."
|
|
435
|
+
)
|
|
436
|
+
elif len(parts) == 2 and all(parts):
|
|
414
437
|
return False
|
|
415
438
|
return True
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
# Placeholder written in place of any secret value when an agent is saved.
SECRET_MASK = "********"
# Keyword names inside an `lm` dict that are always treated as secrets.
COMMON_SECRETS = ["api_key", "hf_token"]


def _clean_secrets(path: Path, extra_secrets: Optional[list[str]] = None):
    """
    Masks all secret keys in the `lm` dict of every entry in an agent.json file.

    The file is rewritten in place: each `lm` key named in `COMMON_SECRETS` or
    `extra_secrets` has its value replaced by `SECRET_MASK`.

    Args:
        path: The path to the agent.json file.
        extra_secrets: Additional key names to mask besides `COMMON_SECRETS`.
    """
    secret_keys = set(COMMON_SECRETS) | set(extra_secrets or [])

    with open(path, "r", encoding="utf-8") as f:
        state = json.load(f)

    for predictor in state.values():
        # Entries that are not dicts (or have a null/absent `lm`) hold no secrets.
        if not isinstance(predictor, dict):
            continue
        lm = predictor.get("lm")
        if not isinstance(lm, dict):
            continue
        # Only existing keys are reassigned, so the dict size is stable while iterating.
        for key in lm:
            if key in secret_keys:
                lm[key] = SECRET_MASK

    with open(path, "w", encoding="utf-8") as f:
        json.dump(state, f, indent=2)
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def _get_state_with_secrets(path: Path, secrets: dict[str, str | dict[str, str] | None]):
|
|
467
|
+
"""`
|
|
468
|
+
Fills secret keys in `lm` dict in agent.json file
|
|
469
|
+
|
|
470
|
+
Args:
|
|
471
|
+
path: The path to the agent.json file.
|
|
472
|
+
secrets: A dictionary containing the secrets to fill in the `lm` dict.
|
|
473
|
+
- Dict[k,v] where k is the name of a secret (e.g. "api_key") and v is the value of the secret
|
|
474
|
+
- If v is a string, every lm will use v for k
|
|
475
|
+
- if v is a dict, each key of v should be the name of a named predictor
|
|
476
|
+
(e.g. "my_module.predict", "my_module.summarizer") mapping to the secret value for that predictor
|
|
477
|
+
Returns:
|
|
478
|
+
A dictionary containing the state of the agent.json file with the secrets filled in.
|
|
479
|
+
"""
|
|
480
|
+
with open(path, "r") as f:
|
|
481
|
+
named_predictors = json.load(f)
|
|
482
|
+
|
|
483
|
+
def _get_secret(predictor_name: str, secret_name: str) -> Optional[str]:
|
|
484
|
+
if secret_val := secrets.get(secret_name):
|
|
485
|
+
if isinstance(secret_val, str):
|
|
486
|
+
return secret_val
|
|
487
|
+
elif isinstance(secret_val, dict):
|
|
488
|
+
return secret_val.get(predictor_name)
|
|
489
|
+
return None
|
|
490
|
+
|
|
491
|
+
for predictor_name, predictor in named_predictors.items():
|
|
492
|
+
lm = predictor.get("lm", {})
|
|
493
|
+
for kw, arg in lm.items():
|
|
494
|
+
if kw in COMMON_SECRETS and arg != "" and arg != SECRET_MASK:
|
|
495
|
+
warnings.warn(
|
|
496
|
+
f"{str(path)} exposes the secret key {kw}. Please remove it or ensure this file is not made public."
|
|
497
|
+
)
|
|
498
|
+
secret = _get_secret(predictor_name, kw)
|
|
499
|
+
if secret is not None and arg != "" and arg != SECRET_MASK:
|
|
500
|
+
raise ValueError(
|
|
501
|
+
f"Failed to fill insert secret value for {predictor_name}['lm']['{kw}']. It is already set to {arg}"
|
|
502
|
+
)
|
|
503
|
+
elif secret is None and kw in COMMON_SECRETS:
|
|
504
|
+
raise MissingSecretError(f"Please specify a value for {kw} in the secrets dictionary", kw)
|
|
505
|
+
elif secret is not None:
|
|
506
|
+
lm[kw] = secret
|
|
507
|
+
return named_predictors
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modaic
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Modular Agent Infrastructure Collection, a python framework for managing and sharing DSPy agents
|
|
5
5
|
Author-email: Tyrin <tytodd@mit.edu>, Farouk <farouk@modaic.dev>
|
|
6
6
|
License: MIT License
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
modaic/__init__.py,sha256=xHu2SUk3OMvb8PIzrVCRS1pBk-Ho9BhwmzKOf_bOjGc,809
|
|
2
2
|
modaic/auto.py,sha256=rPOdQ7s-YGBQLa_v6lVONH8pbOrarPwp4VzRErp0y5c,9091
|
|
3
3
|
modaic/datasets.py,sha256=K-PpPSYIxJI0-yH-SBVpk_EfCM9i_uPz-brmlzP7hzI,513
|
|
4
|
-
modaic/exceptions.py,sha256=
|
|
4
|
+
modaic/exceptions.py,sha256=Vq-eWEPSiqA3G3HaI2uZ_3Uwg4aIvulDnyOFAZc1FBk,903
|
|
5
5
|
modaic/hub.py,sha256=d5HQjaE26K1qNCBc32qJtrpESyRv6OiniAteasiN_rk,11290
|
|
6
6
|
modaic/indexing.py,sha256=VdILiXiLVzgV1pSTV8Ho7x1dZtd31Y9z60d_Qtqr2NU,4195
|
|
7
7
|
modaic/module_utils.py,sha256=I6kGCmGSuHM9lxz8rpCIkdFHCh0M7OIyw3GZ3966YWY,13442
|
|
8
8
|
modaic/observability.py,sha256=LgR4gJM4DhD-xlVX52mzRQSPgLQzbeh2LYPmQVqSh-A,9947
|
|
9
|
-
modaic/precompiled.py,sha256=
|
|
9
|
+
modaic/precompiled.py,sha256=7__DgYaVXFE7xZR6_jtRAVxUw7v0E59zcPWlGfs7raU,18303
|
|
10
10
|
modaic/query_language.py,sha256=BJIigR0HLapiIn9fF7jM7PkLM8OWUDjwYuxmzcCVvyo,9487
|
|
11
11
|
modaic/types.py,sha256=gcx8J4oxrHwxA7McyYV4OKHsuPhhmowJtJIgjJQbLto,10081
|
|
12
12
|
modaic/utils.py,sha256=doJs-XL4TswSQFBINZeKrik-cvjZk-tS9XmWH8fOYiw,794
|
|
13
|
-
modaic/agents/rag_agent.py,sha256=
|
|
13
|
+
modaic/agents/rag_agent.py,sha256=h17YQHXGRPMFn_liUDRNFYc2m0U5KXZ16Q0UELCqmEE,844
|
|
14
14
|
modaic/agents/registry.py,sha256=z6GuPxGrq2dinCamiMJ_HVPsD9Tp9XWDUSMZ-uhWPrU,2446
|
|
15
15
|
modaic/context/__init__.py,sha256=FK-bxSu36yGFF1rATy4Yzl4Fpv9kYOlRpBRfr_4moiM,560
|
|
16
16
|
modaic/context/base.py,sha256=x66_lcdQ063DiluC6UnFEH4etgJkbAyukrgrp2KLV5U,40368
|
|
@@ -19,7 +19,7 @@ modaic/context/table.py,sha256=9Lh2_UyK3OsWmgUfZQa8jDeVAPAk_iueT89_cruDeSo,17699
|
|
|
19
19
|
modaic/context/text.py,sha256=gCVQx15FrPcmpr2GYkJca6noh7fw_nTcCt-hISwcnvQ,2581
|
|
20
20
|
modaic/databases/__init__.py,sha256=-w_yiY-Sqi1SgcPD5oAQL7MU4VXTihPa1GYGlrHfsFw,784
|
|
21
21
|
modaic/databases/graph_database.py,sha256=j44PgWGMeD3dtPZe5sRpKnruTA3snm_TiXncusTzqIQ,9990
|
|
22
|
-
modaic/databases/sql_database.py,sha256=
|
|
22
|
+
modaic/databases/sql_database.py,sha256=62h9GtZv8gEAo1qHOQsxPL9bJPwweNmWLcRjYcpFIMY,13037
|
|
23
23
|
modaic/databases/vector_database/__init__.py,sha256=sN1SuSAMC9NHJDOa80BN_olccaHgmiW2Ek57hBvdZWo,306
|
|
24
24
|
modaic/databases/vector_database/vector_database.py,sha256=RsuRemgFV06opY26CekqLLRoAEFYOGl_CMuFETrYS0c,25238
|
|
25
25
|
modaic/databases/vector_database/benchmarks/baseline.py,sha256=ZhiYzHnizsLesAIQA93RhWaaYxEZp6ygExmnBhU9Dio,5209
|
|
@@ -33,8 +33,8 @@ modaic/databases/vector_database/vendors/qdrant.py,sha256=AbpHGcgLb-kRsJGnwFEktk
|
|
|
33
33
|
modaic/storage/__init__.py,sha256=Zs-Y_9jfYUE8XVp8z-El0ZXFM_ZVMqM9aQ6fgGPZsf8,131
|
|
34
34
|
modaic/storage/file_store.py,sha256=kSS7gTP_-16wR3Xgq3frF1BZ8Dw8N--kG4V9rrCXPcc,7315
|
|
35
35
|
modaic/storage/pickle_store.py,sha256=fu9jkmmKNE852Y4R1NhOFePLfd2gskhHSXxuq1G1S3I,778
|
|
36
|
-
modaic-0.
|
|
37
|
-
modaic-0.
|
|
38
|
-
modaic-0.
|
|
39
|
-
modaic-0.
|
|
40
|
-
modaic-0.
|
|
36
|
+
modaic-0.4.0.dist-info/licenses/LICENSE,sha256=7LMx9j453Vz1DoQbFot8Uhp9SExF5wlOx7c8vw2qhsE,1333
|
|
37
|
+
modaic-0.4.0.dist-info/METADATA,sha256=g7EbTl48wysK-lb6XnfBspOb85iH3oQXmllD-Tnplls,8651
|
|
38
|
+
modaic-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
39
|
+
modaic-0.4.0.dist-info/top_level.txt,sha256=RXWGuF-TsW8-17DveTJMPRiAgg_Rf2mq5F3R7tNu6t8,7
|
|
40
|
+
modaic-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|