modaic-0.3.0-py3-none-any.whl → modaic-0.4.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of modaic might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import Any
2
2
 
3
3
  from modaic import Indexer, PrecompiledAgent, PrecompiledConfig
4
4
  from modaic.context import Context
@@ -19,7 +19,10 @@ class RAGAgentConfig(PrecompiledConfig):
19
19
 
20
20
  @builtin_indexer(agent_name)
21
21
  class RAGIndexer(Indexer):
22
- def ingest(self, config: RAGAgentConfig, contexts: List[Context]):
22
+ def __init__(self, config: RAGAgentConfig):
23
+ super().__init__(config)
24
+
25
+ def index(self, contents: Any):
23
26
  pass
24
27
 
25
28
 
@@ -5,7 +5,19 @@ from typing import Any, Callable, Iterable, List, Literal, Optional, Tuple
5
5
  from urllib.parse import urlencode
6
6
 
7
7
  import pandas as pd
8
- from sqlalchemy import Column, CursorResult, MetaData, String, Text, create_engine, inspect, text
8
+ from sqlalchemy import (
9
+ JSON,
10
+ Column,
11
+ CursorResult,
12
+ Index,
13
+ MetaData,
14
+ PrimaryKeyConstraint,
15
+ String,
16
+ Text,
17
+ create_engine,
18
+ inspect,
19
+ text,
20
+ )
9
21
  from sqlalchemy import Table as SQLTable
10
22
  from sqlalchemy.dialects import sqlite
11
23
  from sqlalchemy.orm import sessionmaker
@@ -81,30 +93,43 @@ class SQLiteBackend(SQLDatabaseBackend):
81
93
 
82
94
 
83
95
  class SQLDatabase:
96
+ METADATA_TABLE_NAME = "modaic_metadata"
97
+
84
98
  def __init__(
85
99
  self,
86
100
  backend: SQLDatabaseBackend | str,
87
101
  engine_kwargs: dict = None, # TODO: This may not be a smart idea, may want to enforce specific kwargs
88
102
  session_kwargs: dict = None, # TODO: This may not be a smart idea, may want to enforce specific kwargs
103
+ track_metadata: bool = False,
89
104
  ):
90
105
  self.url = backend.url if isinstance(backend, SQLDatabaseBackend) else backend
91
106
  self.engine = create_engine(self.url, **(engine_kwargs or {}))
92
107
  self.metadata = MetaData()
93
108
  self.session = sessionmaker(bind=self.engine, **(session_kwargs or {}))
94
109
  self.inspector = inspect(self.engine)
95
- self.preparer = IdentifierPreparer(sqlite.dialect())
110
+ self.preparer = self.engine.dialect.identifier_preparer
96
111
 
97
112
  # Create metadata table to store table metadata
98
- self.metadata_table = SQLTable(
99
- "metadata",
100
- self.metadata,
101
- Column("table_name", String(255), primary_key=True),
102
- Column("metadata_json", Text),
113
+ if track_metadata:
114
+ self._ensure_metadata_table()
115
+ self.metadata.reflect(bind=self.engine)
116
+ self.metadata_table: Optional[Table] = (
117
+ self.metadata.tables[self.METADATA_TABLE_NAME] if track_metadata else None
103
118
  )
104
- self.metadata.create_all(self.engine)
105
119
  self.connection = None
106
120
  self._in_transaction = False
107
121
 
122
+ def _ensure_metadata_table(self) -> None:
123
+ """Create the metadata table if missing."""
124
+ if not self.inspector.has_table(self.METADATA_TABLE_NAME):
125
+ SQLTable(
126
+ self.METADATA_TABLE_NAME,
127
+ self.metadata,
128
+ Column("table_name", String(255), primary_key=True),
129
+ Column("metadata_json", Text),
130
+ )
131
+ self.metadata.create_all(self.engine)
132
+
108
133
  def add_table(
109
134
  self,
110
135
  table: BaseTable,
@@ -115,17 +140,17 @@ class SQLDatabase:
115
140
  with self.connect() as connection:
116
141
  # Use the connection for to_sql to respect transaction context
117
142
  table._df.to_sql(table.name, connection, if_exists=if_exists, index=False)
118
-
119
- # Remove existing metadata for this table if it exists
120
- connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == table.name))
121
-
122
- # Insert new metadata
123
- connection.execute(
124
- self.metadata_table.insert().values(
125
- table_name=table.name,
126
- metadata_json=json.dumps(table.metadata),
143
+ if self.metadata_table is not None:
144
+ # Remove existing metadata for this table if it exists
145
+ connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == table.name))
146
+
147
+ # Insert new metadata
148
+ connection.execute(
149
+ self.metadata_table.insert().values(
150
+ table_name=table.name,
151
+ metadata_json=json.dumps(table.metadata),
152
+ )
127
153
  )
128
- )
129
154
  if self._should_commit():
130
155
  connection.commit()
131
156
 
@@ -151,7 +176,8 @@ class SQLDatabase:
151
176
  command = text(f"DROP TABLE {if_exists} {safe_name}")
152
177
  connection.execute(command)
153
178
  # Also remove metadata for this table
154
- connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == name))
179
+ if self.metadata_table is not None:
180
+ connection.execute(self.metadata_table.delete().where(self.metadata_table.c.table_name == name))
155
181
  if self._should_commit():
156
182
  connection.commit()
157
183
 
@@ -197,6 +223,10 @@ class SQLDatabase:
197
223
  Returns:
198
224
  Dictionary containing the table's metadata, or empty dict if not found.
199
225
  """
226
+ if self.metadata_table is None:
227
+ raise ValueError(
228
+ "Metadata table is not enabled. Please enable metadata tracking when initializing the SQLDatabase. with track_metadata=True."
229
+ )
200
230
  with self.connect() as connection:
201
231
  result = connection.execute(
202
232
  self.metadata_table.select().where(self.metadata_table.c.table_name == name)
modaic/exceptions.py CHANGED
@@ -36,3 +36,12 @@ class BackendCompatibilityError(ModaicError):
36
36
  """Raised when a feature is not supported by a backend"""
37
37
 
38
38
  pass
39
+
40
+
41
+ class MissingSecretError(AuthenticationError):
42
+ """Raised when a secret is missing"""
43
+
44
+ def __init__(self, message: str, secret_name: str):
45
+ self.message = message
46
+ self.secret_name = secret_name
47
+ super().__init__(message)
modaic/precompiled.py CHANGED
@@ -2,12 +2,13 @@ import inspect
2
2
  import json
3
3
  import os
4
4
  import pathlib
5
+ import warnings
5
6
  from abc import ABC, abstractmethod
6
7
  from pathlib import Path
7
8
  from typing import (
8
9
  TYPE_CHECKING,
10
+ Any,
9
11
  Dict,
10
- List,
11
12
  Optional,
12
13
  Type,
13
14
  TypeVar,
@@ -15,11 +16,13 @@ from typing import (
15
16
  )
16
17
 
17
18
  import dspy
19
+ from git import config
18
20
  from pydantic import BaseModel
19
21
 
20
22
  from modaic.module_utils import create_agent_repo
21
23
  from modaic.observability import Trackable, track_modaic_obj
22
24
 
25
+ from .exceptions import MissingSecretError
23
26
  from .hub import load_repo, push_folder_to_hub
24
27
  from .module_utils import _module_path
25
28
 
@@ -235,15 +238,25 @@ class PrecompiledAgent(dspy.Module):
235
238
  extra_auto_classes["AutoRetriever"] = self.retriever
236
239
  self.config.save_precompiled(path, extra_auto_classes)
237
240
  self.save(path / "agent.json")
241
+ _clean_secrets(path / "agent.json")
238
242
 
239
243
  @classmethod
240
- def from_precompiled(cls: Type[A], path: str | Path, config_options: Optional[dict] = None, **kwargs) -> A:
244
+ def from_precompiled(
245
+ cls: Type[A],
246
+ path: str | Path,
247
+ config_options: Optional[dict] = None,
248
+ api_key: Optional[str | dict[str, str]] = None,
249
+ hf_token: Optional[str | dict[str, str]] = None,
250
+ **kwargs,
251
+ ) -> A:
241
252
  """
242
253
  Loads the agent and the config from the given path.
243
254
 
244
255
  Args:
245
256
  path: The path to load the agent and config from. Can be a local path or a path on Modaic Hub.
246
257
  config_options: A dictionary containg key-value pairs used to override the default config.
258
+ api_key: Your API key.
259
+ hf_token: Your Hugging Face token.
247
260
  **kwargs: Additional keyword arguments forwarded to the PrecompiledAgent's constructor.
248
261
 
249
262
  Returns:
@@ -261,7 +274,9 @@ class PrecompiledAgent(dspy.Module):
261
274
  agent = cls(config, **kwargs)
262
275
  agent_state_path = local_dir / "agent.json"
263
276
  if agent_state_path.exists():
264
- agent.load(agent_state_path)
277
+ secrets = {"api_key": api_key, "hf_token": hf_token}
278
+ state = _get_state_with_secrets(agent_state_path, secrets)
279
+ agent.load_state(state)
265
280
  return agent
266
281
 
267
282
  def push_to_hub(
@@ -375,7 +390,7 @@ class Indexer(Retriever):
375
390
  config: PrecompiledConfig
376
391
 
377
392
  @abstractmethod
378
- def ingest(self, contexts: List["Context"], **kwargs):
393
+ def index(self, contents: Any, **kwargs):
379
394
  pass
380
395
 
381
396
 
@@ -405,11 +420,88 @@ def _push_to_hub(
405
420
 
406
421
  def is_local_path(s: str | Path) -> bool:
407
422
  # absolute or relative filesystem path
423
+ if isinstance(s, Path):
424
+ return True
408
425
  s = str(s)
426
+
427
+ print("SSSS", s)
409
428
  if os.path.isabs(s) or s.startswith((".", "/", "\\")):
410
429
  return True
411
430
  parts = s.split("/")
412
431
  # hub IDs: "repo" or "user/repo"
413
- if len(parts) == 1 or (len(parts) == 2 and all(parts)):
432
+ if len(parts) == 1:
433
+ raise ValueError(
434
+ f"Invalid repo: '{s}'. Please prefix local paths with './', '/', or '../' . And use 'user/repo' format for hub paths."
435
+ )
436
+ elif len(parts) == 2 and all(parts):
414
437
  return False
415
438
  return True
439
+
440
+
441
+ SECRET_MASK = "********"
442
+ COMMON_SECRETS = ["api_key", "hf_token"]
443
+
444
+
445
+ def _clean_secrets(path: Path, extra_secrets: Optional[list[str]] = None):
446
+ """
447
+ Removes all secret keys from `lm` dict in agent.json file
448
+ """
449
+ secret_keys = COMMON_SECRETS + (extra_secrets or [])
450
+
451
+ with open(path, "r") as f:
452
+ d = json.load(f)
453
+
454
+ for predictor in d.values():
455
+ lm = predictor.get("lm", None)
456
+ if lm is None:
457
+ continue
458
+ for k in lm.keys():
459
+ if k in secret_keys:
460
+ lm[k] = SECRET_MASK
461
+
462
+ with open(path, "w") as f:
463
+ json.dump(d, f, indent=2)
464
+
465
+
466
+ def _get_state_with_secrets(path: Path, secrets: dict[str, str | dict[str, str] | None]):
467
+ """`
468
+ Fills secret keys in `lm` dict in agent.json file
469
+
470
+ Args:
471
+ path: The path to the agent.json file.
472
+ secrets: A dictionary containing the secrets to fill in the `lm` dict.
473
+ - Dict[k,v] where k is the name of a secret (e.g. "api_key") and v is the value of the secret
474
+ - If v is a string, every lm will use v for k
475
+ - if v is a dict, each key of v should be the name of a named predictor
476
+ (e.g. "my_module.predict", "my_module.summarizer") mapping to the secret value for that predictor
477
+ Returns:
478
+ A dictionary containing the state of the agent.json file with the secrets filled in.
479
+ """
480
+ with open(path, "r") as f:
481
+ named_predictors = json.load(f)
482
+
483
+ def _get_secret(predictor_name: str, secret_name: str) -> Optional[str]:
484
+ if secret_val := secrets.get(secret_name):
485
+ if isinstance(secret_val, str):
486
+ return secret_val
487
+ elif isinstance(secret_val, dict):
488
+ return secret_val.get(predictor_name)
489
+ return None
490
+
491
+ for predictor_name, predictor in named_predictors.items():
492
+ lm = predictor.get("lm", {})
493
+ for kw, arg in lm.items():
494
+ if kw in COMMON_SECRETS and arg != "" and arg != SECRET_MASK:
495
+ warnings.warn(
496
+ f"{str(path)} exposes the secret key {kw}. Please remove it or ensure this file is not made public."
497
+ )
498
+ secret = _get_secret(predictor_name, kw)
499
+ if secret is not None and arg != "" and arg != SECRET_MASK:
500
+ raise ValueError(
501
+ f"Failed to fill insert secret value for {predictor_name}['lm']['{kw}']. It is already set to {arg}"
502
+ )
503
+ elif secret is None and kw in COMMON_SECRETS:
504
+ raise MissingSecretError(f"Please specify a value for {kw} in the secrets dictionary", kw)
505
+ elif secret is not None:
506
+ lm[kw] = secret
507
+ return named_predictors
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modaic
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Modular Agent Infrastructure Collection, a python framework for managing and sharing DSPy agents
5
5
  Author-email: Tyrin <tytodd@mit.edu>, Farouk <farouk@modaic.dev>
6
6
  License: MIT License
@@ -1,16 +1,16 @@
1
1
  modaic/__init__.py,sha256=xHu2SUk3OMvb8PIzrVCRS1pBk-Ho9BhwmzKOf_bOjGc,809
2
2
  modaic/auto.py,sha256=rPOdQ7s-YGBQLa_v6lVONH8pbOrarPwp4VzRErp0y5c,9091
3
3
  modaic/datasets.py,sha256=K-PpPSYIxJI0-yH-SBVpk_EfCM9i_uPz-brmlzP7hzI,513
4
- modaic/exceptions.py,sha256=XxzxOWjZTzT3l1BqTr7coJnVGxJq53uppRNrqP__YGo,651
4
+ modaic/exceptions.py,sha256=Vq-eWEPSiqA3G3HaI2uZ_3Uwg4aIvulDnyOFAZc1FBk,903
5
5
  modaic/hub.py,sha256=d5HQjaE26K1qNCBc32qJtrpESyRv6OiniAteasiN_rk,11290
6
6
  modaic/indexing.py,sha256=VdILiXiLVzgV1pSTV8Ho7x1dZtd31Y9z60d_Qtqr2NU,4195
7
7
  modaic/module_utils.py,sha256=I6kGCmGSuHM9lxz8rpCIkdFHCh0M7OIyw3GZ3966YWY,13442
8
8
  modaic/observability.py,sha256=LgR4gJM4DhD-xlVX52mzRQSPgLQzbeh2LYPmQVqSh-A,9947
9
- modaic/precompiled.py,sha256=_FwosgvItwnliVOc6nUp15RaBinmo1tTYhR9haun020,14864
9
+ modaic/precompiled.py,sha256=7__DgYaVXFE7xZR6_jtRAVxUw7v0E59zcPWlGfs7raU,18303
10
10
  modaic/query_language.py,sha256=BJIigR0HLapiIn9fF7jM7PkLM8OWUDjwYuxmzcCVvyo,9487
11
11
  modaic/types.py,sha256=gcx8J4oxrHwxA7McyYV4OKHsuPhhmowJtJIgjJQbLto,10081
12
12
  modaic/utils.py,sha256=doJs-XL4TswSQFBINZeKrik-cvjZk-tS9XmWH8fOYiw,794
13
- modaic/agents/rag_agent.py,sha256=f8s3EILOPUxMpOKDoAvk-cfLE8S9kFNvkEcAC5z2EmQ,798
13
+ modaic/agents/rag_agent.py,sha256=h17YQHXGRPMFn_liUDRNFYc2m0U5KXZ16Q0UELCqmEE,844
14
14
  modaic/agents/registry.py,sha256=z6GuPxGrq2dinCamiMJ_HVPsD9Tp9XWDUSMZ-uhWPrU,2446
15
15
  modaic/context/__init__.py,sha256=FK-bxSu36yGFF1rATy4Yzl4Fpv9kYOlRpBRfr_4moiM,560
16
16
  modaic/context/base.py,sha256=x66_lcdQ063DiluC6UnFEH4etgJkbAyukrgrp2KLV5U,40368
@@ -19,7 +19,7 @@ modaic/context/table.py,sha256=9Lh2_UyK3OsWmgUfZQa8jDeVAPAk_iueT89_cruDeSo,17699
19
19
  modaic/context/text.py,sha256=gCVQx15FrPcmpr2GYkJca6noh7fw_nTcCt-hISwcnvQ,2581
20
20
  modaic/databases/__init__.py,sha256=-w_yiY-Sqi1SgcPD5oAQL7MU4VXTihPa1GYGlrHfsFw,784
21
21
  modaic/databases/graph_database.py,sha256=j44PgWGMeD3dtPZe5sRpKnruTA3snm_TiXncusTzqIQ,9990
22
- modaic/databases/sql_database.py,sha256=wqy7AqsalhmYsbNPy0FCAg1FrUKN6Bd8ytwyJireC94,12057
22
+ modaic/databases/sql_database.py,sha256=62h9GtZv8gEAo1qHOQsxPL9bJPwweNmWLcRjYcpFIMY,13037
23
23
  modaic/databases/vector_database/__init__.py,sha256=sN1SuSAMC9NHJDOa80BN_olccaHgmiW2Ek57hBvdZWo,306
24
24
  modaic/databases/vector_database/vector_database.py,sha256=RsuRemgFV06opY26CekqLLRoAEFYOGl_CMuFETrYS0c,25238
25
25
  modaic/databases/vector_database/benchmarks/baseline.py,sha256=ZhiYzHnizsLesAIQA93RhWaaYxEZp6ygExmnBhU9Dio,5209
@@ -33,8 +33,8 @@ modaic/databases/vector_database/vendors/qdrant.py,sha256=AbpHGcgLb-kRsJGnwFEktk
33
33
  modaic/storage/__init__.py,sha256=Zs-Y_9jfYUE8XVp8z-El0ZXFM_ZVMqM9aQ6fgGPZsf8,131
34
34
  modaic/storage/file_store.py,sha256=kSS7gTP_-16wR3Xgq3frF1BZ8Dw8N--kG4V9rrCXPcc,7315
35
35
  modaic/storage/pickle_store.py,sha256=fu9jkmmKNE852Y4R1NhOFePLfd2gskhHSXxuq1G1S3I,778
36
- modaic-0.3.0.dist-info/licenses/LICENSE,sha256=7LMx9j453Vz1DoQbFot8Uhp9SExF5wlOx7c8vw2qhsE,1333
37
- modaic-0.3.0.dist-info/METADATA,sha256=Cw0LkkLDfVGdxif1ZSx9HI9q9c5HSIuzDI-VVWRFRO4,8651
38
- modaic-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
- modaic-0.3.0.dist-info/top_level.txt,sha256=RXWGuF-TsW8-17DveTJMPRiAgg_Rf2mq5F3R7tNu6t8,7
40
- modaic-0.3.0.dist-info/RECORD,,
36
+ modaic-0.4.0.dist-info/licenses/LICENSE,sha256=7LMx9j453Vz1DoQbFot8Uhp9SExF5wlOx7c8vw2qhsE,1333
37
+ modaic-0.4.0.dist-info/METADATA,sha256=g7EbTl48wysK-lb6XnfBspOb85iH3oQXmllD-Tnplls,8651
38
+ modaic-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ modaic-0.4.0.dist-info/top_level.txt,sha256=RXWGuF-TsW8-17DveTJMPRiAgg_Rf2mq5F3R7tNu6t8,7
40
+ modaic-0.4.0.dist-info/RECORD,,
File without changes