modaic 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of modaic might be problematic. Click here for more details.

Files changed (39) hide show
  1. modaic/__init__.py +25 -0
  2. modaic/agents/rag_agent.py +33 -0
  3. modaic/agents/registry.py +84 -0
  4. modaic/auto_agent.py +228 -0
  5. modaic/context/__init__.py +34 -0
  6. modaic/context/base.py +1064 -0
  7. modaic/context/dtype_mapping.py +25 -0
  8. modaic/context/table.py +585 -0
  9. modaic/context/text.py +94 -0
  10. modaic/databases/__init__.py +35 -0
  11. modaic/databases/graph_database.py +269 -0
  12. modaic/databases/sql_database.py +355 -0
  13. modaic/databases/vector_database/__init__.py +12 -0
  14. modaic/databases/vector_database/benchmarks/baseline.py +123 -0
  15. modaic/databases/vector_database/benchmarks/common.py +48 -0
  16. modaic/databases/vector_database/benchmarks/fork.py +132 -0
  17. modaic/databases/vector_database/benchmarks/threaded.py +119 -0
  18. modaic/databases/vector_database/vector_database.py +722 -0
  19. modaic/databases/vector_database/vendors/milvus.py +408 -0
  20. modaic/databases/vector_database/vendors/mongodb.py +0 -0
  21. modaic/databases/vector_database/vendors/pinecone.py +0 -0
  22. modaic/databases/vector_database/vendors/qdrant.py +1 -0
  23. modaic/exceptions.py +38 -0
  24. modaic/hub.py +305 -0
  25. modaic/indexing.py +127 -0
  26. modaic/module_utils.py +341 -0
  27. modaic/observability.py +275 -0
  28. modaic/precompiled.py +429 -0
  29. modaic/query_language.py +321 -0
  30. modaic/storage/__init__.py +3 -0
  31. modaic/storage/file_store.py +239 -0
  32. modaic/storage/pickle_store.py +25 -0
  33. modaic/types.py +287 -0
  34. modaic/utils.py +21 -0
  35. modaic-0.1.0.dist-info/METADATA +281 -0
  36. modaic-0.1.0.dist-info/RECORD +39 -0
  37. modaic-0.1.0.dist-info/WHEEL +5 -0
  38. modaic-0.1.0.dist-info/licenses/LICENSE +31 -0
  39. modaic-0.1.0.dist-info/top_level.txt +1 -0
modaic/precompiled.py ADDED
@@ -0,0 +1,429 @@
1
+ import inspect
2
+ import json
3
+ import os
4
+ import pathlib
5
+ from abc import ABC, abstractmethod
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, ClassVar, Dict, Generic, List, Optional, Type, TypeVar, Union
8
+
9
+ import dspy
10
+ from pydantic import BaseModel
11
+
12
+ from modaic.module_utils import create_agent_repo
13
+ from modaic.observability import Trackable, track_modaic_obj
14
+
15
+ from .hub import load_repo, push_folder_to_hub
16
+
17
+ if TYPE_CHECKING:
18
+ from modaic.context.base import Context
19
+
20
+ C = TypeVar("C", bound="PrecompiledConfig")
21
+ A = TypeVar("A", bound="PrecompiledAgent")
22
+ R = TypeVar("R", bound="Retriever")
23
+
24
+
25
class PrecompiledConfig(BaseModel):
    """
    Pydantic model that can be saved to and loaded from a `config.json` file.
    """

    def save_precompiled(
        self,
        path: str | Path,
        _extra_auto_classes: Optional[Dict[str, object]] = None,
    ) -> None:
        """
        Saves the config to a config.json file in the given local folder.
        Also saves the auto_classes.json with AutoConfig and any other auto classes passed to _extra_auto_classes

        Args:
            path: The local folder to save the config to. Created (with parents) if it does not exist.
            _extra_auto_classes: An argument used internally to add extra auto classes to agent repo
        """
        path = pathlib.Path(path)
        path.mkdir(parents=True, exist_ok=True)

        with open(path / "config.json", "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, indent=2)

        # NOTE: since we don't allow PrecompiledConfig.push_to_hub(), when _extra_auto_classes is None
        # we will assume that we don't need to save the auto_classes.json
        if _extra_auto_classes is None:
            return

        # "AutoConfig" goes first so an identically named key in _extra_auto_classes overrides it.
        auto_classes = {"AutoConfig": self, **_extra_auto_classes}
        auto_classes_paths = {name: _module_path(obj) for name, obj in auto_classes.items()}

        with open(path / "auto_classes.json", "w", encoding="utf-8") as f:
            json.dump(auto_classes_paths, f, indent=2)

    @classmethod
    def from_precompiled(cls: Type[C], path: str | Path, **kwargs) -> C:
        """
        Loads the config from a config.json file in the given path. The path can be a local directory or a repo on Modaic Hub.

        Args:
            path: The path to load the config from. Can be a local directory or a repo on Modaic Hub.
            **kwargs: Additional keyword arguments used to override the default config.

        Returns:
            An instance of the PrecompiledConfig class.
        """
        local = is_local_path(path)
        local_dir = load_repo(path, local)
        # TODO load repos from the hub if not local
        path = local_dir / "config.json"
        with open(path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)
        # Values in kwargs take precedence over the values stored in config.json.
        return cls(**{**config_dict, **kwargs})

    @classmethod
    def from_dict(cls: Type[C], dict: Dict, **kwargs) -> C:
        """
        Loads the config from a dictionary.

        Args:
            dict: A dictionary containing the config.
            **kwargs: Additional keyword arguments used to override the default config.

        Returns:
            An instance of the PrecompiledConfig class.
        """
        instance = cls(**{**dict, **kwargs})
        return instance

    @classmethod
    def from_json(cls: Type[C], path: str, **kwargs) -> C:
        """
        Loads the config from a json file.

        Args:
            path: The path to load the config from.
            **kwargs: Additional keyword arguments used to override the default config.

        Returns:
            An instance of the PrecompiledConfig class.
        """
        with open(path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)
        # from_dict takes the mapping as its first positional argument; unpacking it
        # into keyword arguments would leave that required argument unfilled.
        return cls.from_dict(config_dict, **kwargs)

    def to_dict(self) -> Dict:
        """
        Converts the config to a dictionary.
        """
        return self.model_dump()

    def to_json(self) -> str:
        """
        Converts the config to a json string.
        """
        return self.model_dump_json()
120
+
121
+
122
class PrecompiledAgent(dspy.Module):
    """
    Bases: `dspy.Module`

    PrecompiledAgent supports observability tracking through DSPy callbacks.
    """

    config: PrecompiledConfig
    # None when the agent is constructed without a retriever.
    retriever: Optional["Retriever"]

    def __init__(
        self,
        config: PrecompiledConfig,
        *,
        retriever: Optional["Retriever"] = None,
        repo: Optional[str] = None,
        project: Optional[str] = None,
        trace: bool = False,
    ):
        """
        Args:
            config: The config for this agent.
            retriever: Optional retriever stored on the agent.
            repo: Repo name used to build the tracing project name.
            project: Optional project name appended to `repo` for tracing.
            trace: Whether to register an Opik callback with DSPy.

        Raises:
            ValueError: If tracing is requested with a project but no repo
                (only reached when opik is importable).
        """
        # create DSPy callback for observability if tracing is enabled
        callbacks = []
        if trace and (repo or project):
            try:
                from opik.integrations.dspy.callback import OpikCallback

                # The project name is derived from repo; project alone is not enough.
                if not repo:
                    raise ValueError("Must provide repo to enable tracing")
                project_name = f"{repo}-{project}" if project else repo

                opik_callback = OpikCallback(project_name=project_name, log_graph=True)
                callbacks.append(opik_callback)
            except ImportError:
                # opikcallback not available, continue without tracking
                pass

        # initialize DSPy Module with callbacks
        super().__init__()
        if callbacks:
            # Append to whatever callbacks are already configured; tolerate an unset/None value.
            existing_callbacks = list(getattr(dspy.settings, "callbacks", None) or [])
            dspy.settings.configure(callbacks=existing_callbacks + callbacks)

        self.config = config
        self.retriever = retriever

        # update retriever repo and project if provided
        if self.retriever and hasattr(self.retriever, "set_repo_project"):
            self.retriever.set_repo_project(repo=repo, project=project, trace=trace)

        # TODO: throw a warning if the config of the retriever has different values than the config of the agent

    def __init_subclass__(cls, **kwargs):
        """
        Validates at class-creation time that the subclass annotates `config`
        with something other than the base PrecompiledConfig.

        Raises:
            ValueError: If the annotation is missing or is PrecompiledConfig itself.
        """
        super().__init_subclass__(**kwargs)
        # Make sure subclasses have an annotated config attribute
        if not (config_class := cls.__annotations__.get("config")) or config_class is PrecompiledConfig:
            raise ValueError(
                f"""config class could not be found in {cls.__name__}. \n
                Hint: Please add an annotation for config to your subclass.
                Example:
                class {cls.__name__}(PrecompiledAgent):
                    config: YourConfigClass
                    def __init__(self, config: YourConfigClass, **kwargs):
                        super().__init__(config, **kwargs)
                        ...
                """
            )

    def forward(self, **kwargs) -> str:
        """
        Forward pass for the agent.

        Args:
            **kwargs: Additional keyword arguments.

        Returns:
            Forward pass result.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError(
            "Forward pass for PrecompiledAgent is not implemented. You must implement a forward method in your subclass."
        )

    def save_precompiled(self, path: str | Path, _with_auto_classes: bool = False) -> None:
        """
        Saves the agent.json and the config.json to the given local folder.

        Args:
            path: The local folder to save the agent and config to. Must be a local path.
            _with_auto_classes: Internally used argument used to configure whether to save the auto classes mapping.
        """
        path = pathlib.Path(path)
        extra_auto_classes = None
        if _with_auto_classes:
            extra_auto_classes = {"AutoAgent": self}
            if self.retriever is not None:
                extra_auto_classes["AutoRetriever"] = self.retriever
        self.config.save_precompiled(path, extra_auto_classes)
        self.save(path / "agent.json")

    @classmethod
    def from_precompiled(cls: Type[A], path: str | Path, config_options: Optional[dict] = None, **kwargs) -> A:
        """
        Loads the agent and the config from the given path.

        Args:
            path: The path to load the agent and config from. Can be a local path or a path on Modaic Hub.
            config_options: A dictionary containing key-value pairs used to override the default config.
            **kwargs: Additional keyword arguments forwarded to the PrecompiledAgent's constructor.

        Returns:
            An instance of the PrecompiledAgent class.

        Raises:
            ValueError: If called on PrecompiledAgent itself rather than a subclass.
        """

        if cls is PrecompiledAgent:
            raise ValueError("from_precompiled() can only be used on a subclass of PrecompiledAgent.")

        ConfigClass: Type[PrecompiledConfig] = cls.__annotations__["config"]  # noqa: N806
        local = is_local_path(path)
        local_dir = load_repo(path, local)
        config_options = config_options or {}
        config = ConfigClass.from_precompiled(local_dir, **config_options)
        agent = cls(config, **kwargs)
        # agent.json is optional; when absent the freshly constructed agent is returned as is.
        agent_state_path = local_dir / "agent.json"
        if agent_state_path.exists():
            agent.load(agent_state_path)
        return agent

    def push_to_hub(
        self,
        repo_path: str,
        access_token: Optional[str] = None,
        commit_message: str = "(no commit message)",
        with_code: bool = False,
    ) -> None:
        """
        Pushes the agent and the config to the given repo_path.

        Args:
            repo_path: The path on Modaic hub to save the agent and config to.
            access_token: Your Modaic access token.
            commit_message: The commit message to use when pushing to the hub.
            with_code: Whether to save the code along with the agent.json and config.json.
        """
        _push_to_hub(
            self, repo_path=repo_path, access_token=access_token, commit_message=commit_message, with_code=with_code
        )
274
+
275
+
276
class Retriever(ABC, Trackable):
    """
    Abstract base class for retrievers that carry a PrecompiledConfig.

    Concrete subclasses must annotate `config` with their own config class and
    implement `retrieve`.
    """

    config: PrecompiledConfig

    def __init__(self, config: PrecompiledConfig, **kwargs):
        """
        Args:
            config: The config for this retriever.
            **kwargs: Forwarded to Trackable.__init__.
        """
        ABC.__init__(self)
        Trackable.__init__(self, **kwargs)
        self.config = config

    def __init_subclass__(cls, **kwargs):
        """
        Validates at class-creation time that concrete subclasses annotate `config`
        with something other than the base PrecompiledConfig.

        Raises:
            ValueError: If a non-abstract subclass lacks the annotation or uses PrecompiledConfig itself.
        """
        super().__init_subclass__(**kwargs)
        # Make sure subclasses have an annotated config attribute
        # Unimplemented abstract classes get a pass (like Indexer for example)
        if inspect.isabstract(cls):
            return
        if not (config_class := cls.__annotations__.get("config")) or config_class is PrecompiledConfig:
            raise ValueError(
                f"""config class could not be found in {cls.__name__}. \n
                Hint: Please add an annotation for config to your subclass.
                Example:
                class {cls.__name__}({cls.__bases__[0].__name__}):
                    config: YourConfigClass
                    def __init__(self, config: YourConfigClass, **kwargs):
                        super().__init__(config, **kwargs)
                        ...
                """
            )

    @track_modaic_obj
    @abstractmethod
    def retrieve(self, query: str, **kwargs):
        """
        Retrieves results for the given query. Must be implemented by subclasses.

        Args:
            query: The query string.
            **kwargs: Additional keyword arguments.
        """
        pass

    @classmethod
    def from_precompiled(cls: Type[R], path: str | Path, config_options: Optional[dict] = None, **kwargs) -> R:
        """
        Loads the retriever and the config from the given path.

        Args:
            path: The path to load the config from. Can be a local path or a path on Modaic Hub.
            config_options: A dictionary containing key-value pairs used to override the default config.
            **kwargs: Additional keyword arguments forwarded to the retriever's constructor.

        Returns:
            An instance of the Retriever subclass.

        Raises:
            ValueError: If called on Retriever itself rather than a subclass.
        """
        # Guard against the base class itself; it has no concrete config class to load.
        if cls is Retriever:
            raise ValueError("from_precompiled() can only be used on a subclass of Retriever.")

        ConfigClass: Type[PrecompiledConfig] = cls.__annotations__["config"]  # noqa: N806
        local = is_local_path(path)
        local_dir = load_repo(path, local)
        config_options = config_options or {}
        config = ConfigClass.from_precompiled(local_dir, **config_options)

        retriever = cls(config, **kwargs)
        return retriever

    def save_precompiled(self, path: str | Path, _with_auto_classes: bool = False) -> None:
        """
        Saves the retriever configuration to the given path.

        Args:
            path: The path to save the retriever configuration and auto classes mapping.
            _with_auto_classes: Internal argument used to configure whether to save the auto classes mapping.
        """
        path_obj = pathlib.Path(path)
        extra_auto_classes = None
        if _with_auto_classes:
            extra_auto_classes = {"AutoRetriever": self}
        self.config.save_precompiled(path_obj, extra_auto_classes)

    def push_to_hub(
        self,
        repo_path: str,
        access_token: Optional[str] = None,
        commit_message: str = "(no commit message)",
        with_code: bool = False,
    ) -> None:
        """
        Pushes the retriever and the config to the given repo_path.

        Args:
            repo_path: The path on Modaic hub to save the agent and config to.
            access_token: Your Modaic access token.
            commit_message: The commit message to use when pushing to the hub.
            with_code: Whether to save the code along with the retriever.json and config.json.
        """
        _push_to_hub(self, repo_path, access_token, commit_message, with_code)
356
+
357
+
358
class Indexer(Retriever):
    """
    Abstract Retriever that additionally declares an `ingest` method.

    Subclasses must implement `ingest`.
    """

    config: PrecompiledConfig

    @abstractmethod
    def ingest(self, contexts: List["Context"], **kwargs):
        """
        Ingests the given contexts. Must be implemented by subclasses.

        Args:
            contexts: The Context objects to ingest.
            **kwargs: Additional keyword arguments.
        """
364
+
365
+
366
+ def _module_path(instance: object) -> str:
367
+ """
368
+ Return a deterministic module path for the given instance.
369
+
370
+ Args:
371
+ instance: The object instance whose class path should be resolved.
372
+
373
+ Returns:
374
+ str: A fully qualified path in the form "<module>.<ClassName>". If the
375
+ class' module is "__main__", use the file system to derive a stable
376
+ module name: the parent directory name when the file is "__main__.py",
377
+ otherwise the file stem.
378
+ """
379
+
380
+ cls = type(instance)
381
+ module_name = getattr(cls, "__module__", "__main__")
382
+ class_name = getattr(cls, "__name__", "Object")
383
+ if module_name != "__main__":
384
+ return f"{module_name}.{class_name}"
385
+
386
+ # When executed as a script, classes often report __module__ == "__main__".
387
+ # Normalize to a deterministic name based on the defining file path.
388
+ try:
389
+ file_path = pathlib.Path(inspect.getfile(cls)).resolve()
390
+ except Exception:
391
+ # Fallback to a generic name if the file cannot be determined
392
+ normalized_root = "main"
393
+ else:
394
+ if file_path.name == "__main__.py":
395
+ normalized_root = file_path.parent.name or "main"
396
+ else:
397
+ normalized_root = file_path.stem or "main"
398
+ return f"{normalized_root}.{class_name}"
399
+
400
+
401
+ # CAVEAT: PrecompiledConfig does not support push_to_hub() intentionally,
402
+ # this is to avoid confusion when pushing a config to the hub thinking it
403
+ # will update the config.json when in reality it will overwrite the entire
404
+ # directory to an empty one with just the config.json
405
def _push_to_hub(
    self: Union[PrecompiledAgent, "Retriever"],
    repo_path: str,
    access_token: Optional[str] = None,
    commit_message: str = "(no commit message)",
    with_code: bool = True,
) -> None:
    """
    Shared push implementation for agents and retrievers.

    Creates the repo folder with `create_agent_repo`, saves the object into it
    with `save_precompiled`, then uploads the folder with `push_folder_to_hub`.

    Args:
        self: The agent or retriever to push.
        repo_path: The path on Modaic hub to push to.
        access_token: Access token forwarded to `push_folder_to_hub`.
        commit_message: The commit message forwarded to `push_folder_to_hub`.
        with_code: Forwarded to `create_agent_repo`; also controls whether the
            auto classes mapping is saved.
    """
    staging_dir = create_agent_repo(repo_path, with_code=with_code)
    self.save_precompiled(staging_dir, _with_auto_classes=with_code)
    push_folder_to_hub(
        staging_dir,
        repo_path=repo_path,
        access_token=access_token,
        commit_message=commit_message,
    )
418
+
419
+
420
+ def is_local_path(s: str | Path) -> bool:
421
+ # absolute or relative filesystem path
422
+ s = str(s)
423
+ if os.path.isabs(s) or s.startswith((".", "/", "\\")):
424
+ return True
425
+ parts = s.split("/")
426
+ # hub IDs: "repo" or "user/repo"
427
+ if len(parts) == 1 or (len(parts) == 2 and all(parts)):
428
+ return False
429
+ return True