modaic 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modaic might be problematic. Click here for more details.
- modaic/__init__.py +25 -0
- modaic/agents/rag_agent.py +33 -0
- modaic/agents/registry.py +84 -0
- modaic/auto_agent.py +228 -0
- modaic/context/__init__.py +34 -0
- modaic/context/base.py +1064 -0
- modaic/context/dtype_mapping.py +25 -0
- modaic/context/table.py +585 -0
- modaic/context/text.py +94 -0
- modaic/databases/__init__.py +35 -0
- modaic/databases/graph_database.py +269 -0
- modaic/databases/sql_database.py +355 -0
- modaic/databases/vector_database/__init__.py +12 -0
- modaic/databases/vector_database/benchmarks/baseline.py +123 -0
- modaic/databases/vector_database/benchmarks/common.py +48 -0
- modaic/databases/vector_database/benchmarks/fork.py +132 -0
- modaic/databases/vector_database/benchmarks/threaded.py +119 -0
- modaic/databases/vector_database/vector_database.py +722 -0
- modaic/databases/vector_database/vendors/milvus.py +408 -0
- modaic/databases/vector_database/vendors/mongodb.py +0 -0
- modaic/databases/vector_database/vendors/pinecone.py +0 -0
- modaic/databases/vector_database/vendors/qdrant.py +1 -0
- modaic/exceptions.py +38 -0
- modaic/hub.py +305 -0
- modaic/indexing.py +127 -0
- modaic/module_utils.py +341 -0
- modaic/observability.py +275 -0
- modaic/precompiled.py +429 -0
- modaic/query_language.py +321 -0
- modaic/storage/__init__.py +3 -0
- modaic/storage/file_store.py +239 -0
- modaic/storage/pickle_store.py +25 -0
- modaic/types.py +287 -0
- modaic/utils.py +21 -0
- modaic-0.1.0.dist-info/METADATA +281 -0
- modaic-0.1.0.dist-info/RECORD +39 -0
- modaic-0.1.0.dist-info/WHEEL +5 -0
- modaic-0.1.0.dist-info/licenses/LICENSE +31 -0
- modaic-0.1.0.dist-info/top_level.txt +1 -0
modaic/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from .auto_agent import AutoAgent, AutoConfig, AutoRetriever
|
|
2
|
+
from .indexing import Embedder
|
|
3
|
+
from .observability import Trackable, configure, track, track_modaic_obj
|
|
4
|
+
from .precompiled import Indexer, PrecompiledAgent, PrecompiledConfig, Retriever
|
|
5
|
+
from .query_language import AND, OR, Condition, Prop, Value, parse_modaic_filter
|
|
6
|
+
|
|
7
|
+
# Public API exposed by ``from modaic import *``. Every entry is a name imported
# at the top of this module; ``Condition`` is included so that all names imported
# from ``.query_language`` are exported consistently.
__all__ = [
    "AutoAgent",
    "AutoConfig",
    "AutoRetriever",
    "Retriever",
    "Indexer",
    "PrecompiledAgent",
    "PrecompiledConfig",
    "Embedder",
    "configure",
    "track",
    "Trackable",
    "track_modaic_obj",
    "AND",
    "OR",
    "Condition",
    "Prop",
    "Value",
    "parse_modaic_filter",
]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from modaic import Indexer, PrecompiledAgent, PrecompiledConfig
|
|
4
|
+
from modaic.context import Context
|
|
5
|
+
|
|
6
|
+
from .registry import builtin_agent, builtin_config, builtin_indexer
|
|
7
|
+
|
|
8
|
+
# Name under which the config, indexer, and agent classes in this module are registered.
agent_name = "basic-rag"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@builtin_config(agent_name)
class RAGAgentConfig(PrecompiledConfig):
    """Placeholder configuration for the built-in RAG agent.

    The class is registered as a builtin config under ``agent_name`` and
    currently carries no options of its own.
    """

    def __init__(self):
        """Create the config without setting any attributes.

        NOTE(review): the ``PrecompiledConfig`` initializer is not invoked
        here -- confirm the base class does not rely on it.
        """

    def forward(self, query: str) -> str:
        """Return the fixed string ``"hello"``; ``query`` is not used."""
        return "hello"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@builtin_indexer(agent_name)
class RAGIndexer(Indexer):
    """Placeholder indexer for the built-in RAG agent.

    Registered as a builtin indexer under ``agent_name``.
    """

    def ingest(self, config: RAGAgentConfig, contexts: List[Context]):
        """Accept ``contexts`` for indexing.

        This is a stub: nothing is stored and ``None`` is returned.

        Args:
            config: Configuration of the RAG agent (unused).
            contexts: Context objects that would be indexed (unused).
        """
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@builtin_agent(agent_name)
class RAGAgent(PrecompiledAgent):
    """Placeholder built-in RAG agent.

    Registered as a builtin agent under ``agent_name``.
    """

    def __init__(self, config: RAGAgentConfig, indexer: RAGIndexer):
        """Initialise the base agent with ``config`` and keep a reference to ``indexer``.

        Args:
            config: Configuration handed to ``PrecompiledAgent.__init__``.
            indexer: Indexer stored on the instance as ``self.indexer``.
        """
        super().__init__(config)
        self.indexer = indexer

    def forward(self, query: str) -> str:
        """Return the fixed string ``"hello"``; ``query`` is not used."""
        return "hello"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# registry.py
|
|
2
|
+
from importlib import import_module
|
|
3
|
+
from typing import Callable, Dict, NamedTuple, Tuple, Type
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Key(NamedTuple):
    """Hashable registry key: a registered name paired with the kind of object."""

    # Name the object was registered under.
    name: str
    # Kind of object, given as a string. As a field name this only shadows the
    # ``type`` builtin inside the tuple's namespace, which is harmless.
    type: str
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Registry:
    """Lookup table from keys to classes that stores import locations, not classes.

    ``register`` records where a class lives as ``(module, qualname)``. ``get``
    imports that module on first use, walks the qualified name to the class,
    and caches the result for later calls.
    """

    def __init__(self):
        """Create an empty, unfrozen registry."""
        self._frozen: bool = False
        self._paths: Dict[Key, Tuple[str, str]] = {}  # Key -> (module_path, qualname)
        self._cache: Dict[Key, Type] = {}

    def register(self, key: Key, cls: Type) -> None:
        """Record the import location of ``cls`` under ``key``.

        Args:
            key: Key to register; must not already be present.
            cls: The class whose ``__module__`` and ``__qualname__`` are stored.

        Raises:
            RuntimeError: If the registry has been frozen.
            KeyError: If ``key`` is already registered.
            TypeError: If ``cls`` is not a class.
        """
        if self._frozen:
            raise RuntimeError("Registry is frozen; no further registrations allowed.")

        existing = self._paths.get(key)
        if existing is not None:
            mod, qual = existing
            raise KeyError(f"Collision for {key}: already registered to {mod}:{qual}")

        if not isinstance(cls, type):
            raise TypeError("register() expects a class as the second argument.")

        # __qualname__ rather than __name__ so nested classes can be found again.
        self._paths[key] = (cls.__module__, cls.__qualname__)
        # Discard any previously resolved class for this key.
        self._cache.pop(key, None)

    def freeze(self) -> None:
        """Disallow any further ``register`` calls."""
        self._frozen = True

    def get(self, key: Key) -> Type:
        """Return the class registered under ``key``, importing it if necessary.

        Args:
            key: A key previously passed to ``register``.

        Returns:
            The resolved class (cached after the first successful lookup).

        Raises:
            KeyError: If ``key`` was never registered.
            TypeError: If the stored location no longer resolves to a class.
        """
        cached = self._cache.get(key)
        if cached is not None:
            return cached

        try:
            module_path, qualname = self._paths[key]
        except KeyError:
            raise KeyError(f"Unknown key {key}. Was it registered before freeze()?") from None

        # Start at the module and descend one attribute per qualname component.
        resolved = import_module(module_path)
        for attr_name in qualname.split("."):
            resolved = getattr(resolved, attr_name)

        if not isinstance(resolved, type):
            raise TypeError(f"Resolved {module_path}:{qualname} is not a class.")

        self._cache[key] = resolved
        return resolved
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Instantiate per “kind”
|
|
57
|
+
# A single Registry instance shared by the builtin_agent, builtin_indexer, and
# builtin_config decorators; entries are told apart by the ``type`` field of Key.
AgentRegistry = Registry()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def builtin_agent(name: str) -> Callable[[Type], Type]:
    """Build a class decorator that registers the class as a builtin agent.

    Args:
        name: Name the decorated class is registered under.

    Returns:
        A decorator that registers the class in ``AgentRegistry`` under
        ``Key(name, "agent")`` and returns the class unchanged.
    """

    def _register_agent(cls: Type) -> Type:
        AgentRegistry.register(Key(name, "agent"), cls)
        return cls

    return _register_agent
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def builtin_indexer(name: str) -> Callable[[Type], Type]:
    """Build a class decorator that registers the class as a builtin indexer.

    Args:
        name: Name the decorated class is registered under.

    Returns:
        A decorator that registers the class in ``AgentRegistry`` under
        ``Key(name, "indexer")`` and returns the class unchanged.
    """

    def _register_indexer(cls: Type) -> Type:
        AgentRegistry.register(Key(name, "indexer"), cls)
        return cls

    return _register_indexer
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def builtin_config(name: str) -> Callable[[Type], Type]:
    """Build a class decorator that registers the class as a builtin config.

    Args:
        name: Name the decorated class is registered under.

    Returns:
        A decorator that registers the class in ``AgentRegistry`` under
        ``Key(name, "config")`` and returns the class unchanged.
    """

    def _register_config(cls: Type) -> Type:
        AgentRegistry.register(Key(name, "config"), cls)
        return cls

    return _register_config
|
modaic/auto_agent.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Literal, Optional, Type
|
|
8
|
+
|
|
9
|
+
from .hub import load_repo
|
|
10
|
+
from .precompiled import PrecompiledAgent, PrecompiledConfig, Retriever, is_local_path
|
|
11
|
+
|
|
12
|
+
# Read once when this module is imported; None if the environment variable is unset.
MODAIC_TOKEN = os.getenv("MODAIC_TOKEN")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# In-process class registry filled by register(): maps model_type string ->
# (ConfigCls, ModelCls). _load_auto_class consults it before importing repo code.
# NOTE(review): _load_auto_class looks entries up by the class path read from
# auto_classes.json -- confirm callers register under that same string.
_REGISTRY = {}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def register(model_type: str, config_cls: Type[PrecompiledConfig], model_cls: Type[PrecompiledAgent]):
    """Associate ``model_type`` with a config class and a model class.

    An existing entry for the same ``model_type`` is overwritten.

    Args:
        model_type: String key for the entry in the module-level ``_REGISTRY``.
        config_cls: Config class stored as the first element of the entry.
        model_cls: Model class stored as the second element of the entry.
    """
    entry = (config_cls, model_cls)
    _REGISTRY[model_type] = entry
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@lru_cache
def _load_dynamic_class(
    repo_dir: str, class_path: str, parent_module: Optional[str] = None
) -> Type[PrecompiledConfig | PrecompiledAgent | Retriever]:
    """
    Import and return a class from code stored in a local directory.

    ``repo_dir`` is inserted at the front of ``sys.path`` (unless already present) so
    that the target module can be imported by its dotted name. Results are memoized by
    ``lru_cache``, so each distinct argument combination is resolved once per process.

    Args:
        repo_dir: Directory to make importable. It must contain the top-level module
            or package named by the resolved dotted path.
        class_path: Dotted path to the target class (e.g., "pkg.module.Class").
        parent_module: Optional dotted module prefix (e.g., "swagginty.TableRAG"). When
            given and ``class_path`` does not already start with it, the prefix is
            prepended to ``class_path``.

    Returns:
        The attribute found at the resolved dotted path (expected to be a class).

    Raises:
        ValueError: If the resolved path has no module component.
        ImportError: If the module cannot be imported.
        AttributeError: If the module has no attribute with the requested name.
    """
    repo_dir_str = str(Path(repo_dir))
    if repo_dir_str not in sys.path:
        # Front of the list, so modules in the repo are found before anything else.
        sys.path.insert(0, repo_dir_str)

    if parent_module and not class_path.startswith(parent_module + "."):
        full_path = f"{parent_module}.{class_path}"
    else:
        full_path = class_path

    module_name, _, attr = full_path.rpartition(".")
    if not module_name:
        # import_module("") would also raise ValueError, but with an opaque message.
        raise ValueError(f"Class path {full_path!r} must be of the form 'module.ClassName'.")

    # NOTE: importing executes the module's top-level code; only load repositories you trust.
    module = importlib.import_module(module_name)
    return getattr(module, attr)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class AutoConfig:
    """
    Config loader for precompiled agents and retrievers.
    """

    @staticmethod
    def from_precompiled(repo_path: str, *, parent_module: Optional[str] = None, **kwargs) -> PrecompiledConfig:
        """
        Load a config for an agent or retriever from a precompiled repo.

        The values stored in the repo's ``config.json`` are merged with ``kwargs``
        (``kwargs`` win on conflicts) and passed as keyword arguments to the config
        class named under "AutoConfig" in the repo's ``auto_classes.json``.

        Args:
            repo_path: Hub path ("user/repo") or a local directory.
            parent_module: Optional dotted module prefix (e.g., "swagginty.TableRAG") to use to import classes from repo_path. If provided, overrides default parent_module behavior.
            **kwargs: Extra keyword arguments for the config class; they override values read from config.json.
                NOTE(review): AutoAgent and AutoRetriever call this with ``local=True``, which lands here
                and is forwarded to the config constructor -- confirm that is intended.

        Returns:
            A config object constructed via the resolved config class.

        Raises:
            FileNotFoundError: If config.json does not exist in the loaded repo directory.
        """
        local = is_local_path(repo_path)
        repo_dir = load_repo(repo_path, local)

        cfg_path = repo_dir / "config.json"
        if not cfg_path.exists():
            raise FileNotFoundError(f"Failed to load AutoConfig, config.json not found in {repo_path}")
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(cfg_path, "r", encoding="utf-8") as fp:
            cfg = json.load(fp)

        ConfigClass = _load_auto_class(repo_path, repo_dir, "AutoConfig", parent_module=parent_module)  # noqa: N806
        # Later mapping wins: caller-supplied kwargs override config.json values.
        return ConfigClass(**{**cfg, **kwargs})
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class AutoAgent:
    """
    Dynamic loader for precompiled agents hosted on a hub or local path.
    """

    @staticmethod
    def from_precompiled(
        repo_path: str,
        *,
        config_options: Optional[dict] = None,
        parent_module: Optional[str] = None,
        project: Optional[str] = None,
        **kw,
    ) -> PrecompiledAgent:
        """
        Load a compiled agent from the given identifier.

        Args:
            repo_path: Hub path ("user/repo") or local directory.
            config_options: Optional keyword arguments forwarded to AutoConfig.from_precompiled to override config values.
            parent_module: Optional dotted module prefix (e.g., "swagginty.TableRAG") to use to import classes from repo_path. If provided, overrides default parent_module behavior.
            project: Optional project suffix, used only when repo_path is a hub path. The ``project`` keyword passed to the
                agent is ``repo_path`` when this is None, otherwise ``f"{repo_path}-{project}"``.
            **kw: Additional keyword arguments forwarded to the Agent constructor. For hub paths, ``repo``, ``project`` and
                ``trace=True`` are filled in when not supplied.

        Returns:
            An instantiated Agent subclass.
        """
        local = is_local_path(repo_path)
        repo_dir = load_repo(repo_path, local)

        overrides = {} if config_options is None else config_options

        # The already-loaded directory is handed to AutoConfig rather than repo_path.
        # NOTE(review): ``local=True`` is not a named parameter of AutoConfig.from_precompiled,
        # so it travels through **kwargs into the config constructor -- confirm intended.
        cfg = AutoConfig.from_precompiled(repo_dir, local=True, parent_module=parent_module, **overrides)
        AgentClass = _load_auto_class(repo_path, repo_dir, "AutoAgent", parent_module=parent_module)  # noqa: N806

        # Local directories and absolute paths get no automatic repo/project/trace settings.
        if local or "/" not in repo_path or repo_path.startswith("/"):
            return AgentClass(config=cfg, **kw)

        # Hub path: fill in defaults without overriding anything the caller supplied.
        kw.setdefault("repo", repo_path)
        project_name = repo_path if project is None else f"{repo_path}-{project}"
        kw.setdefault("project", project_name)
        kw.setdefault("trace", True)

        return AgentClass(config=cfg, **kw)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class AutoRetriever:
    """
    Dynamic loader for precompiled retrievers hosted on a hub or local path.
    """

    @staticmethod
    def from_precompiled(
        repo_path: str,
        *,
        config_options: Optional[dict] = None,
        parent_module: Optional[str] = None,
        project: Optional[str] = None,
        **kw,
    ) -> Retriever:
        """
        Load a compiled retriever from the given identifier.

        Args:
            repo_path: Hub path ("user/repo") or local directory.
            config_options: Optional keyword arguments forwarded to AutoConfig.from_precompiled to override config values.
            parent_module: Optional dotted module prefix (e.g., "swagginty.TableRAG") to use to import classes from repo_path. If provided, overrides default parent_module behavior.
            project: Optional project suffix, used only when repo_path is a hub path. The ``project`` keyword passed to the
                retriever is ``repo_path`` when this is None, otherwise ``f"{repo_path}-{project}"``.
            **kw: Additional keyword arguments forwarded to the Retriever constructor. For hub paths, ``repo``, ``project``
                and ``trace=True`` are filled in when not supplied.

        Returns:
            An instantiated Retriever subclass.
        """
        local = is_local_path(repo_path)
        repo_dir = load_repo(repo_path, local)

        overrides = {} if config_options is None else config_options

        # The already-loaded directory is handed to AutoConfig rather than repo_path.
        # NOTE(review): ``local=True`` is not a named parameter of AutoConfig.from_precompiled,
        # so it travels through **kwargs into the config constructor -- confirm intended.
        cfg = AutoConfig.from_precompiled(repo_dir, local=True, parent_module=parent_module, **overrides)
        RetrieverClass = _load_auto_class(repo_path, repo_dir, "AutoRetriever", parent_module=parent_module)  # noqa: N806

        # Local directories and absolute paths get no automatic repo/project/trace settings.
        if local or "/" not in repo_path or repo_path.startswith("/"):
            return RetrieverClass(config=cfg, **kw)

        # Hub path: fill in defaults without overriding anything the caller supplied.
        kw.setdefault("repo", repo_path)
        project_name = repo_path if project is None else f"{repo_path}-{project}"
        kw.setdefault("project", project_name)
        kw.setdefault("trace", True)

        return RetrieverClass(config=cfg, **kw)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _load_auto_class(
    repo_path: str,
    repo_dir: Path,
    auto_name: Literal["AutoConfig", "AutoAgent", "AutoRetriever"],
    parent_module: Optional[str] = None,
) -> Type[PrecompiledConfig | PrecompiledAgent | Retriever]:
    """
    Resolve the class that auto_classes.json names for ``auto_name``.

    The class path is read from ``repo_dir / "auto_classes.json"``. If that path is a key
    in the in-process ``_REGISTRY`` the registered class is returned (the config class for
    "AutoConfig", the model class otherwise); if not, the class is imported from the
    repository code.

    Args:
        repo_path: The path to the repo as given by the caller (local or hub path).
        repo_dir: The local directory the repository was loaded into.
        auto_name: The name of the auto class to load. (AutoConfig, AutoAgent, AutoRetriever)
        parent_module: The parent module to use to import the class. When omitted for a
            hub path, it is derived from repo_path by replacing "/" with ".".

    Returns:
        The resolved class object.

    Raises:
        FileNotFoundError: If auto_classes.json does not exist in ``repo_dir``.
        KeyError: If auto_classes.json has no (non-empty) entry for ``auto_name``.
    """
    # determine if the repo was loaded from local or hub
    local = is_local_path(repo_path)
    auto_classes_path = repo_dir / "auto_classes.json"

    if not auto_classes_path.exists():
        raise FileNotFoundError(
            f"Failed to load {auto_name}, auto_classes.json not found in {repo_path}, if this is your repo, make sure you push_to_hub() with `with_code=True`"
        )

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(auto_classes_path, "r", encoding="utf-8") as fp:
        auto_classes = json.load(fp)

    if not (auto_class_path := auto_classes.get(auto_name)):
        raise KeyError(
            f"{auto_name} not found in {repo_path}/auto_classes.json. Please check that the auto_classes.json file is correct."
        ) from None

    if auto_class_path in _REGISTRY:
        # Entries are (ConfigCls, ModelCls): a config request must get the config
        # class, not the model class.
        config_cls, model_cls = _REGISTRY[auto_class_path]
        return config_cls if auto_name == "AutoConfig" else model_cls

    if parent_module is None and not local:
        parent_module = str(repo_path).replace("/", ".")

    # For hub repos the import root is two levels above the repo directory.
    # NOTE(review): presumably the cache is laid out as <root>/<user>/<repo> so the
    # "user.repo" parent module resolves as a package path -- confirm against load_repo.
    import_root = repo_dir if local else repo_dir.parent.parent
    return _load_dynamic_class(import_root, auto_class_path, parent_module=parent_module)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from .base import (
|
|
2
|
+
Context,
|
|
3
|
+
HydratedAttr,
|
|
4
|
+
Relation,
|
|
5
|
+
requires_hydration,
|
|
6
|
+
)
|
|
7
|
+
from .table import (
|
|
8
|
+
BaseTabbedTable,
|
|
9
|
+
BaseTable,
|
|
10
|
+
TabbedTable,
|
|
11
|
+
TabbedTableFile,
|
|
12
|
+
Table,
|
|
13
|
+
TableFile,
|
|
14
|
+
)
|
|
15
|
+
from .text import Text
|
|
16
|
+
|
|
17
|
+
# Names exported by ``from modaic.context import *``. Every entry must be bound in
# this module: a listed name that is not defined makes the star-import raise
# AttributeError, so only names imported above are listed.
__all__ = [
    "Context",
    "Text",
    "Relation",
    "BaseTable",
    "Table",
    "TabbedTable",
    "BaseTabbedTable",
    "TableFile",
    "TabbedTableFile",
    "HydratedAttr",
    "requires_hydration",
]
|