modaic 0.1.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modaic might be problematic. Click here for more details.
- {modaic-0.1.1/src/modaic.egg-info → modaic-0.2.0}/PKG-INFO +7 -4
- {modaic-0.1.1 → modaic-0.2.0}/README.md +4 -2
- {modaic-0.1.1 → modaic-0.2.0}/pyproject.toml +5 -4
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/__init__.py +13 -1
- modaic-0.1.1/src/modaic/auto_agent.py → modaic-0.2.0/src/modaic/auto.py +85 -53
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/context/__init__.py +2 -1
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/context/base.py +1 -1
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/context/table.py +1 -1
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/context/text.py +2 -2
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/graph_database.py +3 -2
- modaic-0.2.0/src/modaic/datasets.py +22 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/hub.py +28 -5
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/indexing.py +1 -1
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/module_utils.py +52 -17
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/observability.py +13 -9
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/precompiled.py +24 -39
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/query_language.py +2 -26
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/types.py +2 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/utils.py +6 -3
- {modaic-0.1.1 → modaic-0.2.0/src/modaic.egg-info}/PKG-INFO +7 -4
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic.egg-info/SOURCES.txt +3 -2
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic.egg-info/requires.txt +2 -1
- modaic-0.1.1/tests/test_auto_agent.py → modaic-0.2.0/tests/test_auto.py +18 -13
- {modaic-0.1.1 → modaic-0.2.0}/tests/test_precompiled.py +39 -35
- {modaic-0.1.1 → modaic-0.2.0}/tests/test_types.py +1 -3
- {modaic-0.1.1 → modaic-0.2.0}/LICENSE +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/setup.cfg +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/agents/rag_agent.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/agents/registry.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/context/dtype_mapping.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/__init__.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/sql_database.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/__init__.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/benchmarks/baseline.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/benchmarks/common.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/benchmarks/fork.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/benchmarks/threaded.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/vector_database.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/vendors/milvus.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/vendors/mongodb.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/vendors/pinecone.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/databases/vector_database/vendors/qdrant.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/exceptions.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/storage/__init__.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/storage/file_store.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic/storage/pickle_store.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic.egg-info/dependency_links.txt +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/src/modaic.egg-info/top_level.txt +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/tests/test_observability.py +0 -0
- {modaic-0.1.1 → modaic-0.2.0}/tests/test_query_language.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modaic
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Modular Agent Infrastructure Collective, a python framework for managing and sharing DSPy agents
|
|
5
5
|
Author-email: Tyrin <tytodd@mit.edu>, Farouk <farouk@modaic.dev>
|
|
6
6
|
License: MIT License
|
|
@@ -48,7 +48,8 @@ Requires-Dist: langchain-community>=0.3.29
|
|
|
48
48
|
Requires-Dist: langchain-core>=0.3.72
|
|
49
49
|
Requires-Dist: langchain-text-splitters>=0.3.9
|
|
50
50
|
Requires-Dist: more-itertools>=10.8.0
|
|
51
|
-
Requires-Dist:
|
|
51
|
+
Requires-Dist: openpyxl>=3.1.5
|
|
52
|
+
Requires-Dist: opik==1.8.42
|
|
52
53
|
Requires-Dist: pillow>=11.3.0
|
|
53
54
|
Requires-Dist: pymilvus>=2.5.14
|
|
54
55
|
Requires-Dist: sqlalchemy>=2.0.42
|
|
@@ -58,8 +59,11 @@ Requires-Dist: pinecone>=7.3.0; extra == "pinecone"
|
|
|
58
59
|
Dynamic: license-file
|
|
59
60
|
|
|
60
61
|
[Docs](https://docs.modaic.dev)
|
|
62
|
+
[PyPI](https://pypi.org/project/modaic/)
|
|
63
|
+
|
|
64
|
+
|
|
61
65
|
# Modaic 🐙
|
|
62
|
-
**Mod**ular **A**gent **I**nfrastructure **C**
|
|
66
|
+
**Mod**ular **A**gent **I**nfrastructure **C**ollection, a Python framework for building AI agents with structured context management, database integration, and retrieval-augmented generation (RAG) capabilities.
|
|
63
67
|
|
|
64
68
|
## Overview
|
|
65
69
|
|
|
@@ -255,7 +259,6 @@ from modaic.databases import VectorDatabase, SQLDatabase
|
|
|
255
259
|
from modaic.types import Indexer
|
|
256
260
|
|
|
257
261
|
class TableRAGConfig(PrecompiledConfig):
|
|
258
|
-
agent_type = "TableRAGAgent"
|
|
259
262
|
k_recall: int = 50
|
|
260
263
|
k_rerank: int = 5
|
|
261
264
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
[Docs](https://docs.modaic.dev)
|
|
2
|
+
[PyPI](https://pypi.org/project/modaic/)
|
|
3
|
+
|
|
4
|
+
|
|
2
5
|
# Modaic 🐙
|
|
3
|
-
**Mod**ular **A**gent **I**nfrastructure **C**
|
|
6
|
+
**Mod**ular **A**gent **I**nfrastructure **C**ollection, a Python framework for building AI agents with structured context management, database integration, and retrieval-augmented generation (RAG) capabilities.
|
|
4
7
|
|
|
5
8
|
## Overview
|
|
6
9
|
|
|
@@ -196,7 +199,6 @@ from modaic.databases import VectorDatabase, SQLDatabase
|
|
|
196
199
|
from modaic.types import Indexer
|
|
197
200
|
|
|
198
201
|
class TableRAGConfig(PrecompiledConfig):
|
|
199
|
-
agent_type = "TableRAGAgent"
|
|
200
202
|
k_recall: int = 50
|
|
201
203
|
k_rerank: int = 5
|
|
202
204
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "modaic"
|
|
3
|
-
version = "0.1.1"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "Modular Agent Infrastructure Collective, a python framework for managing and sharing DSPy agents"
|
|
5
5
|
authors = [{ name = "Tyrin", email = "tytodd@mit.edu" }, {name = "Farouk", email = "farouk@modaic.dev"}]
|
|
6
6
|
readme = "README.md"
|
|
@@ -17,7 +17,8 @@ dependencies = [
|
|
|
17
17
|
"langchain-core>=0.3.72",
|
|
18
18
|
"langchain-text-splitters>=0.3.9",
|
|
19
19
|
"more-itertools>=10.8.0",
|
|
20
|
-
"
|
|
20
|
+
"openpyxl>=3.1.5",
|
|
21
|
+
"opik==1.8.42",
|
|
21
22
|
"pillow>=11.3.0",
|
|
22
23
|
"pymilvus>=2.5.14",
|
|
23
24
|
"sqlalchemy>=2.0.42",
|
|
@@ -34,7 +35,6 @@ dev = [
|
|
|
34
35
|
"pdoc>=15.0.4",
|
|
35
36
|
"pytest>=8.4.1",
|
|
36
37
|
"ruff>=0.12.7",
|
|
37
|
-
"opik>=1.8.42",
|
|
38
38
|
"matplotlib>=3.10.6",
|
|
39
39
|
]
|
|
40
40
|
examples = [
|
|
@@ -61,7 +61,7 @@ exclude = [
|
|
|
61
61
|
[tool.ruff]
|
|
62
62
|
line-length = 120
|
|
63
63
|
fix = true
|
|
64
|
-
exclude = ["*scratchpad*.py"]
|
|
64
|
+
exclude = ["*scratchpad*.py", "**/*.ipynb"]
|
|
65
65
|
|
|
66
66
|
[tool.ruff.lint]
|
|
67
67
|
select = ["E", "F", "I", "B", "T20", "ERA", "ANN", "N"]
|
|
@@ -85,5 +85,6 @@ members = [
|
|
|
85
85
|
"tests/artifacts/test_repos/simple_repo_with_compile",
|
|
86
86
|
"tests/artifacts/test_repos/nested_repo",
|
|
87
87
|
"tests/artifacts/test_repos/nested_repo_2",
|
|
88
|
+
"tests/artifacts/test_repos/nested_repo_3",
|
|
88
89
|
"tests/artifacts/test_repos/multi_module_repo",
|
|
89
90
|
]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .
|
|
1
|
+
from .auto import AutoAgent, AutoConfig, AutoRetriever
|
|
2
2
|
from .indexing import Embedder
|
|
3
3
|
from .observability import Trackable, configure, track, track_modaic_obj
|
|
4
4
|
from .precompiled import Indexer, PrecompiledAgent, PrecompiledConfig, Retriever
|
|
@@ -22,4 +22,16 @@ __all__ = [
|
|
|
22
22
|
"Prop",
|
|
23
23
|
"Value",
|
|
24
24
|
"parse_modaic_filter",
|
|
25
|
+
"Condition",
|
|
25
26
|
]
|
|
27
|
+
_configured = False
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _auto_configure():
|
|
31
|
+
global _configured
|
|
32
|
+
if not _configured:
|
|
33
|
+
configure()
|
|
34
|
+
_configured = True
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
_auto_configure()
|
|
@@ -4,24 +4,38 @@ import os
|
|
|
4
4
|
import sys
|
|
5
5
|
from functools import lru_cache
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Literal, Optional, Type
|
|
7
|
+
from typing import Callable, Literal, Optional, Type, TypedDict
|
|
8
8
|
|
|
9
|
-
from .hub import load_repo
|
|
9
|
+
from .hub import AGENTS_CACHE, load_repo
|
|
10
10
|
from .precompiled import PrecompiledAgent, PrecompiledConfig, Retriever, is_local_path
|
|
11
11
|
|
|
12
12
|
MODAIC_TOKEN = os.getenv("MODAIC_TOKEN")
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
class RegisteredRepo(TypedDict, total=False):
|
|
16
|
+
AutoConfig: Type[PrecompiledConfig]
|
|
17
|
+
AutoAgent: Type[PrecompiledAgent]
|
|
18
|
+
AutoRetriever: Type[Retriever]
|
|
16
19
|
|
|
17
20
|
|
|
18
|
-
|
|
19
|
-
_REGISTRY[model_type] = (config_cls, model_cls)
|
|
21
|
+
_REGISTRY: dict[str, RegisteredRepo] = {}
|
|
20
22
|
|
|
21
23
|
|
|
24
|
+
def register(
|
|
25
|
+
name: str,
|
|
26
|
+
auto_type: Literal["AutoConfig", "AutoAgent", "AutoRetriever"],
|
|
27
|
+
cls: Type[PrecompiledConfig | PrecompiledAgent | Retriever],
|
|
28
|
+
):
|
|
29
|
+
if name in _REGISTRY:
|
|
30
|
+
_REGISTRY[name][auto_type] = cls
|
|
31
|
+
else:
|
|
32
|
+
_REGISTRY[name] = {auto_type: cls}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# TODO: Cleanup code still using parent_module
|
|
22
36
|
@lru_cache
|
|
23
37
|
def _load_dynamic_class(
|
|
24
|
-
repo_dir:
|
|
38
|
+
repo_dir: Path, class_path: str, hub_path: str = None
|
|
25
39
|
) -> Type[PrecompiledConfig | PrecompiledAgent | Retriever]:
|
|
26
40
|
"""
|
|
27
41
|
Load a class from a given repository directory and fully qualified class path.
|
|
@@ -29,27 +43,24 @@ def _load_dynamic_class(
|
|
|
29
43
|
Args:
|
|
30
44
|
repo_dir: Absolute path to a local repository directory containing the code.
|
|
31
45
|
class_path: Dotted path to the target class (e.g., "pkg.module.Class").
|
|
32
|
-
|
|
33
|
-
class_path is treated as relative to this module and only the agents cache
|
|
34
|
-
root is added to sys.path.
|
|
46
|
+
hub_path: The path to the repo on modaic hub (if it's a hub repo). *Must be specified if it's a hub repo.*
|
|
35
47
|
|
|
36
48
|
Returns:
|
|
37
49
|
The resolved class object.
|
|
38
50
|
"""
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
)
|
|
51
|
+
if hub_path is None:
|
|
52
|
+
# Local folder case
|
|
53
|
+
repo_dir_str = str(repo_dir)
|
|
54
|
+
if repo_dir_str not in sys.path:
|
|
55
|
+
sys.path.insert(0, repo_dir_str)
|
|
56
|
+
full_path = f"{class_path}"
|
|
57
|
+
else:
|
|
58
|
+
# loaded hub repo case
|
|
59
|
+
agents_cache_str = str(AGENTS_CACHE)
|
|
60
|
+
if agents_cache_str not in sys.path:
|
|
61
|
+
sys.path.insert(0, agents_cache_str)
|
|
62
|
+
parent_module = hub_path.replace("/", ".")
|
|
63
|
+
full_path = f"{parent_module}.{class_path}"
|
|
53
64
|
|
|
54
65
|
module_name, _, attr = full_path.rpartition(".")
|
|
55
66
|
module = importlib.import_module(module_name)
|
|
@@ -62,27 +73,31 @@ class AutoConfig:
|
|
|
62
73
|
"""
|
|
63
74
|
|
|
64
75
|
@staticmethod
|
|
65
|
-
def from_precompiled(repo_path: str,
|
|
76
|
+
def from_precompiled(repo_path: str, **kwargs) -> PrecompiledConfig:
|
|
77
|
+
local = is_local_path(repo_path)
|
|
78
|
+
repo_dir = load_repo(repo_path, local)
|
|
79
|
+
return AutoConfig._from_precompiled(repo_dir, hub_path=repo_path if not local else None, **kwargs)
|
|
80
|
+
|
|
81
|
+
@staticmethod
|
|
82
|
+
def _from_precompiled(repo_dir: Path, hub_path: str = None, **kwargs) -> PrecompiledConfig:
|
|
66
83
|
"""
|
|
67
84
|
Load a config for an agent or retriever from a precompiled repo.
|
|
68
85
|
|
|
69
86
|
Args:
|
|
70
|
-
|
|
71
|
-
|
|
87
|
+
repo_dir: The path to the loaded local repository directory.
|
|
88
|
+
hub_path: The path to the repo on modaic hub (if it's a hub repo). *Must be specified if it's a hub repo.*
|
|
72
89
|
|
|
73
90
|
Returns:
|
|
74
91
|
A config object constructed via the resolved config class.
|
|
75
92
|
"""
|
|
76
|
-
local = is_local_path(repo_path)
|
|
77
|
-
repo_dir = load_repo(repo_path, local)
|
|
78
93
|
|
|
79
94
|
cfg_path = repo_dir / "config.json"
|
|
80
95
|
if not cfg_path.exists():
|
|
81
|
-
raise FileNotFoundError(f"Failed to load AutoConfig, config.json not found in {
|
|
96
|
+
raise FileNotFoundError(f"Failed to load AutoConfig, config.json not found in {hub_path or str(repo_dir)}")
|
|
82
97
|
with open(cfg_path, "r") as fp:
|
|
83
98
|
cfg = json.load(fp)
|
|
84
99
|
|
|
85
|
-
ConfigClass = _load_auto_class(
|
|
100
|
+
ConfigClass = _load_auto_class(repo_dir, "AutoConfig", hub_path=hub_path) # noqa: N806
|
|
86
101
|
return ConfigClass(**{**cfg, **kwargs})
|
|
87
102
|
|
|
88
103
|
|
|
@@ -96,7 +111,6 @@ class AutoAgent:
|
|
|
96
111
|
repo_path: str,
|
|
97
112
|
*,
|
|
98
113
|
config_options: Optional[dict] = None,
|
|
99
|
-
parent_module: Optional[str] = None,
|
|
100
114
|
project: Optional[str] = None,
|
|
101
115
|
**kw,
|
|
102
116
|
) -> PrecompiledAgent:
|
|
@@ -105,23 +119,25 @@ class AutoAgent:
|
|
|
105
119
|
|
|
106
120
|
Args:
|
|
107
121
|
repo_path: Hub path ("user/repo") or local directory.
|
|
108
|
-
parent_module: Optional dotted module prefix (e.g., "swagginty.TableRAG") to use to import classes from repo_path. If provided, overrides default parent_module behavior.
|
|
109
122
|
project: Optional project name. If not provided and repo_path is a hub path, defaults to the repo name.
|
|
110
123
|
**kw: Additional keyword arguments forwarded to the Agent constructor.
|
|
111
124
|
|
|
112
125
|
Returns:
|
|
113
126
|
An instantiated Agent subclass.
|
|
114
127
|
"""
|
|
128
|
+
# TODO: fast lookups via registry
|
|
115
129
|
local = is_local_path(repo_path)
|
|
116
130
|
repo_dir = load_repo(repo_path, local)
|
|
131
|
+
hub_path = repo_path if not local else None
|
|
117
132
|
|
|
118
133
|
if config_options is None:
|
|
119
134
|
config_options = {}
|
|
120
135
|
|
|
121
|
-
cfg = AutoConfig.
|
|
122
|
-
AgentClass = _load_auto_class(
|
|
136
|
+
cfg = AutoConfig._from_precompiled(repo_dir, hub_path=hub_path, **config_options)
|
|
137
|
+
AgentClass = _load_auto_class(repo_dir, "AutoAgent", hub_path=hub_path) # noqa: N806
|
|
123
138
|
|
|
124
139
|
# automatically configure repo and project from repo_path if not provided
|
|
140
|
+
# TODO: redundant checks in if statement. Investigate removing.
|
|
125
141
|
if not local and "/" in repo_path and not repo_path.startswith("/"):
|
|
126
142
|
parts = repo_path.split("/")
|
|
127
143
|
if len(parts) >= 2:
|
|
@@ -146,7 +162,6 @@ class AutoRetriever:
|
|
|
146
162
|
repo_path: str,
|
|
147
163
|
*,
|
|
148
164
|
config_options: Optional[dict] = None,
|
|
149
|
-
parent_module: Optional[str] = None,
|
|
150
165
|
project: Optional[str] = None,
|
|
151
166
|
**kw,
|
|
152
167
|
) -> Retriever:
|
|
@@ -155,7 +170,6 @@ class AutoRetriever:
|
|
|
155
170
|
|
|
156
171
|
Args:
|
|
157
172
|
repo_path: hub path ("user/repo"), or local directory.
|
|
158
|
-
parent_module: Optional dotted module prefix (e.g., "swagginty.TableRAG") to use to import classes from repo_path. If provided, overrides default parent_module behavior.
|
|
159
173
|
project: Optional project name. If not provided and repo_path is a hub path, defaults to the repo name.
|
|
160
174
|
**kw: Additional keyword arguments forwarded to the Retriever constructor.
|
|
161
175
|
|
|
@@ -164,14 +178,16 @@ class AutoRetriever:
|
|
|
164
178
|
"""
|
|
165
179
|
local = is_local_path(repo_path)
|
|
166
180
|
repo_dir = load_repo(repo_path, local)
|
|
181
|
+
hub_path = repo_path if not local else None
|
|
167
182
|
|
|
168
183
|
if config_options is None:
|
|
169
184
|
config_options = {}
|
|
170
185
|
|
|
171
|
-
cfg = AutoConfig.
|
|
172
|
-
RetrieverClass = _load_auto_class(
|
|
186
|
+
cfg = AutoConfig._from_precompiled(repo_dir, hub_path=hub_path, **config_options)
|
|
187
|
+
RetrieverClass = _load_auto_class(repo_dir, "AutoRetriever", hub_path=hub_path) # noqa: N806
|
|
173
188
|
|
|
174
189
|
# automatically configure repo and project from repo_path if not provided
|
|
190
|
+
# TODO: redundant checks in if statement. Investigate removing.
|
|
175
191
|
if not local and "/" in repo_path and not repo_path.startswith("/"):
|
|
176
192
|
parts = repo_path.split("/")
|
|
177
193
|
if len(parts) >= 2:
|
|
@@ -186,27 +202,25 @@ class AutoRetriever:
|
|
|
186
202
|
|
|
187
203
|
|
|
188
204
|
def _load_auto_class(
|
|
189
|
-
repo_path: str,
|
|
190
205
|
repo_dir: Path,
|
|
191
206
|
auto_name: Literal["AutoConfig", "AutoAgent", "AutoRetriever"],
|
|
192
|
-
|
|
207
|
+
hub_path: str = None,
|
|
193
208
|
) -> Type[PrecompiledConfig | PrecompiledAgent | Retriever]:
|
|
194
209
|
"""
|
|
195
210
|
Load a class from the auto_classes.json file.
|
|
196
211
|
|
|
197
212
|
Args:
|
|
198
|
-
repo_path: The path to the repo. (local or hub path)
|
|
199
213
|
repo_dir: The path to the loaded local repository directory.
|
|
200
214
|
auto_name: The name of the auto class to load. (AutoConfig, AutoAgent, AutoRetriever)
|
|
201
|
-
|
|
215
|
+
hub_path: The path to the repo on modaic hub (if it's a hub repo). *Must be specified if it's a hub repo.*
|
|
202
216
|
"""
|
|
203
217
|
# determine if the repo was loaded from local or hub
|
|
204
|
-
local =
|
|
218
|
+
local = hub_path is None
|
|
205
219
|
auto_classes_path = repo_dir / "auto_classes.json"
|
|
206
220
|
|
|
207
221
|
if not auto_classes_path.exists():
|
|
208
222
|
raise FileNotFoundError(
|
|
209
|
-
f"Failed to load {auto_name}, auto_classes.json not found in {
|
|
223
|
+
f"Failed to load {auto_name}, auto_classes.json not found in {hub_path or str(repo_dir)}, if this is your repo, make sure you push_to_hub() with `with_code=True`"
|
|
210
224
|
)
|
|
211
225
|
|
|
212
226
|
with open(auto_classes_path, "r") as fp:
|
|
@@ -214,15 +228,33 @@ def _load_auto_class(
|
|
|
214
228
|
|
|
215
229
|
if not (auto_class_path := auto_classes.get(auto_name)):
|
|
216
230
|
raise KeyError(
|
|
217
|
-
f"{auto_name} not found in {
|
|
231
|
+
f"{auto_name} not found in {hub_path or str(repo_dir)}/auto_classes.json. Please check that the auto_classes.json file is correct."
|
|
218
232
|
) from None
|
|
219
233
|
|
|
220
|
-
if
|
|
221
|
-
|
|
222
|
-
else:
|
|
223
|
-
if parent_module is None and not local:
|
|
224
|
-
parent_module = str(repo_path).replace("/", ".")
|
|
225
|
-
|
|
226
|
-
repo_dir = repo_dir.parent.parent if not local else repo_dir
|
|
227
|
-
LoadedClass = _load_dynamic_class(repo_dir, auto_class_path, parent_module=parent_module) # noqa: N806
|
|
234
|
+
repo_dir = repo_dir.parent.parent if not local else repo_dir
|
|
235
|
+
LoadedClass = _load_dynamic_class(repo_dir, auto_class_path, hub_path=hub_path) # noqa: N806
|
|
228
236
|
return LoadedClass
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def builtin_agent(name: str) -> Callable[[Type], Type]:
|
|
240
|
+
def _wrap(cls: Type) -> Type:
|
|
241
|
+
register(name, "AutoAgent", cls)
|
|
242
|
+
return cls
|
|
243
|
+
|
|
244
|
+
return _wrap
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def builtin_indexer(name: str) -> Callable[[Type], Type]:
|
|
248
|
+
def _wrap(cls: Type) -> Type:
|
|
249
|
+
register(name, "AutoRetriever", cls)
|
|
250
|
+
return cls
|
|
251
|
+
|
|
252
|
+
return _wrap
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def builtin_config(name: str) -> Callable[[Type], Type]:
|
|
256
|
+
def _wrap(cls: Type) -> Type:
|
|
257
|
+
register(name, "AutoConfig", cls)
|
|
258
|
+
return cls
|
|
259
|
+
|
|
260
|
+
return _wrap
|
|
@@ -12,7 +12,7 @@ from .table import (
|
|
|
12
12
|
Table,
|
|
13
13
|
TableFile,
|
|
14
14
|
)
|
|
15
|
-
from .text import Text
|
|
15
|
+
from .text import Text, TextFile
|
|
16
16
|
|
|
17
17
|
__all__ = [
|
|
18
18
|
"MultiTabbedTable",
|
|
@@ -31,4 +31,5 @@ __all__ = [
|
|
|
31
31
|
"Prop",
|
|
32
32
|
"HydratedAttr",
|
|
33
33
|
"requires_hydration",
|
|
34
|
+
"TextFile",
|
|
34
35
|
]
|
|
@@ -22,7 +22,7 @@ from pydantic._internal._model_construction import ModelMetaclass
|
|
|
22
22
|
from pydantic.fields import ModelPrivateAttr
|
|
23
23
|
from pydantic.main import IncEx
|
|
24
24
|
from pydantic.v1 import Field as V1Field
|
|
25
|
-
from pydantic_core import
|
|
25
|
+
from pydantic_core import SchemaSerializer
|
|
26
26
|
|
|
27
27
|
from ..query_language import Prop
|
|
28
28
|
from ..storage.file_store import FileStore
|
|
@@ -38,7 +38,7 @@ class BaseTable(Context, ABC):
|
|
|
38
38
|
"""
|
|
39
39
|
Return up to 3 distinct sample values from the given column.
|
|
40
40
|
|
|
41
|
-
Picks at most three unique, non-null, short (
|
|
41
|
+
Picks at most three unique, non-null, short (<64 chars) values from
|
|
42
42
|
the column, favoring speed by sampling after de-duplicating values.
|
|
43
43
|
|
|
44
44
|
Args:
|
|
@@ -47,7 +47,7 @@ class TextFile(Context):
|
|
|
47
47
|
file_type: Literal["txt"] = "txt"
|
|
48
48
|
|
|
49
49
|
def hydrate(self, file_store: FileStore) -> None:
|
|
50
|
-
file = file_store.get(self.file_ref)
|
|
50
|
+
file = file_store.get(self.file_ref).file
|
|
51
51
|
if isinstance(file, Path):
|
|
52
52
|
file = file.read_text()
|
|
53
53
|
else:
|
|
@@ -91,4 +91,4 @@ class TextFile(Context):
|
|
|
91
91
|
chunks.append(Text(text=chunk))
|
|
92
92
|
return chunks
|
|
93
93
|
|
|
94
|
-
self.
|
|
94
|
+
self.chunk_with(chunk_text_fn)
|
|
@@ -12,12 +12,13 @@ from typing import (
|
|
|
12
12
|
Type,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
-
from dotenv import load_dotenv
|
|
15
|
+
from dotenv import find_dotenv, load_dotenv
|
|
16
16
|
|
|
17
17
|
from ..context.base import Context, Relation
|
|
18
18
|
from ..observability import Trackable, track_modaic_obj
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
env_file = find_dotenv(usecwd=True)
|
|
21
|
+
load_dotenv(env_file)
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
if TYPE_CHECKING:
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import dspy
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Dataset:
|
|
5
|
+
def __init__(self, data: list):
|
|
6
|
+
self.data = data
|
|
7
|
+
|
|
8
|
+
def to_dspy(self) -> list:
|
|
9
|
+
return dspy.Dataset(self.data)
|
|
10
|
+
|
|
11
|
+
@classmethod
|
|
12
|
+
def from_csv(cls, file_path: str) -> "Dataset":
|
|
13
|
+
with open(file_path, "r") as file:
|
|
14
|
+
data = file.read()
|
|
15
|
+
return cls(data)
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def from_hub(cls, dataset_name: str) -> "Dataset":
|
|
19
|
+
from datasets import load_dataset
|
|
20
|
+
|
|
21
|
+
data = load_dataset(dataset_name)
|
|
22
|
+
return cls(data)
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import shutil
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Any, Dict, Optional
|
|
4
5
|
|
|
5
6
|
import git
|
|
6
7
|
import requests
|
|
7
|
-
from dotenv import load_dotenv
|
|
8
|
+
from dotenv import find_dotenv, load_dotenv
|
|
8
9
|
|
|
9
10
|
from .exceptions import AuthenticationError, RepositoryExistsError, RepositoryNotFoundError
|
|
10
11
|
from .utils import compute_cache_dir
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
env_file = find_dotenv(usecwd=True)
|
|
14
|
+
load_dotenv(env_file)
|
|
13
15
|
|
|
14
16
|
MODAIC_TOKEN = os.getenv("MODAIC_TOKEN")
|
|
15
17
|
MODAIC_GIT_URL = os.getenv("MODAIC_GIT_URL", "git.modaic.dev").replace("https://", "").rstrip("/")
|
|
@@ -80,6 +82,7 @@ def create_remote_repo(repo_path: str, access_token: str, exist_ok: bool = False
|
|
|
80
82
|
raise Exception(f"Request failed: {str(e)}") from e
|
|
81
83
|
|
|
82
84
|
|
|
85
|
+
# FIXME: make faster. Currently takes ~9 seconds
|
|
83
86
|
def push_folder_to_hub(
|
|
84
87
|
folder: str,
|
|
85
88
|
repo_path: str,
|
|
@@ -123,10 +126,11 @@ def push_folder_to_hub(
|
|
|
123
126
|
"Modaic fast paths not yet implemented. Please load agents with 'user/repo' or 'org/repo' format"
|
|
124
127
|
)
|
|
125
128
|
assert repo_path.count("/") <= 1, f"Extra '/' in repo_path: {repo_path}"
|
|
126
|
-
|
|
129
|
+
# TODO: try pushing first and on error create the repo. create_remote_repo currently takes ~1.5 seconds to run
|
|
127
130
|
create_remote_repo(repo_path, access_token, exist_ok=True)
|
|
128
131
|
username = get_user_info(access_token)["login"]
|
|
129
132
|
|
|
133
|
+
# FIXME: takes 6 seconds
|
|
130
134
|
try:
|
|
131
135
|
# 1) If local folder is not a git repository, initialize it.
|
|
132
136
|
local_repo = git.Repo.init(folder)
|
|
@@ -205,6 +209,7 @@ def get_repo_payload(repo_name: str) -> Dict[str, Any]:
|
|
|
205
209
|
return payload
|
|
206
210
|
|
|
207
211
|
|
|
212
|
+
# TODO: add persistent filesystem based cache mapping access_token to user_info. Currently takes ~1 second
|
|
208
213
|
def get_user_info(access_token: str) -> Dict[str, Any]:
|
|
209
214
|
"""
|
|
210
215
|
Returns the user info for the given access token.
|
|
@@ -214,12 +219,14 @@ def get_user_info(access_token: str) -> Dict[str, Any]:
|
|
|
214
219
|
access_token: The access token to get the user info for.
|
|
215
220
|
|
|
216
221
|
Returns:
|
|
222
|
+
```python
|
|
217
223
|
{
|
|
218
224
|
"login": str,
|
|
219
225
|
"email": str,
|
|
220
226
|
"avatar_url": str,
|
|
221
227
|
"name": str,
|
|
222
228
|
}
|
|
229
|
+
```
|
|
223
230
|
"""
|
|
224
231
|
global user_info
|
|
225
232
|
if user_info:
|
|
@@ -243,6 +250,7 @@ def get_user_info(access_token: str) -> Dict[str, Any]:
|
|
|
243
250
|
return user_info
|
|
244
251
|
|
|
245
252
|
|
|
253
|
+
# TODO:
|
|
246
254
|
def git_snapshot(
|
|
247
255
|
repo_path: str,
|
|
248
256
|
*,
|
|
@@ -265,7 +273,6 @@ def git_snapshot(
|
|
|
265
273
|
elif access_token is None:
|
|
266
274
|
raise ValueError("Access token is required")
|
|
267
275
|
|
|
268
|
-
# If a local folder path is provided, just return it
|
|
269
276
|
repo_dir = Path(AGENTS_CACHE) / repo_path
|
|
270
277
|
username = get_user_info(access_token)["login"]
|
|
271
278
|
try:
|
|
@@ -291,10 +298,26 @@ def git_snapshot(
|
|
|
291
298
|
repo.git.reset("--hard", f"origin/{target}")
|
|
292
299
|
return repo_dir
|
|
293
300
|
except Exception as e:
|
|
294
|
-
|
|
301
|
+
shutil.rmtree(repo_dir)
|
|
295
302
|
raise e
|
|
296
303
|
|
|
297
304
|
|
|
305
|
+
def _move_to_commit_sha_folder(repo: git.Repo) -> git.Repo:
|
|
306
|
+
"""
|
|
307
|
+
Moves the repo to a new path based on the commit SHA. (Unused for now)
|
|
308
|
+
Args:
|
|
309
|
+
repo: The git.Repo object.
|
|
310
|
+
|
|
311
|
+
Returns:
|
|
312
|
+
The new git.Repo object.
|
|
313
|
+
"""
|
|
314
|
+
commit = repo.head.commit
|
|
315
|
+
repo_dir = Path(repo.working_dir)
|
|
316
|
+
new_path = repo_dir / commit.hexsha
|
|
317
|
+
repo_dir.rename(new_path)
|
|
318
|
+
return git.Repo(new_path)
|
|
319
|
+
|
|
320
|
+
|
|
298
321
|
def load_repo(repo_path: str, is_local: bool = False) -> Path:
|
|
299
322
|
if is_local:
|
|
300
323
|
path = Path(repo_path)
|
|
@@ -77,7 +77,7 @@ class PineconeReranker(Reranker):
|
|
|
77
77
|
try:
|
|
78
78
|
from pinecone import Pinecone
|
|
79
79
|
except ImportError:
|
|
80
|
-
raise ImportError("Pinecone is not installed. Please install it with `uv add pinecone`")
|
|
80
|
+
raise ImportError("Pinecone is not installed. Please install it with `uv add pinecone`") from None
|
|
81
81
|
|
|
82
82
|
if api_key is None:
|
|
83
83
|
self.pinecone = Pinecone(os.getenv("PINECONE_API_KEY"))
|