agentic-data 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
+ # Rust
2
+ target/
3
+ **/*.rs.bk
4
+
5
+ # IDE
6
+ .idea/
7
+ .vscode/
8
+ *.swp
9
+ *.swo
10
+ *~
11
+
12
+ # OS
13
+ .DS_Store
14
+ Thumbs.db
15
+ *.orig
16
+
17
+ # Python
18
+ __pycache__/
19
+ *.pyc
20
+ *.egg-info/
21
+ dist/
22
+ build/
23
+ .eggs/
24
+ *.whl
25
+ venv/
26
+ .venv/
27
+
28
+ # Data files (test artifacts)
29
+ *.adat
30
+ *.tmp
31
+
32
+ # LaTeX build artifacts
33
+ paper/**/*.aux
34
+ paper/**/*.bbl
35
+ paper/**/*.blg
36
+ paper/**/*.fdb_latexmk
37
+ paper/**/*.fls
38
+ paper/**/*.log
39
+ paper/**/*.out
40
+ paper/**/*.synctex.gz
41
+
42
+ # Environment / secrets
43
+ .env
44
+ *.key
45
+
46
+ # Internal planning (not published)
47
+ planning-docs/
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentic-data
3
+ Version: 0.1.0
4
+ Summary: Universal data comprehension for AI agents
5
+ Project-URL: Homepage, https://github.com/agentralabs/agentic-data
6
+ Project-URL: Documentation, https://github.com/agentralabs/agentic-data/tree/main/docs
7
+ Project-URL: Repository, https://github.com/agentralabs/agentic-data
8
+ Author: Agentra Labs
9
+ License-Expression: MIT
10
+ Keywords: agents,ai,data,lineage,schema
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.10
20
+ Provides-Extra: dev
21
+ Requires-Dist: mypy>=1.10; extra == 'dev'
22
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
23
+ Requires-Dist: pytest>=8.0; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # AgenticData Python SDK
27
+
28
+ Pure-Python SDK that wraps the `adat` CLI binary.
29
+
30
+ ```bash
31
+ pip install agentic-data
32
+ ```
@@ -0,0 +1,7 @@
1
+ # AgenticData Python SDK
2
+
3
+ Pure-Python SDK that wraps the `adat` CLI binary.
4
+
5
+ ```bash
6
+ pip install agentic-data
7
+ ```
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.27"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agentic-data"
7
+ version = "0.1.0"
8
+ description = "Universal data comprehension for AI agents"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Agentra Labs" },
14
+ ]
15
+ keywords = ["ai", "agents", "data", "schema", "lineage"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
25
+ ]
26
+ dependencies = []
27
+
28
+ [project.optional-dependencies]
29
+ dev = [
30
+ "pytest>=8.0",
31
+ "pytest-cov>=5.0",
32
+ "mypy>=1.10",
33
+ ]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/agentralabs/agentic-data"
37
+ Documentation = "https://github.com/agentralabs/agentic-data/tree/main/docs"
38
+ Repository = "https://github.com/agentralabs/agentic-data"
39
+
40
+ [tool.hatch.build.targets.wheel]
41
+ packages = ["src/agentic_data"]
42
+
43
+ [tool.pytest.ini_options]
44
+ testpaths = ["tests"]
45
+
46
+ [tool.mypy]
47
+ python_version = "3.10"
48
+ strict = true
@@ -0,0 +1,139 @@
1
+ """AgenticData — Universal data comprehension for AI agents.
2
+
3
+ Pure-Python SDK that wraps the ``adat`` CLI binary via subprocess.
4
+ Zero required dependencies; only stdlib: subprocess, json, logging, shutil, pathlib, dataclasses.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import subprocess
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Any, Optional
15
+
16
+ __version__ = "0.1.0"
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class DataError(Exception):
    """Error reported by the SDK when the ``adat`` CLI cannot do its job.

    Raised when the binary cannot be located on ``PATH`` and when a command
    exits with a non-zero status.
    """
23
+
24
+
25
+ @dataclass
26
+ class DataStore:
27
+ """Interface to an ``.adat`` data store file.
28
+
29
+ Parameters
30
+ ----------
31
+ path : str | Path
32
+ Path to the ``.adat`` file. Created automatically on first write
33
+ if it does not exist.
34
+ binary : str
35
+ Name or path of the ``adat`` CLI binary.
36
+ """
37
+
38
+ path: str | Path
39
+ binary: str = "adat"
40
+ _resolved_binary: Optional[str] = field(default=None, repr=False, init=False)
41
+
42
+ def __post_init__(self) -> None:
43
+ self.path = Path(self.path)
44
+
45
+ # ------------------------------------------------------------------
46
+ # Internal helpers
47
+ # ------------------------------------------------------------------
48
+
49
+ def _find_binary(self) -> str:
50
+ if self._resolved_binary is not None:
51
+ return self._resolved_binary
52
+
53
+ import shutil
54
+
55
+ found = shutil.which(self.binary)
56
+ if found is None:
57
+ raise DataError(
58
+ f"Cannot find '{self.binary}' on PATH. "
59
+ "Install AgenticData: curl -fsSL https://agentralabs.tech/install/data | bash"
60
+ )
61
+ self._resolved_binary = found
62
+ return found
63
+
64
+ def _run(self, *args: str, check: bool = True) -> str:
65
+ """Execute an adat CLI command and return stdout."""
66
+ cmd = [self._find_binary(), "--file", str(self.path), *args]
67
+ logger.debug("Running: %s", " ".join(cmd))
68
+ result = subprocess.run(cmd, capture_output=True, text=True)
69
+ if check and result.returncode != 0:
70
+ raise DataError(
71
+ f"adat command failed (exit {result.returncode}): {result.stderr.strip()}"
72
+ )
73
+ return result.stdout.strip()
74
+
75
+ def _run_json(self, *args: str) -> Any:
76
+ """Execute a command and parse JSON output."""
77
+ raw = self._run(*args, "--format", "json")
78
+ return json.loads(raw) if raw else {}
79
+
80
+ # ------------------------------------------------------------------
81
+ # Data operations
82
+ # ------------------------------------------------------------------
83
+
84
+ def detect_format(self, source: str) -> dict[str, Any]:
85
+ """Detect the format of a data source. Returns format metadata."""
86
+ return self._run_json("format", "detect", source)
87
+
88
+ def ingest(
89
+ self,
90
+ source: str,
91
+ *,
92
+ format: Optional[str] = None,
93
+ schema: Optional[str] = None,
94
+ ) -> str:
95
+ """Ingest data from a source. Returns the ingestion ID."""
96
+ args = ["ingest", source]
97
+ if format:
98
+ args.extend(["--format", format])
99
+ if schema:
100
+ args.extend(["--schema", schema])
101
+ return self._run(*args)
102
+
103
+ def query(self, expression: str, *, limit: Optional[int] = None) -> list[dict[str, Any]]:
104
+ """Query data with an expression. Returns matching records."""
105
+ args = ["query", expression, "--format", "json"]
106
+ if limit is not None:
107
+ args.extend(["--limit", str(limit)])
108
+ raw = self._run(*args)
109
+ return json.loads(raw) if raw else []
110
+
111
+ def quality_score(self, source: Optional[str] = None) -> dict[str, Any]:
112
+ """Compute data quality score. Returns quality metrics."""
113
+ args = ["quality", "score", "--format", "json"]
114
+ if source:
115
+ args.append(source)
116
+ raw = self._run(*args)
117
+ return json.loads(raw) if raw else {}
118
+
119
+ # ------------------------------------------------------------------
120
+ # Stats
121
+ # ------------------------------------------------------------------
122
+
123
+ def stats(self) -> dict[str, Any]:
124
+ """Get data store statistics."""
125
+ raw = self._run("stats", "--format", "json")
126
+ return json.loads(raw) if raw else {}
127
+
128
+ # ------------------------------------------------------------------
129
+ # File operations
130
+ # ------------------------------------------------------------------
131
+
132
+ def save(self) -> None:
133
+ """Explicit save (most operations auto-save)."""
134
+ pass
135
+
136
+ @property
137
+ def exists(self) -> bool:
138
+ """Whether the .adat file exists on disk."""
139
+ return self.path.exists()
File without changes
@@ -0,0 +1,310 @@
1
+ """Comprehensive tests for AgenticData Python SDK."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import subprocess
6
+ from pathlib import Path, PurePosixPath
7
+ from unittest.mock import patch, MagicMock
8
+
9
+ import pytest
10
+
11
+ from agentic_data import DataStore, DataError, __version__
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # 1. Package Metadata
16
+ # ---------------------------------------------------------------------------
17
+
18
+
19
class TestPackageMetadata:
    """Sanity checks on the package's public names and version string."""

    def test_version_exists(self) -> None:
        # The version must be a non-empty string.
        assert __version__ is not None
        assert isinstance(__version__, str)
        assert __version__ != ""

    def test_version_semver(self) -> None:
        # Exactly three dot-separated numeric components.
        components = __version__.split(".")
        assert len(components) == 3
        for component in components:
            assert component.isdigit()

    def test_version_is_010(self) -> None:
        assert "0.1.0" == __version__

    def test_import_main_class(self) -> None:
        assert DataStore is not None

    def test_import_error_class(self) -> None:
        assert DataError is not None
        assert issubclass(DataError, Exception)

    def test_main_class_has_docstring(self) -> None:
        doc = DataStore.__doc__
        assert doc is not None
        assert len(doc) > 10
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # 2. Initialization
47
+ # ---------------------------------------------------------------------------
48
+
49
+
50
class TestInit:
    """Construction of ``DataStore`` and its simple attributes."""

    def test_create_with_string_path(self, tmp_path: Path) -> None:
        location = str(tmp_path / "test.adat")
        store = DataStore(location)
        assert str(store.path) == location

    def test_create_with_path_object(self, tmp_path: Path) -> None:
        location = tmp_path / "test.adat"
        assert DataStore(location).path == location

    def test_create_with_pure_posix_path(self) -> None:
        store = DataStore(PurePosixPath("/tmp/test.adat"))
        assert "test.adat" in str(store.path)

    def test_path_converted_to_path_object(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "test.adat"))
        assert isinstance(store.path, Path)

    def test_custom_binary_name(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "test.adat"), binary="custom-bin")
        assert store.binary == "custom-bin"

    def test_default_binary_name(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "test.adat"))
        assert store.binary == "adat"

    def test_exists_false_for_new(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "nonexistent.adat"))
        assert not store.exists

    def test_exists_true_when_file_present(self, tmp_path: Path) -> None:
        location = tmp_path / "exists.adat"
        location.touch()
        assert DataStore(str(location)).exists

    def test_save_is_noop(self, tmp_path: Path) -> None:
        # Passing means the call completed without raising.
        DataStore(str(tmp_path / "test.adat")).save()

    def test_repr_does_not_crash(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "test.adat"))
        assert isinstance(repr(store), str)
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # 3. Binary Resolution
100
+ # ---------------------------------------------------------------------------
101
+
102
+
103
class TestBinaryResolution:
    """Lookup and caching of the CLI binary path."""

    # A binary name that is not expected to exist on any PATH.
    MISSING = "nonexistent-xyz-999"

    def test_missing_binary_raises(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "t.adat"), binary=self.MISSING)
        with pytest.raises(DataError):
            store._find_binary()

    def test_error_contains_binary_name(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "t.adat"), binary=self.MISSING)
        with pytest.raises(DataError, match=self.MISSING):
            store._find_binary()

    def test_error_contains_install_hint(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "t.adat"), binary=self.MISSING)
        with pytest.raises(DataError, match="Install"):
            store._find_binary()

    def test_caches_result(self, tmp_path: Path) -> None:
        # A pre-populated cache is returned without consulting PATH.
        store = DataStore(str(tmp_path / "t.adat"))
        store._resolved_binary = "/fake/path/adat"
        assert store._find_binary() == "/fake/path/adat"

    def test_cache_persists_across_calls(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "t.adat"))
        store._resolved_binary = "/cached/bin"
        first = store._find_binary()
        assert first == "/cached/bin"
        second = store._find_binary()
        assert second == "/cached/bin"
129
+
130
+
131
+ # ---------------------------------------------------------------------------
132
+ # 4. Subprocess Execution
133
+ # ---------------------------------------------------------------------------
134
+
135
+
136
class TestSubprocessExecution:
    """Behaviour of ``_run`` and ``_run_json`` with ``subprocess.run`` patched."""

    @staticmethod
    def _store(tmp_path: Path, binary: str = "/usr/bin/echo") -> DataStore:
        """Build a store whose binary lookup is pre-resolved to *binary*."""
        store = DataStore(str(tmp_path / "t.adat"))
        store._resolved_binary = binary
        return store

    @staticmethod
    def _completed(stdout: str, returncode: int = 0, stderr: str = "") -> MagicMock:
        """Stand-in for the object ``subprocess.run`` would return."""
        return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr)

    def test_run_calls_subprocess(self, tmp_path: Path) -> None:
        store = self._store(tmp_path)
        with patch("subprocess.run") as fake_run:
            fake_run.return_value = self._completed("ok\n")
            store._run("arg1", "arg2")
            assert fake_run.called
            command = fake_run.call_args[0][0]
            assert command[0] == "/usr/bin/echo"
            assert "arg1" in command
            assert "arg2" in command

    def test_run_includes_file_flag(self, tmp_path: Path) -> None:
        store = self._store(tmp_path)
        with patch("subprocess.run") as fake_run:
            fake_run.return_value = self._completed("ok\n")
            store._run("test")
            command = fake_run.call_args[0][0]
            assert "--file" in command
            assert str(tmp_path / "t.adat") in command

    def test_run_raises_on_nonzero_exit(self, tmp_path: Path) -> None:
        store = self._store(tmp_path, binary="/bin/false")
        with patch("subprocess.run") as fake_run:
            fake_run.return_value = self._completed(
                "", returncode=1, stderr="error happened"
            )
            with pytest.raises(DataError, match="error happened"):
                store._run("fail")

    def test_run_returns_stripped_stdout(self, tmp_path: Path) -> None:
        store = self._store(tmp_path)
        with patch("subprocess.run") as fake_run:
            fake_run.return_value = self._completed(" hello world \n")
            output = store._run("test")
            assert output == "hello world"

    def test_run_json_parses_output(self, tmp_path: Path) -> None:
        store = self._store(tmp_path)
        with patch("subprocess.run") as fake_run:
            fake_run.return_value = self._completed('{"key": "value"}\n')
            parsed = store._run_json("test")
            assert parsed == {"key": "value"}

    def test_run_json_raises_on_invalid_json(self, tmp_path: Path) -> None:
        store = self._store(tmp_path)
        with patch("subprocess.run") as fake_run:
            fake_run.return_value = self._completed("not json at all")
            with pytest.raises((json.JSONDecodeError, DataError)):
                store._run_json("test")

    def test_run_json_returns_empty_dict_on_empty_output(self, tmp_path: Path) -> None:
        store = self._store(tmp_path)
        with patch("subprocess.run") as fake_run:
            fake_run.return_value = self._completed("")
            parsed = store._run_json("test")
            assert parsed == {}
212
+
213
+
214
+ # ---------------------------------------------------------------------------
215
+ # 5. Edge Cases
216
+ # ---------------------------------------------------------------------------
217
+
218
+
219
class TestEdgeCases:
    """Unusual but valid inputs to ``DataStore``.

    None of these tests touch the CLI; they only check how the path and the
    per-instance state are stored.
    """

    def test_empty_path(self) -> None:
        obj = DataStore("")
        assert isinstance(obj.path, Path)

    def test_path_with_spaces(self, tmp_path: Path) -> None:
        path = tmp_path / "path with spaces" / "test.adat"
        obj = DataStore(str(path))
        assert "spaces" in str(obj.path)

    def test_path_with_unicode(self, tmp_path: Path) -> None:
        # Use a genuinely non-ASCII component; an ASCII-only name would not
        # exercise Unicode handling at all.
        path = tmp_path / "données" / "test.adat"
        obj = DataStore(str(path))
        assert "données" in str(obj.path)

    def test_very_long_path(self, tmp_path: Path) -> None:
        long_name = "a" * 200
        path = tmp_path / long_name / "test.adat"
        obj = DataStore(str(path))
        assert len(str(obj.path)) > 200

    def test_save_idempotent(self, tmp_path: Path) -> None:
        # Repeated saves must not raise.
        obj = DataStore(str(tmp_path / "t.adat"))
        obj.save()
        obj.save()
        obj.save()

    def test_multiple_instances_independent(self, tmp_path: Path) -> None:
        a = DataStore(str(tmp_path / "a.adat"))
        b = DataStore(str(tmp_path / "b.adat"))
        assert a.path != b.path
        # The binary cache is per instance, not shared class state.
        a._resolved_binary = "/path/a"
        assert b._resolved_binary is None

    def test_dot_in_directory_name(self, tmp_path: Path) -> None:
        path = tmp_path / "v1.0.0" / "test.adat"
        obj = DataStore(str(path))
        assert "v1.0.0" in str(obj.path)
257
+
258
+
259
+ # ---------------------------------------------------------------------------
260
+ # 6. Error Handling
261
+ # ---------------------------------------------------------------------------
262
+
263
+
264
class TestErrorHandling:
    """Basic contract of the ``DataError`` exception type."""

    def test_error_is_exception(self) -> None:
        assert issubclass(DataError, Exception)

    def test_error_stores_message(self) -> None:
        assert "test message" in str(DataError("test message"))

    def test_error_caught_as_exception(self) -> None:
        with pytest.raises(Exception):
            raise DataError("boom")

    def test_error_caught_specifically(self) -> None:
        with pytest.raises(DataError) as caught:
            raise DataError("specific")
        assert "specific" in str(caught.value)

    def test_error_repr(self) -> None:
        assert repr(DataError("repr test")) is not None
285
+
286
+
287
+ # ---------------------------------------------------------------------------
288
+ # 7. Stress Tests
289
+ # ---------------------------------------------------------------------------
290
+
291
+
292
class TestStress:
    """Repeat cheap operations many times to catch state leaking between calls."""

    def test_create_1000_instances(self, tmp_path: Path) -> None:
        stores = []
        for index in range(1000):
            stores.append(DataStore(str(tmp_path / f"test_{index}.adat")))
        assert len(stores) == 1000
        assert stores[0].path != stores[999].path

    def test_find_binary_1000_cached(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "t.adat"))
        store._resolved_binary = "/cached/bin"
        attempts = 0
        while attempts < 1000:
            assert store._find_binary() == "/cached/bin"
            attempts += 1

    def test_save_100_times(self, tmp_path: Path) -> None:
        store = DataStore(str(tmp_path / "t.adat"))
        remaining = 100
        while remaining:
            store.save()
            remaining -= 1