datarep 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ publish:
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ id-token: write
14
+ contents: read
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - uses: actions/setup-python@v5
19
+ with:
20
+ python-version: "3.12"
21
+
22
+ - name: Build package
23
+ run: |
24
+ pip install build
25
+ python -m build
26
+
27
+ - name: Publish to PyPI
28
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ .env
12
+ *.db
13
+ *.key
14
+ .datarep/
15
+ .pytest_cache/
datarep-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 thyself-fyi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
datarep-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.4
2
+ Name: datarep
3
+ Version: 1.0.0
4
+ Summary: A user-authorized agent runtime that acquires data from arbitrary sources and delivers it to consuming applications.
5
+ Project-URL: Homepage, https://thyself-fyi.github.io/datarep-docs/
6
+ Project-URL: Documentation, https://thyself-fyi.github.io/datarep-docs/integration-guide/
7
+ Project-URL: Repository, https://github.com/thyself-fyi/datarep
8
+ Author: thyself-fyi
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: MacOS
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Requires-Python: >=3.10
21
+ Requires-Dist: anthropic>=0.42.0
22
+ Requires-Dist: bcrypt>=4.2.0
23
+ Requires-Dist: click>=8.1.0
24
+ Requires-Dist: cryptography>=44.0.0
25
+ Requires-Dist: fastapi>=0.115.0
26
+ Requires-Dist: httpx>=0.28.0
27
+ Requires-Dist: mcp[cli]>=1.0.0
28
+ Requires-Dist: uvicorn[standard]>=0.34.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
31
+ Requires-Dist: pytest-httpx>=0.35.0; extra == 'dev'
32
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # datarep
36
+
37
+ A trusted local service that retrieves data from arbitrary sources on behalf of your application. Your app describes what data it wants — datarep figures out how to get it.
38
+
39
+ ## Quick start
40
+
41
+ ```bash
42
+ pip install -e ".[dev]"
43
+ datarep init
44
+ export ANTHROPIC_API_KEY="sk-ant-..."
45
+ datarep start
46
+ ```
47
+
48
+ Register your app and get an API key:
49
+
50
+ ```bash
51
+ datarep app register my-app
52
+ ```
53
+
54
+ Retrieve data:
55
+
56
+ ```bash
57
+ curl -X POST http://127.0.0.1:7080/get \
58
+ -H "Authorization: Bearer dr_<your-api-key>" \
59
+ -H "Content-Type: application/json" \
60
+ -d '{"source": "my_source", "query": "get recent records"}'
61
+ ```
62
+
63
+ ## What it does
64
+
65
+ Instead of every app building its own integrations, datarep:
66
+
67
+ - **Manages credentials** — encrypted storage, browser-based OAuth, automatic token refresh
68
+ - **Writes retrieval code** — a Claude agent inspects sources and generates Python code dynamically
69
+ - **Executes in a sandbox** — network and filesystem restrictions via macOS `sandbox-exec`
70
+ - **Caches as recipes** — working code is saved for instant, deterministic replay
71
+ - **Authenticates apps** — per-app API keys with source-level access control and audit logging
72
+
73
+ Users grant trust once (to datarep). Every consuming app gets safe data access through it.
74
+
75
+ ## Interfaces
76
+
77
+ | Interface | Use case |
78
+ |-----------|----------|
79
+ | **HTTP API** (`localhost:7080`) | Primary interface for all apps. Bearer token auth. |
80
+ | **MCP server** | Native interface for agentic/LLM-powered apps. |
81
+ | **CLI** (`datarep`) | Setup, source management, debugging. |
82
+
83
+ ## Source types
84
+
85
+ | Type | Example | Sandbox |
86
+ |------|---------|---------|
87
+ | `local_db` | iMessage `chat.db`, WhatsApp, any SQLite | No network. Read-only DB access. |
88
+ | `rest_api` | Square, Gmail, QuickBooks | Network restricted to source domain. |
89
+ | `local_files` | Photos, documents, exports | No network. Read-only directory access. |
90
+
91
+ ## Integration guide
92
+
93
+ See the **[integration guide](https://thyself-fyi.github.io/datarep-docs/integration-guide/)** for the full walkthrough: API reference, authentication, handling permissions, MCP setup, recipes, and code examples.
94
+
95
+ ## Development
96
+
97
+ ```bash
98
+ pip install -e ".[dev]"
99
+ pytest
100
+ ```
@@ -0,0 +1,66 @@
1
+ # datarep
2
+
3
+ A trusted local service that retrieves data from arbitrary sources on behalf of your application. Your app describes what data it wants — datarep figures out how to get it.
4
+
5
+ ## Quick start
6
+
7
+ ```bash
8
+ pip install -e ".[dev]"
9
+ datarep init
10
+ export ANTHROPIC_API_KEY="sk-ant-..."
11
+ datarep start
12
+ ```
13
+
14
+ Register your app and get an API key:
15
+
16
+ ```bash
17
+ datarep app register my-app
18
+ ```
19
+
20
+ Retrieve data:
21
+
22
+ ```bash
23
+ curl -X POST http://127.0.0.1:7080/get \
24
+ -H "Authorization: Bearer dr_<your-api-key>" \
25
+ -H "Content-Type: application/json" \
26
+ -d '{"source": "my_source", "query": "get recent records"}'
27
+ ```
28
+
29
+ ## What it does
30
+
31
+ Instead of every app building its own integrations, datarep:
32
+
33
+ - **Manages credentials** — encrypted storage, browser-based OAuth, automatic token refresh
34
+ - **Writes retrieval code** — a Claude agent inspects sources and generates Python code dynamically
35
+ - **Executes in a sandbox** — network and filesystem restrictions via macOS `sandbox-exec`
36
+ - **Caches as recipes** — working code is saved for instant, deterministic replay
37
+ - **Authenticates apps** — per-app API keys with source-level access control and audit logging
38
+
39
+ Users grant trust once (to datarep). Every consuming app gets safe data access through it.
40
+
41
+ ## Interfaces
42
+
43
+ | Interface | Use case |
44
+ |-----------|----------|
45
+ | **HTTP API** (`localhost:7080`) | Primary interface for all apps. Bearer token auth. |
46
+ | **MCP server** | Native interface for agentic/LLM-powered apps. |
47
+ | **CLI** (`datarep`) | Setup, source management, debugging. |
48
+
49
+ ## Source types
50
+
51
+ | Type | Example | Sandbox |
52
+ |------|---------|---------|
53
+ | `local_db` | iMessage `chat.db`, WhatsApp, any SQLite | No network. Read-only DB access. |
54
+ | `rest_api` | Square, Gmail, QuickBooks | Network restricted to source domain. |
55
+ | `local_files` | Photos, documents, exports | No network. Read-only directory access. |
56
+
57
+ ## Integration guide
58
+
59
+ See **[docs/integration-guide.md](docs/integration-guide.md)** for the full walkthrough: API reference, authentication, handling permissions, MCP setup, recipes, and code examples.
60
+
61
+ ## Development
62
+
63
+ ```bash
64
+ pip install -e ".[dev]"
65
+ pytest
66
+ ```
@@ -0,0 +1,148 @@
1
+ """datarep — a user-authorized agent runtime for delegated data retrieval."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "1.0.0"
6
+
7
+ import asyncio
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from datarep.config import ensure_home, get_home
13
+ from datarep.db import get_connection
14
+ from datarep.agent import RetrievalAgent
15
+ from datarep.app_auth import AppAuthManager
16
+ from datarep.credentials import CredentialStore, OAuthFlow
17
+ from datarep.recipes import RecipeStore
18
+ from datarep.registry import SourceRegistry
19
+ from datarep.sync_state import SyncStateManager
20
+
21
+
22
class DataRep:
    """High-level Python API for datarep.

    Builds the source registry, credential store, recipe store, sync-state
    manager and app-auth manager on top of a single connection returned by
    ``get_connection()``. The retrieval agent is created lazily, on the first
    call that needs it.

    Usage:
        rep = DataRep()
        rep.add_source("imessage", source_type="local_db",
                       config={"path": "~/Library/Messages/chat.db"})
        result = rep.get("imessage", query="messages from last 7 days")
    """

    # Config keys that ``run_oauth`` reads unconditionally from a source.
    _OAUTH_REQUIRED_KEYS = ("auth_url", "token_url", "client_id", "client_secret")

    def __init__(self, home: str | Path | None = None):
        """Open the datarep home and construct the component stores.

        Args:
            home: Optional home directory. When truthy it is exported as the
                ``DATAREP_HOME`` environment variable before ``ensure_home()``
                runs. This mutates ``os.environ`` for the whole process.
        """
        if home:
            import os
            os.environ["DATAREP_HOME"] = str(home)
        ensure_home()
        self._conn = get_connection()
        self._registry = SourceRegistry(self._conn)
        self._cred_store = CredentialStore(self._conn)
        self._recipe_store = RecipeStore(self._conn)
        self._sync_state = SyncStateManager(self._conn)
        self._app_auth = AppAuthManager(self._conn)
        self._agent: RetrievalAgent | None = None

    def _get_agent(self) -> RetrievalAgent:
        """Return the shared ``RetrievalAgent``, creating it on first use."""
        if self._agent is None:
            self._agent = RetrievalAgent(
                self._conn,
                registry=self._registry,
                cred_store=self._cred_store,
                recipe_store=self._recipe_store,
                sync_state=self._sync_state,
            )
        return self._agent

    @staticmethod
    def _run_sync(coro: Any) -> Any:
        """Drive a coroutine to completion from synchronous code.

        Args:
            coro: The coroutine object to run.

        Returns:
            Whatever the coroutine returns.

        Raises:
            RuntimeError: If called while an event loop is already running in
                this thread. The coroutine is closed first so it is not
                leaked as "never awaited".
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: safe to start one.
            return asyncio.run(coro)
        coro.close()
        raise RuntimeError(
            "DataRep's synchronous methods cannot be called from a running "
            "event loop; call them from synchronous code or a worker thread."
        )

    # --- Source management ---

    def add_source(
        self,
        name: str,
        source_type: str,
        config: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Register a source via the registry and return the registry's result."""
        return self._registry.add(name, source_type, config)

    def list_sources(self) -> list[dict[str, Any]]:
        """Return the registry's list of sources."""
        return self._registry.list()

    def remove_source(self, name: str) -> bool:
        """Remove a source by name and return the registry's result."""
        return self._registry.remove(name)

    # --- Credentials ---

    def set_credentials(
        self,
        source: str,
        cred_type: str,
        data: dict[str, Any],
        expires_at: str | None = None,
    ) -> None:
        """Store credentials for a source in the credential store."""
        self._cred_store.store(source, cred_type, data, expires_at)

    def run_oauth(self, source: str, timeout: int = 120) -> dict[str, Any]:
        """Run a browser-based OAuth flow for a source.

        Reads ``auth_url``, ``token_url``, ``client_id``, ``client_secret`` and
        the optional ``scopes`` from the source's config, runs the flow, and
        stores the resulting tokens together with the client settings as an
        ``oauth2`` credential.

        Args:
            source: Name of a registered source.
            timeout: Passed through to ``OAuthFlow.run``.

        Returns:
            The tokens returned by the flow.

        Raises:
            ValueError: If the source is not registered, or if its config is
                empty or lacks any of the required OAuth keys.
        """
        src = self._registry.get(source)
        if src is None:
            raise ValueError(f"Source '{source}' not found.")
        # ``add_source`` accepts ``config=None``; treat that as an empty config
        # so the problem is reported as a configuration error.
        config = src["config"] or {}
        missing = [key for key in self._OAUTH_REQUIRED_KEYS if key not in config]
        if missing:
            raise ValueError(
                f"Source '{source}' is missing OAuth config: {', '.join(missing)}."
            )
        flow = OAuthFlow(
            auth_url=config["auth_url"],
            token_url=config["token_url"],
            client_id=config["client_id"],
            client_secret=config["client_secret"],
            scopes=config.get("scopes", []),
        )
        tokens = flow.run(timeout=timeout)
        # Client settings are stored next to the tokens.
        # NOTE(review): presumably so a refresh needs no registry lookup - confirm.
        self._cred_store.store(source, "oauth2", {
            **tokens,
            "client_id": config["client_id"],
            "client_secret": config["client_secret"],
            "token_url": config["token_url"],
        })
        return tokens

    # --- Retrieval ---

    def get(self, source: str, query: str) -> dict[str, Any]:
        """Agent-driven data retrieval.

        Raises:
            RuntimeError: If called from a running event loop.
        """
        agent = self._get_agent()
        return self._run_sync(agent.run(source, query))

    def sync(self, source: str, query: str = "") -> dict[str, Any]:
        """Incremental sync for a source.

        An empty ``query`` is replaced by a default "Incremental sync for
        <source>" prompt before the agent runs.

        Raises:
            RuntimeError: If called from a running event loop.
        """
        agent = self._get_agent()
        q = query or f"Incremental sync for {source}"
        return self._run_sync(agent.run(source, q))

    def run_recipe(self, recipe_id: str) -> dict[str, Any]:
        """Run a saved recipe (no LLM call).

        Raises:
            RuntimeError: If called from a running event loop.
        """
        agent = self._get_agent()
        return self._run_sync(agent.run_recipe(recipe_id))

    # --- Recipes ---

    def list_recipes(self, source: str | None = None) -> list[dict[str, Any]]:
        """List saved recipes, passing ``source`` as the store's ``source_name`` filter."""
        return self._recipe_store.list(source_name=source)

    def get_recipe(self, recipe_id: str) -> dict[str, Any] | None:
        """Return the recipe store's entry for ``recipe_id``."""
        return self._recipe_store.get(recipe_id)

    # --- Apps ---

    def register_app(
        self, name: str, allowed_sources: list[str] | None = None
    ) -> tuple[str, str]:
        """Register a consuming app. Returns (app_id, api_key)."""
        return self._app_auth.register_app(name, allowed_sources)

    def list_apps(self) -> list[dict[str, Any]]:
        """Return the app-auth manager's list of apps."""
        return self._app_auth.list_apps()

    def revoke_app(self, app_id: str) -> bool:
        """Revoke an app by id and return the app-auth manager's result."""
        return self._app_auth.revoke(app_id)

    # --- Sync state ---

    def get_sync_state(self, source: str) -> dict[str, Any] | None:
        """Return the sync-state manager's entry for ``source``."""
        return self._sync_state.get(source)