atdata 0.2.0a1__py3-none-any.whl → 0.2.2b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atdata/cli/__init__.py ADDED
@@ -0,0 +1,213 @@
1
+ """Command-line interface for atdata.
2
+
3
+ This module provides CLI commands for managing local development infrastructure
4
+ and diagnosing configuration issues.
5
+
6
+ Commands:
7
+ atdata local up Start Redis and MinIO containers for local development
8
+ atdata local down Stop local development containers
9
+ atdata diagnose Check Redis configuration and connectivity
10
+ atdata version Show version information
11
+
12
+ Example:
13
+ $ atdata local up
14
+ Starting Redis on port 6379...
15
+ Starting MinIO on port 9000...
16
+ Local infrastructure ready.
17
+
18
+ $ atdata diagnose
19
+ Checking Redis configuration...
20
+ ✓ Redis connected
21
+ ✓ Persistence enabled (AOF)
22
+ ✓ Memory policy: noeviction
23
+ """
24
+
25
+ import argparse
26
+ import sys
27
+ from typing import Sequence
28
+
29
+
30
+ def main(argv: Sequence[str] | None = None) -> int:
31
+ """Main entry point for the atdata CLI.
32
+
33
+ Args:
34
+ argv: Command-line arguments. If None, uses sys.argv[1:].
35
+
36
+ Returns:
37
+ Exit code (0 for success, non-zero for errors).
38
+ """
39
+ parser = argparse.ArgumentParser(
40
+ prog="atdata",
41
+ description="A loose federation of distributed, typed datasets",
42
+ formatter_class=argparse.RawDescriptionHelpFormatter,
43
+ )
44
+ parser.add_argument(
45
+ "--version", "-v",
46
+ action="store_true",
47
+ help="Show version information",
48
+ )
49
+
50
+ subparsers = parser.add_subparsers(dest="command", help="Available commands")
51
+
52
+ # 'local' command group
53
+ local_parser = subparsers.add_parser(
54
+ "local",
55
+ help="Manage local development infrastructure",
56
+ )
57
+ local_subparsers = local_parser.add_subparsers(
58
+ dest="local_command",
59
+ help="Local infrastructure commands",
60
+ )
61
+
62
+ # 'local up' command
63
+ up_parser = local_subparsers.add_parser(
64
+ "up",
65
+ help="Start Redis and MinIO containers",
66
+ )
67
+ up_parser.add_argument(
68
+ "--redis-port",
69
+ type=int,
70
+ default=6379,
71
+ help="Redis port (default: 6379)",
72
+ )
73
+ up_parser.add_argument(
74
+ "--minio-port",
75
+ type=int,
76
+ default=9000,
77
+ help="MinIO API port (default: 9000)",
78
+ )
79
+ up_parser.add_argument(
80
+ "--minio-console-port",
81
+ type=int,
82
+ default=9001,
83
+ help="MinIO console port (default: 9001)",
84
+ )
85
+ up_parser.add_argument(
86
+ "--detach", "-d",
87
+ action="store_true",
88
+ default=True,
89
+ help="Run containers in detached mode (default: True)",
90
+ )
91
+
92
+ # 'local down' command
93
+ down_parser = local_subparsers.add_parser(
94
+ "down",
95
+ help="Stop local development containers",
96
+ )
97
+ down_parser.add_argument(
98
+ "--volumes", "-v",
99
+ action="store_true",
100
+ help="Also remove volumes (deletes all data)",
101
+ )
102
+
103
+ # 'local status' command
104
+ local_subparsers.add_parser(
105
+ "status",
106
+ help="Show status of local infrastructure",
107
+ )
108
+
109
+ # 'diagnose' command
110
+ diagnose_parser = subparsers.add_parser(
111
+ "diagnose",
112
+ help="Diagnose Redis configuration and connectivity",
113
+ )
114
+ diagnose_parser.add_argument(
115
+ "--host",
116
+ default="localhost",
117
+ help="Redis host (default: localhost)",
118
+ )
119
+ diagnose_parser.add_argument(
120
+ "--port",
121
+ type=int,
122
+ default=6379,
123
+ help="Redis port (default: 6379)",
124
+ )
125
+
126
+ # 'version' command (alternative to --version flag)
127
+ subparsers.add_parser(
128
+ "version",
129
+ help="Show version information",
130
+ )
131
+
132
+ args = parser.parse_args(argv)
133
+
134
+ # Handle --version flag
135
+ if args.version or args.command == "version":
136
+ return _cmd_version()
137
+
138
+ # Handle 'local' commands
139
+ if args.command == "local":
140
+ if args.local_command == "up":
141
+ return _cmd_local_up(
142
+ redis_port=args.redis_port,
143
+ minio_port=args.minio_port,
144
+ minio_console_port=args.minio_console_port,
145
+ detach=args.detach,
146
+ )
147
+ elif args.local_command == "down":
148
+ return _cmd_local_down(remove_volumes=args.volumes)
149
+ elif args.local_command == "status":
150
+ return _cmd_local_status()
151
+ else:
152
+ local_parser.print_help()
153
+ return 1
154
+
155
+ # Handle 'diagnose' command
156
+ if args.command == "diagnose":
157
+ return _cmd_diagnose(host=args.host, port=args.port)
158
+
159
+ # No command given
160
+ parser.print_help()
161
+ return 0
162
+
163
+
164
def _cmd_version() -> int:
    """Print ``atdata <version>`` and return exit code 0.

    The version is taken from ``atdata.__version__``; if that import fails,
    the installed distribution metadata for ``atdata`` is consulted instead.
    """
    try:
        from atdata import __version__ as resolved
    except ImportError:
        # Fallback to package metadata
        import importlib.metadata

        resolved = importlib.metadata.version("atdata")

    print(f"atdata {resolved}")
    return 0
176
+
177
+
178
def _cmd_local_up(
    redis_port: int,
    minio_port: int,
    minio_console_port: int,
    detach: bool,
) -> int:
    """Run ``local up``: forward the parsed options to ``local_up``.

    The ``local`` module is imported inside the function, so it is only
    loaded when this command is actually invoked.

    Returns:
        The exit code produced by ``local_up``.
    """
    from . import local as local_infra

    options = {
        "redis_port": redis_port,
        "minio_port": minio_port,
        "minio_console_port": minio_console_port,
        "detach": detach,
    }
    return local_infra.local_up(**options)
192
+
193
+
194
def _cmd_local_down(remove_volumes: bool) -> int:
    """Run ``local down``: forward ``remove_volumes`` to ``local_down``.

    Returns:
        The exit code produced by ``local_down``.
    """
    from . import local as local_infra

    return local_infra.local_down(remove_volumes=remove_volumes)
198
+
199
+
200
def _cmd_local_status() -> int:
    """Run ``local status`` and return the exit code of ``local_status``."""
    from . import local as local_infra

    return local_infra.local_status()
204
+
205
+
206
def _cmd_diagnose(host: str, port: int) -> int:
    """Run ``diagnose``: check the Redis server at ``host``:``port``.

    Returns:
        The exit code produced by ``diagnose_redis``.
    """
    from . import diagnose as redis_diagnostics

    return redis_diagnostics.diagnose_redis(host=host, port=port)
210
+
211
+
212
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
atdata/cli/diagnose.py ADDED
@@ -0,0 +1,165 @@
1
+ """Diagnostic tools for atdata infrastructure.
2
+
3
+ This module provides commands to diagnose configuration issues with Redis
4
+ and other infrastructure components.
5
+ """
6
+
7
+ import sys
8
+ from typing import Any
9
+
10
+
11
+ def _print_status(label: str, ok: bool, detail: str = "") -> None:
12
+ """Print a status line with checkmark or X."""
13
+ symbol = "✓" if ok else "✗"
14
+ status = f"{symbol} {label}"
15
+ if detail:
16
+ status += f": {detail}"
17
+ print(status)
18
+
19
+
20
def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
    """Diagnose Redis configuration and connectivity.

    Checks for common issues that can cause data loss:
    - Connection issues
    - Persistence settings (AOF/RDB)
    - Memory eviction policy
    - Memory usage

    Only the version, AOF and eviction-policy checks influence the exit
    code; the RDB, max-memory, memory-usage and key-count checks are
    informational. The Redis client is closed before returning, whichever
    path is taken.

    Args:
        host: Redis host (default: localhost)
        port: Redis port (default: 6379)

    Returns:
        Exit code (0 if all checks pass, 1 if any issues found, if the
        ``redis`` package is missing, or if the server cannot be reached)
    """
    print(f"Diagnosing Redis at {host}:{port}...")
    print()

    try:
        from redis import Redis
    except ImportError:
        print("Error: redis package not installed", file=sys.stderr)
        return 1

    client = None
    try:
        # Try to connect
        try:
            client = Redis(host=host, port=port, socket_connect_timeout=5)
            client.ping()
        except Exception as e:
            _print_status("Connection", False, str(e))
            print()
            print("Cannot connect to Redis. Make sure Redis is running:")
            print(" atdata local up")
            return 1
        _print_status("Connection", True, "connected")

        issues_found = _run_redis_checks(client)
    finally:
        # Close the client on every exit path, including the early return
        # taken when the connection attempt fails.
        if client is not None:
            client.close()

    print()

    if issues_found:
        print("Issues found! Recommended configuration:")
        print()
        print(" # In redis.conf or via CONFIG SET:")
        print(" appendonly yes")
        print(" maxmemory-policy noeviction")
        print()
        print(" # Or use atdata's preconfigured local setup:")
        print(" atdata local up")
        return 1

    print("All checks passed. Redis is properly configured for atdata.")
    return 0


def _run_redis_checks(client: Any) -> bool:
    """Run every post-connection check, printing one status line per check.

    Args:
        client: A connected ``redis.Redis`` instance.

    Returns:
        True if any check that counts as an issue failed, otherwise False.
    """
    issues_found = False

    # Check Redis version
    try:
        info = client.info()
        version = info.get("redis_version", "unknown")
        _print_status("Version", True, version)
    except Exception as e:
        _print_status("Version", False, str(e))
        issues_found = True

    # Check persistence - AOF
    try:
        aof_enabled = client.config_get("appendonly").get("appendonly", "no")
        aof_ok = aof_enabled == "yes"
        _print_status(
            "AOF Persistence",
            aof_ok,
            "enabled" if aof_ok else "DISABLED - data may be lost on restart!",
        )
        if not aof_ok:
            issues_found = True
    except Exception as e:
        _print_status("AOF Persistence", False, f"check failed: {e}")
        issues_found = True

    # Check persistence - RDB (informational: never marks an issue by itself)
    try:
        save_config = client.config_get("save").get("save", "")
        rdb_ok = bool(save_config and save_config.strip())
        _print_status(
            "RDB Persistence",
            rdb_ok,
            f"configured ({save_config})" if rdb_ok else "DISABLED",
        )
    except Exception as e:
        _print_status("RDB Persistence", False, f"check failed: {e}")

    # Check memory policy
    try:
        policy = client.config_get("maxmemory-policy").get(
            "maxmemory-policy", "unknown"
        )
        # Safe policies that won't evict index data
        safe_policies = {
            "noeviction",
            "volatile-lru",
            "volatile-lfu",
            "volatile-ttl",
            "volatile-random",
        }
        if policy in safe_policies:
            _print_status("Memory Policy", True, policy)
        else:
            _print_status(
                "Memory Policy",
                False,
                f"{policy} - may evict index data! Use 'noeviction' or 'volatile-*'",
            )
            issues_found = True
    except Exception as e:
        _print_status("Memory Policy", False, f"check failed: {e}")
        issues_found = True

    # Check maxmemory setting (informational)
    try:
        maxmemory = client.config_get("maxmemory").get("maxmemory", "0")
        maxmemory_bytes = int(maxmemory)
        if maxmemory_bytes == 0:
            _print_status("Max Memory", True, "unlimited")
        else:
            maxmemory_mb = maxmemory_bytes / (1024 * 1024)
            _print_status("Max Memory", True, f"{maxmemory_mb:.0f} MB")
    except Exception as e:
        _print_status("Max Memory", False, f"check failed: {e}")

    # Check current memory usage (informational)
    try:
        memory_info = client.info("memory")
        used_memory = memory_info.get("used_memory_human", "unknown")
        peak_memory = memory_info.get("used_memory_peak_human", "unknown")
        _print_status("Memory Usage", True, f"{used_memory} (peak: {peak_memory})")
    except Exception as e:
        _print_status("Memory Usage", False, f"check failed: {e}")

    # Check number of atdata keys (informational)
    try:
        dataset_count = sum(
            1 for _ in client.scan_iter(match="LocalDatasetEntry:*", count=100)
        )
        schema_count = sum(
            1 for _ in client.scan_iter(match="LocalSchema:*", count=100)
        )
        _print_status(
            "atdata Keys",
            True,
            f"{dataset_count} datasets, {schema_count} schemas",
        )
    except Exception as e:
        _print_status("atdata Keys", False, f"check failed: {e}")

    return issues_found
atdata/cli/local.py ADDED
@@ -0,0 +1,280 @@
1
+ """Local infrastructure management for atdata.
2
+
3
+ This module provides commands to start and stop local development infrastructure:
4
+ - Redis: For index storage and metadata
5
+ - MinIO: S3-compatible object storage for dataset files
6
+
7
+ The infrastructure runs in Docker containers managed via docker-compose or
8
+ direct docker commands.
9
+ """
10
+
11
+ import shutil
12
+ import subprocess
13
+ import sys
14
+ from pathlib import Path
15
+ from textwrap import dedent
16
+
17
# Container names for tracking.
# These fixed names are substituted into the compose template as
# ``container_name`` and are the names queried when reporting status.
REDIS_CONTAINER = "atdata-redis"
MINIO_CONTAINER = "atdata-minio"
20
+
21
# Docker compose configuration.
# A str.format template with five placeholders: {redis_container},
# {minio_container}, {redis_port}, {minio_port} and {minio_console_port}.
# Only the host side of each port mapping is configurable; the container-side
# ports stay 6379 / 9000 / 9001.  Redis is launched with AOF persistence and
# the 'noeviction' policy, and both services keep their data in named volumes.
COMPOSE_TEMPLATE = dedent("""\
version: '3.8'

services:
  redis:
    image: redis:7-alpine
    container_name: {redis_container}
    ports:
      - "{redis_port}:6379"
    volumes:
      - atdata-redis-data:/data
    command: redis-server --appendonly yes --maxmemory-policy noeviction
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 3

  minio:
    image: minio/minio:latest
    container_name: {minio_container}
    ports:
      - "{minio_port}:9000"
      - "{minio_console_port}:9001"
    volumes:
      - atdata-minio-data:/data
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    command: server /data --console-address ":9001"
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 5s
      timeout: 3s
      retries: 3

volumes:
  atdata-redis-data:
  atdata-minio-data:
""")
64
+
65
+
66
+ def _check_docker() -> bool:
67
+ """Check if Docker is available and running."""
68
+ if not shutil.which("docker"):
69
+ print("Error: Docker is not installed or not in PATH", file=sys.stderr)
70
+ return False
71
+
72
+ try:
73
+ result = subprocess.run(
74
+ ["docker", "info"],
75
+ capture_output=True,
76
+ text=True,
77
+ timeout=10,
78
+ )
79
+ if result.returncode != 0:
80
+ print("Error: Docker daemon is not running", file=sys.stderr)
81
+ return False
82
+ except subprocess.TimeoutExpired:
83
+ print("Error: Docker daemon not responding", file=sys.stderr)
84
+ return False
85
+ except Exception as e:
86
+ print(f"Error checking Docker: {e}", file=sys.stderr)
87
+ return False
88
+
89
+ return True
90
+
91
+
92
def _get_compose_file(
    redis_port: int,
    minio_port: int,
    minio_console_port: int,
) -> str:
    """Render the compose template for the requested host ports.

    Args:
        redis_port: Host port mapped to the Redis container.
        minio_port: Host port mapped to the MinIO API.
        minio_console_port: Host port mapped to the MinIO console.

    Returns:
        The compose file content, with container names and ports filled in.
    """
    substitutions = {
        "redis_container": REDIS_CONTAINER,
        "minio_container": MINIO_CONTAINER,
        "redis_port": redis_port,
        "minio_port": minio_port,
        "minio_console_port": minio_console_port,
    }
    return COMPOSE_TEMPLATE.format(**substitutions)
105
+
106
+
107
+ def _container_running(name: str) -> bool:
108
+ """Check if a container is running."""
109
+ try:
110
+ result = subprocess.run(
111
+ ["docker", "inspect", "-f", "{{.State.Running}}", name],
112
+ capture_output=True,
113
+ text=True,
114
+ timeout=5,
115
+ )
116
+ return result.returncode == 0 and result.stdout.strip() == "true"
117
+ except Exception:
118
+ return False
119
+
120
+
121
+ def _run_compose(
122
+ compose_content: str,
123
+ command: list[str],
124
+ *,
125
+ capture_output: bool = False,
126
+ ) -> subprocess.CompletedProcess:
127
+ """Run a docker-compose command with the given configuration."""
128
+ # Write compose file to temp location
129
+ compose_dir = Path.home() / ".atdata"
130
+ compose_dir.mkdir(exist_ok=True)
131
+ compose_file = compose_dir / "docker-compose.yml"
132
+ compose_file.write_text(compose_content)
133
+
134
+ # Prefer 'docker compose' (v2) over 'docker-compose' (v1)
135
+ if shutil.which("docker"):
136
+ # Check if docker compose v2 is available
137
+ check = subprocess.run(
138
+ ["docker", "compose", "version"],
139
+ capture_output=True,
140
+ timeout=5,
141
+ )
142
+ if check.returncode == 0:
143
+ base_cmd = ["docker", "compose"]
144
+ elif shutil.which("docker-compose"):
145
+ base_cmd = ["docker-compose"]
146
+ else:
147
+ raise RuntimeError("Neither 'docker compose' nor 'docker-compose' available")
148
+ else:
149
+ raise RuntimeError("Docker not found")
150
+
151
+ full_cmd = base_cmd + ["-f", str(compose_file)] + command
152
+
153
+ return subprocess.run(
154
+ full_cmd,
155
+ capture_output=capture_output,
156
+ text=True,
157
+ )
158
+
159
+
160
def local_up(
    redis_port: int = 6379,
    minio_port: int = 9000,
    minio_console_port: int = 9001,
    detach: bool = True,
) -> int:
    """Start local development infrastructure.

    In detached mode the containers are started in the background and the
    connection details are printed afterwards. In foreground mode
    (``detach=False``) the compose command blocks until the containers
    stop, so the connection details are printed before attaching and
    nothing is announced once compose has exited.

    Args:
        redis_port: Port for Redis (default: 6379)
        minio_port: Port for MinIO API (default: 9000)
        minio_console_port: Port for MinIO console (default: 9001)
        detach: Run in background (default: True)

    Returns:
        Exit code (0 for success; 1 if Docker is unavailable or compose could
        not be launched; otherwise the non-zero compose exit status)
    """
    if not _check_docker():
        return 1

    print("Starting atdata local infrastructure...")

    compose_content = _get_compose_file(redis_port, minio_port, minio_console_port)
    command = ["up"]
    if detach:
        command.append("-d")
    else:
        # Foreground 'compose up' only returns after the containers have
        # stopped, so the endpoints must be shown up front to be of any use.
        _print_connection_info(
            "Local infrastructure endpoints:",
            redis_port,
            minio_port,
            minio_console_port,
        )

    try:
        result = _run_compose(compose_content, command)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    if result.returncode != 0:
        print("Error: Failed to start containers", file=sys.stderr)
        return result.returncode

    if not detach:
        # Compose has already exited; a "started" banner would be misleading.
        return 0

    # Wait a moment for containers to be healthy
    import time
    time.sleep(2)

    _print_connection_info(
        "Local infrastructure started:",
        redis_port,
        minio_port,
        minio_console_port,
    )
    return 0


def _print_connection_info(
    heading: str,
    redis_port: int,
    minio_port: int,
    minio_console_port: int,
) -> None:
    """Print the service endpoints, MinIO credentials and a usage example."""
    print()
    print(heading)
    print(f" Redis: localhost:{redis_port}")
    print(f" MinIO API: http://localhost:{minio_port}")
    print(f" MinIO Console: http://localhost:{minio_console_port}")
    print()
    print("MinIO credentials: minioadmin / minioadmin")
    print()
    print("Example usage:")
    print(" from atdata.local import Index, S3DataStore")
    print(" ")
    print(" store = S3DataStore.from_credentials({")
    print(f" 'AWS_ENDPOINT': 'http://localhost:{minio_port}',")
    print(" 'AWS_ACCESS_KEY_ID': 'minioadmin',")
    print(" 'AWS_SECRET_ACCESS_KEY': 'minioadmin',")
    print(" }, bucket='datasets')")
    print(" index = Index(data_store=store)")
220
+
221
+
222
def local_down(remove_volumes: bool = False) -> int:
    """Stop local development infrastructure.

    Args:
        remove_volumes: When True, pass ``-v`` to compose so the data
            volumes are removed as well (default: False)

    Returns:
        Exit code (0 for success; 1 if Docker is unavailable or compose could
        not be launched; otherwise the non-zero compose exit status)
    """
    if not _check_docker():
        return 1

    print("Stopping atdata local infrastructure...")

    # Use default ports for compose file (actual ports don't matter for down)
    compose_content = _get_compose_file(6379, 9000, 9001)

    down_args = ["down", "-v"] if remove_volumes else ["down"]
    if remove_volumes:
        print("Warning: This will delete all local data!")

    try:
        outcome = _run_compose(compose_content, down_args)
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    if outcome.returncode != 0:
        print("Error: Failed to stop containers", file=sys.stderr)
        return outcome.returncode

    print("Local infrastructure stopped.")
    return 0
254
+
255
+
256
def local_status() -> int:
    """Show status of local infrastructure.

    Prints whether the Redis and MinIO containers are running, followed by
    a hint for the next command to use.

    Returns:
        Exit code (0 for success, 1 when Docker is unavailable)
    """
    if not _check_docker():
        return 1

    redis_up = _container_running(REDIS_CONTAINER)
    minio_up = _container_running(MINIO_CONTAINER)

    def describe(is_up: bool) -> str:
        return "running" if is_up else "stopped"

    print("atdata local infrastructure status:")
    print()
    print(f" Redis ({REDIS_CONTAINER}): {describe(redis_up)}")
    print(f" MinIO ({MINIO_CONTAINER}): {describe(minio_up)}")

    # Offer 'down' if anything is up, otherwise 'up'.
    hint = "To stop: atdata local down" if (redis_up or minio_up) else "To start: atdata local up"
    print()
    print(hint)

    return 0