atdata 0.2.0a1__py3-none-any.whl → 0.2.3b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atdata/cli/__init__.py ADDED
@@ -0,0 +1,222 @@
1
+ """Command-line interface for atdata.
2
+
3
+ This module provides CLI commands for managing local development infrastructure
4
+ and diagnosing configuration issues.
5
+
6
+ Commands:
7
+ atdata local up Start Redis and MinIO containers for local development
8
+ atdata local down Stop local development containers
9
+ atdata diagnose Check Redis configuration and connectivity
10
+ atdata version Show version information
11
+
12
+ Example:
13
+ $ atdata local up
14
+ Starting Redis on port 6379...
15
+ Starting MinIO on port 9000...
16
+ Local infrastructure ready.
17
+
18
+ $ atdata diagnose
19
+ Checking Redis configuration...
20
+ ✓ Redis connected
21
+ ✓ Persistence enabled (AOF)
22
+ ✓ Memory policy: noeviction
23
+ """
24
+
25
+ import argparse
26
+ import sys
27
+ from typing import Sequence
28
+
29
+
30
+ def main(argv: Sequence[str] | None = None) -> int:
31
+ """Main entry point for the atdata CLI.
32
+
33
+ Args:
34
+ argv: Command-line arguments. If None, uses sys.argv[1:].
35
+
36
+ Returns:
37
+ Exit code (0 for success, non-zero for errors).
38
+ """
39
+ parser = argparse.ArgumentParser(
40
+ prog="atdata",
41
+ description="A loose federation of distributed, typed datasets",
42
+ formatter_class=argparse.RawDescriptionHelpFormatter,
43
+ )
44
+ parser.add_argument(
45
+ "--version",
46
+ "-v",
47
+ action="store_true",
48
+ help="Show version information",
49
+ )
50
+
51
+ subparsers = parser.add_subparsers(dest="command", help="Available commands")
52
+
53
+ # 'local' command group
54
+ local_parser = subparsers.add_parser(
55
+ "local",
56
+ help="Manage local development infrastructure",
57
+ )
58
+ local_subparsers = local_parser.add_subparsers(
59
+ dest="local_command",
60
+ help="Local infrastructure commands",
61
+ )
62
+
63
+ # 'local up' command
64
+ up_parser = local_subparsers.add_parser(
65
+ "up",
66
+ help="Start Redis and MinIO containers",
67
+ )
68
+ up_parser.add_argument(
69
+ "--redis-port",
70
+ type=int,
71
+ default=6379,
72
+ help="Redis port (default: 6379)",
73
+ )
74
+ up_parser.add_argument(
75
+ "--minio-port",
76
+ type=int,
77
+ default=9000,
78
+ help="MinIO API port (default: 9000)",
79
+ )
80
+ up_parser.add_argument(
81
+ "--minio-console-port",
82
+ type=int,
83
+ default=9001,
84
+ help="MinIO console port (default: 9001)",
85
+ )
86
+ up_parser.add_argument(
87
+ "--detach",
88
+ "-d",
89
+ action="store_true",
90
+ default=True,
91
+ help="Run containers in detached mode (default: True)",
92
+ )
93
+
94
+ # 'local down' command
95
+ down_parser = local_subparsers.add_parser(
96
+ "down",
97
+ help="Stop local development containers",
98
+ )
99
+ down_parser.add_argument(
100
+ "--volumes",
101
+ "-v",
102
+ action="store_true",
103
+ help="Also remove volumes (deletes all data)",
104
+ )
105
+
106
+ # 'local status' command
107
+ local_subparsers.add_parser(
108
+ "status",
109
+ help="Show status of local infrastructure",
110
+ )
111
+
112
+ # 'diagnose' command
113
+ diagnose_parser = subparsers.add_parser(
114
+ "diagnose",
115
+ help="Diagnose Redis configuration and connectivity",
116
+ )
117
+ diagnose_parser.add_argument(
118
+ "--host",
119
+ default="localhost",
120
+ help="Redis host (default: localhost)",
121
+ )
122
+ diagnose_parser.add_argument(
123
+ "--port",
124
+ type=int,
125
+ default=6379,
126
+ help="Redis port (default: 6379)",
127
+ )
128
+
129
+ # 'version' command (alternative to --version flag)
130
+ subparsers.add_parser(
131
+ "version",
132
+ help="Show version information",
133
+ )
134
+
135
+ args = parser.parse_args(argv)
136
+
137
+ # Handle --version flag
138
+ if args.version or args.command == "version":
139
+ return _cmd_version()
140
+
141
+ # Handle 'local' commands
142
+ if args.command == "local":
143
+ if args.local_command == "up":
144
+ return _cmd_local_up(
145
+ redis_port=args.redis_port,
146
+ minio_port=args.minio_port,
147
+ minio_console_port=args.minio_console_port,
148
+ detach=args.detach,
149
+ )
150
+ elif args.local_command == "down":
151
+ return _cmd_local_down(remove_volumes=args.volumes)
152
+ elif args.local_command == "status":
153
+ return _cmd_local_status()
154
+ else:
155
+ local_parser.print_help()
156
+ return 1
157
+
158
+ # Handle 'diagnose' command
159
+ if args.command == "diagnose":
160
+ return _cmd_diagnose(host=args.host, port=args.port)
161
+
162
+ # No command given
163
+ parser.print_help()
164
+ return 0
165
+
166
+
167
+ def _cmd_version() -> int:
168
+ """Show version information."""
169
+ try:
170
+ from atdata import __version__
171
+
172
+ version = __version__
173
+ except ImportError:
174
+ # Fallback to package metadata
175
+ from importlib.metadata import version as pkg_version
176
+
177
+ version = pkg_version("atdata")
178
+
179
+ print(f"atdata {version}")
180
+ return 0
181
+
182
+
183
def _cmd_local_up(
    redis_port: int,
    minio_port: int,
    minio_console_port: int,
    detach: bool,
) -> int:
    """Forward the ``local up`` options to ``local.local_up``.

    The ``local`` submodule is imported at call time.

    Returns:
        The exit code returned by ``local_up``.
    """
    from . import local as _local

    options = {
        "redis_port": redis_port,
        "minio_port": minio_port,
        "minio_console_port": minio_console_port,
        "detach": detach,
    }
    return _local.local_up(**options)
198
+
199
+
200
def _cmd_local_down(remove_volumes: bool) -> int:
    """Forward the ``local down`` request to ``local.local_down``.

    Returns:
        The exit code returned by ``local_down``.
    """
    from . import local as _local

    exit_code = _local.local_down(remove_volumes=remove_volumes)
    return exit_code
205
+
206
+
207
def _cmd_local_status() -> int:
    """Forward the ``local status`` request to ``local.local_status``.

    Returns:
        The exit code returned by ``local_status``.
    """
    from . import local as _local

    exit_code = _local.local_status()
    return exit_code
212
+
213
+
214
def _cmd_diagnose(host: str, port: int) -> int:
    """Forward the ``diagnose`` request to ``diagnose.diagnose_redis``.

    Returns:
        The exit code returned by ``diagnose_redis``.
    """
    from . import diagnose as _diagnose

    exit_code = _diagnose.diagnose_redis(host=host, port=port)
    return exit_code
219
+
220
+
221
# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
atdata/cli/diagnose.py ADDED
@@ -0,0 +1,169 @@
1
+ """Diagnostic tools for atdata infrastructure.
2
+
3
+ This module provides commands to diagnose configuration issues with Redis
4
+ and other infrastructure components.
5
+ """
6
+
7
+ import sys
8
+
9
+
10
+ def _print_status(label: str, ok: bool, detail: str = "") -> None:
11
+ """Print a status line with checkmark or X."""
12
+ symbol = "✓" if ok else "✗"
13
+ status = f"{symbol} {label}"
14
+ if detail:
15
+ status += f": {detail}"
16
+ print(status)
17
+
18
+
19
def _run_redis_checks(client) -> bool:
    """Run the configuration checks against a connected Redis client.

    Each check prints its own status line.  Only the version, AOF and
    memory-policy checks influence the result; the remaining checks are
    informational.

    Args:
        client: A connected ``redis.Redis`` instance.

    Returns:
        True if any check that affects the exit code reported a problem.
    """
    issues_found = False

    # Check Redis version
    try:
        info = client.info()
        _print_status("Version", True, info.get("redis_version", "unknown"))
    except Exception as e:
        _print_status("Version", False, str(e))
        issues_found = True

    # Check persistence - AOF
    try:
        aof_enabled = client.config_get("appendonly").get("appendonly", "no")
        aof_ok = aof_enabled == "yes"
        _print_status(
            "AOF Persistence",
            aof_ok,
            "enabled" if aof_ok else "DISABLED - data may be lost on restart!",
        )
        if not aof_ok:
            issues_found = True
    except Exception as e:
        _print_status("AOF Persistence", False, f"check failed: {e}")
        issues_found = True

    # Check persistence - RDB (reported only; never affects the exit code)
    try:
        save_config = client.config_get("save").get("save", "")
        rdb_ok = bool(save_config and save_config.strip())
        _print_status(
            "RDB Persistence",
            rdb_ok,
            f"configured ({save_config})" if rdb_ok else "DISABLED",
        )
    except Exception as e:
        _print_status("RDB Persistence", False, f"check failed: {e}")

    # Check memory policy
    try:
        policy = client.config_get("maxmemory-policy").get(
            "maxmemory-policy", "unknown"
        )
        # Safe policies that won't evict index data
        safe_policies = {
            "noeviction",
            "volatile-lru",
            "volatile-lfu",
            "volatile-ttl",
            "volatile-random",
        }
        if policy in safe_policies:
            _print_status("Memory Policy", True, policy)
        else:
            _print_status(
                "Memory Policy",
                False,
                f"{policy} - may evict index data! Use 'noeviction' or 'volatile-*'",
            )
            issues_found = True
    except Exception as e:
        _print_status("Memory Policy", False, f"check failed: {e}")
        issues_found = True

    # Check maxmemory setting (a value of 0 is reported as unlimited)
    try:
        maxmemory_bytes = int(client.config_get("maxmemory").get("maxmemory", "0"))
        if maxmemory_bytes == 0:
            _print_status("Max Memory", True, "unlimited")
        else:
            maxmemory_mb = maxmemory_bytes / (1024 * 1024)
            _print_status("Max Memory", True, f"{maxmemory_mb:.0f} MB")
    except Exception as e:
        _print_status("Max Memory", False, f"check failed: {e}")

    # Check current memory usage
    try:
        memory_info = client.info("memory")
        used_memory = memory_info.get("used_memory_human", "unknown")
        peak_memory = memory_info.get("used_memory_peak_human", "unknown")
        _print_status("Memory Usage", True, f"{used_memory} (peak: {peak_memory})")
    except Exception as e:
        _print_status("Memory Usage", False, f"check failed: {e}")

    # Check number of atdata keys
    try:
        dataset_count = sum(
            1 for _ in client.scan_iter(match="LocalDatasetEntry:*", count=100)
        )
        schema_count = sum(
            1 for _ in client.scan_iter(match="LocalSchema:*", count=100)
        )
        _print_status(
            "atdata Keys", True, f"{dataset_count} datasets, {schema_count} schemas"
        )
    except Exception as e:
        _print_status("atdata Keys", False, f"check failed: {e}")

    return issues_found


def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
    """Diagnose Redis configuration and connectivity.

    Checks for common issues that can cause data loss:
    - Connection issues
    - Persistence settings (AOF/RDB)
    - Memory eviction policy
    - Memory usage

    The Redis client is closed before returning, whichever path is taken.

    Args:
        host: Redis host (default: localhost)
        port: Redis port (default: 6379)

    Returns:
        Exit code: 1 if the redis package is missing, the connection fails,
        or the version, AOF or memory-policy check reports a problem;
        0 otherwise.
    """
    print(f"Diagnosing Redis at {host}:{port}...")
    print()

    try:
        from redis import Redis
    except ImportError:
        print("Error: redis package not installed", file=sys.stderr)
        return 1

    client = None
    try:
        # Try to connect
        try:
            client = Redis(host=host, port=port, socket_connect_timeout=5)
            client.ping()
        except Exception as e:
            _print_status("Connection", False, str(e))
            print()
            print("Cannot connect to Redis. Make sure Redis is running:")
            print(" atdata local up")
            return 1
        _print_status("Connection", True, "connected")

        issues_found = _run_redis_checks(client)
    finally:
        # Release the client's connections instead of leaking them.
        if client is not None:
            client.close()

    print()

    if issues_found:
        print("Issues found! Recommended configuration:")
        print()
        print(" # In redis.conf or via CONFIG SET:")
        print(" appendonly yes")
        print(" maxmemory-policy noeviction")
        print()
        print(" # Or use atdata's preconfigured local setup:")
        print(" atdata local up")
        return 1

    print("All checks passed. Redis is properly configured for atdata.")
    return 0
atdata/cli/local.py ADDED
@@ -0,0 +1,283 @@
1
+ """Local infrastructure management for atdata.
2
+
3
+ This module provides commands to start and stop local development infrastructure:
4
+ - Redis: For index storage and metadata
5
+ - MinIO: S3-compatible object storage for dataset files
6
+
7
+ The infrastructure runs in Docker containers managed via docker-compose or
8
+ direct docker commands.
9
+ """
10
+
11
+ import shutil
12
+ import subprocess
13
+ import sys
14
+ from pathlib import Path
15
+ from textwrap import dedent
16
+
17
# Container names for tracking
REDIS_CONTAINER = "atdata-redis"
MINIO_CONTAINER = "atdata-minio"

# Docker compose configuration.
# Brace placeholders are filled in via str.format(); the nesting below is
# significant because the rendered text is written out as a YAML compose file.
COMPOSE_TEMPLATE = dedent("""\
    version: '3.8'

    services:
      redis:
        image: redis:7-alpine
        container_name: {redis_container}
        ports:
          - "{redis_port}:6379"
        volumes:
          - atdata-redis-data:/data
        command: redis-server --appendonly yes --maxmemory-policy noeviction
        restart: unless-stopped
        healthcheck:
          test: ["CMD", "redis-cli", "ping"]
          interval: 5s
          timeout: 3s
          retries: 3

      minio:
        image: minio/minio:latest
        container_name: {minio_container}
        ports:
          - "{minio_port}:9000"
          - "{minio_console_port}:9001"
        volumes:
          - atdata-minio-data:/data
        environment:
          MINIO_ROOT_USER: minioadmin
          MINIO_ROOT_PASSWORD: minioadmin
        command: server /data --console-address ":9001"
        restart: unless-stopped
        healthcheck:
          test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
          interval: 5s
          timeout: 3s
          retries: 3

    volumes:
      atdata-redis-data:
      atdata-minio-data:
""")
64
+
65
+
66
+ def _check_docker() -> bool:
67
+ """Check if Docker is available and running."""
68
+ if not shutil.which("docker"):
69
+ print("Error: Docker is not installed or not in PATH", file=sys.stderr)
70
+ return False
71
+
72
+ try:
73
+ result = subprocess.run(
74
+ ["docker", "info"],
75
+ capture_output=True,
76
+ text=True,
77
+ timeout=10,
78
+ )
79
+ if result.returncode != 0:
80
+ print("Error: Docker daemon is not running", file=sys.stderr)
81
+ return False
82
+ except subprocess.TimeoutExpired:
83
+ print("Error: Docker daemon not responding", file=sys.stderr)
84
+ return False
85
+ except Exception as e:
86
+ print(f"Error checking Docker: {e}", file=sys.stderr)
87
+ return False
88
+
89
+ return True
90
+
91
+
92
def _get_compose_file(
    redis_port: int,
    minio_port: int,
    minio_console_port: int,
) -> str:
    """Render ``COMPOSE_TEMPLATE`` with the container names and given ports.

    Returns:
        The formatted docker-compose configuration text.
    """
    substitutions = {
        "redis_container": REDIS_CONTAINER,
        "minio_container": MINIO_CONTAINER,
        "redis_port": redis_port,
        "minio_port": minio_port,
        "minio_console_port": minio_console_port,
    }
    return COMPOSE_TEMPLATE.format(**substitutions)
105
+
106
+
107
+ def _container_running(name: str) -> bool:
108
+ """Check if a container is running."""
109
+ try:
110
+ result = subprocess.run(
111
+ ["docker", "inspect", "-f", "{{.State.Running}}", name],
112
+ capture_output=True,
113
+ text=True,
114
+ timeout=5,
115
+ )
116
+ return result.returncode == 0 and result.stdout.strip() == "true"
117
+ except Exception:
118
+ return False
119
+
120
+
121
def _run_compose(
    compose_content: str,
    command: list[str],
    *,
    capture_output: bool = False,
) -> subprocess.CompletedProcess:
    """Write the compose file and run a compose command against it.

    The configuration is written to ``~/.atdata/docker-compose.yml`` before
    the compose executable is looked up.

    Args:
        compose_content: Text to write to the compose file.
        command: Compose arguments appended after ``-f <file>``.
        capture_output: Passed through to ``subprocess.run``.

    Returns:
        The completed process of the compose invocation.

    Raises:
        RuntimeError: If ``docker`` is not on PATH, or neither
            ``docker compose`` nor ``docker-compose`` is usable.
    """
    target_dir = Path.home() / ".atdata"
    target_dir.mkdir(exist_ok=True)
    compose_file = target_dir / "docker-compose.yml"
    compose_file.write_text(compose_content)

    if not shutil.which("docker"):
        raise RuntimeError("Docker not found")

    # Prefer 'docker compose' (v2) over 'docker-compose' (v1)
    v2_probe = subprocess.run(
        ["docker", "compose", "version"],
        capture_output=True,
        timeout=5,
    )
    if v2_probe.returncode == 0:
        launcher = ["docker", "compose"]
    elif shutil.which("docker-compose"):
        launcher = ["docker-compose"]
    else:
        raise RuntimeError(
            "Neither 'docker compose' nor 'docker-compose' available"
        )

    invocation = [*launcher, "-f", str(compose_file), *command]
    return subprocess.run(
        invocation,
        capture_output=capture_output,
        text=True,
    )
160
+
161
+
162
def local_up(
    redis_port: int = 6379,
    minio_port: int = 9000,
    minio_console_port: int = 9001,
    detach: bool = True,
) -> int:
    """Start local development infrastructure.

    Runs compose ``up`` (with ``-d`` when ``detach`` is true), pauses for two
    seconds, then prints the endpoints, credentials and a usage example.

    Args:
        redis_port: Port for Redis (default: 6379)
        minio_port: Port for MinIO API (default: 9000)
        minio_console_port: Port for MinIO console (default: 9001)
        detach: Run in background (default: True)

    Returns:
        Exit code: 0 on success, 1 if Docker is unavailable or the compose
        call raised, otherwise the non-zero compose return code.
    """
    if not _check_docker():
        return 1

    print("Starting atdata local infrastructure...")

    compose_yaml = _get_compose_file(redis_port, minio_port, minio_console_port)
    compose_args = ["up", "-d"] if detach else ["up"]

    try:
        outcome = _run_compose(compose_yaml, compose_args)
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1
    if outcome.returncode != 0:
        print("Error: Failed to start containers", file=sys.stderr)
        return outcome.returncode

    # Wait a moment for containers to be healthy
    import time

    time.sleep(2)

    # Show status
    summary = [
        "",
        "Local infrastructure started:",
        f" Redis: localhost:{redis_port}",
        f" MinIO API: http://localhost:{minio_port}",
        f" MinIO Console: http://localhost:{minio_console_port}",
        "",
        "MinIO credentials: minioadmin / minioadmin",
        "",
        "Example usage:",
        " from atdata.local import Index, S3DataStore",
        " ",
        " store = S3DataStore.from_credentials({",
        f" 'AWS_ENDPOINT': 'http://localhost:{minio_port}',",
        " 'AWS_ACCESS_KEY_ID': 'minioadmin',",
        " 'AWS_SECRET_ACCESS_KEY': 'minioadmin',",
        " }, bucket='datasets')",
        " index = Index(data_store=store)",
    ]
    for line in summary:
        print(line)

    return 0
223
+
224
+
225
def local_down(remove_volumes: bool = False) -> int:
    """Stop local development infrastructure.

    Runs compose ``down``, adding ``-v`` when ``remove_volumes`` is true.

    Args:
        remove_volumes: Also remove data volumes (default: False)

    Returns:
        Exit code: 0 on success, 1 if Docker is unavailable or the compose
        call raised, otherwise the non-zero compose return code.
    """
    if not _check_docker():
        return 1

    print("Stopping atdata local infrastructure...")

    # Use default ports for compose file (actual ports don't matter for down)
    compose_yaml = _get_compose_file(6379, 9000, 9001)
    compose_args = ["down"]
    if remove_volumes:
        compose_args.append("-v")
        print("Warning: This will delete all local data!")

    try:
        outcome = _run_compose(compose_yaml, compose_args)
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1
    if outcome.returncode != 0:
        print("Error: Failed to stop containers", file=sys.stderr)
        return outcome.returncode

    print("Local infrastructure stopped.")
    return 0
257
+
258
+
259
def local_status() -> int:
    """Show status of local infrastructure.

    Prints whether each of the two containers is running, followed by a hint
    for the next command to use.

    Returns:
        Exit code: 1 if Docker is unavailable, otherwise 0.
    """
    if not _check_docker():
        return 1

    redis_up = _container_running(REDIS_CONTAINER)
    minio_up = _container_running(MINIO_CONTAINER)

    def _label(is_up: bool) -> str:
        return "running" if is_up else "stopped"

    print("atdata local infrastructure status:")
    print()
    print(f" Redis ({REDIS_CONTAINER}): {_label(redis_up)}")
    print(f" MinIO ({MINIO_CONTAINER}): {_label(minio_up)}")

    # Both branches start with a blank line, so emit it once.
    print()
    hint = (
        "To stop: atdata local down"
        if (redis_up or minio_up)
        else "To start: atdata local up"
    )
    print(hint)

    return 0