chemsift 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.4
2
+ Name: chemsift
3
+ Version: 1.1.0
4
+ Summary:
5
+ Author: Dave Martinez
6
+ Author-email: 34991082+dkm-coder@users.noreply.github.com
7
+ Requires-Python: >=3.9
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Requires-Dist: boto3 (>=1.34.135,<2.0.0)
16
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
17
+ Requires-Dist: requests (>=2.32.5,<3.0.0)
18
+ Requires-Dist: rich (>=13.7.0,<14.0.0)
19
+ Requires-Dist: typer (>=0.21.0,<0.22.0)
20
+ Requires-Dist: urllib3 (<2)
21
+ Description-Content-Type: text/markdown
22
+
23
+ # chemsift-cloud
24
+
25
+ ## Commands
26
+
27
+ - `chemsift-cloud login --api-key <key>`: Store credentials in configuration file
28
+ - `chemsift-cloud submit <input-dir>`: Submit input to be processed
29
+ - `chemsift-cloud status <job-id>`: Check status of work for a given ID
30
+ - `chemsift-cloud download <job-id> <output-dir>`: Download the result of the given job ID
31
+ - `chemsift-cloud list`: Get list of job IDs for the user
32
+
33
+ ## Development
34
+
35
+ This project uses [Poetry](https://python-poetry.org/) for dependency management and packaging.
36
+
37
+ ### Bumping the version
38
+
39
+ It is possible to bump the version of the project using the `poetry version` command with one of the following values as an argument:
40
+
41
+ - `patch`
42
+ - `minor`
43
+ - `major`
44
+
45
+ ### Publishing to PyPI
46
+
47
+ #### PyPI configuration (just once)
48
+
49
+ From your [PyPI](https://pypi.org/) account, configure a new [Trusted Publisher](https://pypi.org/manage/project/python-bap/settings/publishing/) by providing the following information:
50
+
51
+ - Specify Owner
52
+ - Specify repository name
53
+ - Specify the workflow name
54
+
55
+ #### Publish a new version
56
+
57
+ To publish a new version to PyPI, create a new tag whose name follows this format:
58
+
59
+ ```
60
+ cli-vX.X.X
61
+ ```
62
+
63
+ > **Important**: The version specified when creating the tag needs to match the one in `pyproject.toml` which is the same one that will be returned when running `poetry version`.
64
+
@@ -0,0 +1,41 @@
1
+ # chemsift-cloud
2
+
3
+ ## Commands
4
+
5
+ - `chemsift-cloud login --api-key <key>`: Store credentials in configuration file
6
+ - `chemsift-cloud submit <input-dir>`: Submit input to be processed
7
+ - `chemsift-cloud status <job-id>`: Check status of work for a given ID
8
+ - `chemsift-cloud download <job-id> <output-dir>`: Download the result of the given job ID
9
+ - `chemsift-cloud list`: Get list of job IDs for the user
10
+
11
+ ## Development
12
+
13
+ This project uses [Poetry](https://python-poetry.org/) for dependency management and packaging.
14
+
15
+ ### Bumping the version
16
+
17
+ It is possible to bump the version of the project using the `poetry version` command with one of the following values as an argument:
18
+
19
+ - `patch`
20
+ - `minor`
21
+ - `major`
22
+
23
+ ### Publishing to PyPI
24
+
25
+ #### PyPI configuration (just once)
26
+
27
+ From your [PyPI](https://pypi.org/) account, configure a new [Trusted Publisher](https://pypi.org/manage/project/python-bap/settings/publishing/) by providing the following information:
28
+
29
+ - Specify Owner
30
+ - Specify repository name
31
+ - Specify the workflow name
32
+
33
+ #### Publish a new version
34
+
35
+ To publish a new version to PyPI, create a new tag whose name follows this format:
36
+
37
+ ```
38
+ cli-vX.X.X
39
+ ```
40
+
41
+ > **Important**: The version specified when creating the tag needs to match the one in `pyproject.toml` which is the same one that will be returned when running `poetry version`.
@@ -0,0 +1,27 @@
1
+ [project]
2
+ name = "chemsift"
3
+ version = "1.1.0"
4
+ description = ""
5
+ authors = [
6
+ {name = "Dave Martinez",email = "34991082+dkm-coder@users.noreply.github.com"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.9"
10
+ dependencies = [
11
+ "typer (>=0.21.0,<0.22.0)",
12
+ "urllib3 (<2)",
13
+ "requests (>=2.32.5,<3.0.0)",
14
+ "boto3 (>=1.34.135,<2.0.0)",
15
+ "rich (>=13.7.0,<14.0.0)",
16
+ "pyyaml (>=6.0.2,<7.0.0)",
17
+ ]
18
+
19
+ [tool.poetry]
20
+ packages = [{include = "chemsift_cloud", from = "src"}]
21
+
22
+ [tool.poetry.scripts]
23
+ chemsift = "chemsift_cloud.main:app"
24
+
25
+ [build-system]
26
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
27
+ build-backend = "poetry.core.masonry.api"
File without changes
@@ -0,0 +1,33 @@
1
+ import json
2
+ from pathlib import Path
3
+ import typer
4
+ from rich import print
5
+
6
def get_config_path():
    """Return the path of the CLI configuration file (~/.chemsift/config.json)."""
    return Path.home() / ".chemsift" / "config.json"
10
+
11
def load():
    """Read and return the stored configuration as a dict.

    Prints an error and exits with code 1 when no configuration file
    has been written yet (i.e. the user has not logged in).
    """
    path = get_config_path()
    if path.exists():
        with open(path, 'r') as f:
            return json.load(f)
    print("[red][bold]Error[/bold]: Configuration not found. Please run [/red][blue]`chemsift-cloud login`[/blue]")
    raise typer.Exit(code=1)
19
+
20
def save(config_data: dict):
    """Persist *config_data* as pretty-printed JSON.

    Creates the ~/.chemsift directory on first use, then reports the
    destination path to the user.
    """
    path = get_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'w') as f:
        json.dump(config_data, f, indent=4)
    print(f"[green]Configuration successfully written to:[/green] [blue]{path}[/blue]")
28
+
29
# For backward compatibility / simple check
def check_existance():
    """Exit with code 1 (after printing an error) unless a configuration file exists."""
    if get_config_path().exists():
        return
    print("[red][bold]Error[/bold]: Configuration not found. Please run [/red][blue]`chemsift-cloud login`[/blue]")
    raise typer.Exit(code=1)
@@ -0,0 +1,63 @@
1
+ import typer
2
+ from typing_extensions import Annotated
3
+ import boto3
4
+ from rich import print
5
+ from rich.progress import track
6
+ from chemsift_cloud import config
7
+ from chemsift_cloud.register import get_session
8
+ from pathlib import Path
9
+
10
+ app = typer.Typer()
11
+
12
@app.command()
def download(
    job_id: Annotated[str, typer.Argument(help="ID of the job to download the results of")],
    output_dir: Annotated[Path, typer.Argument(help="Directory in which to download the job result")],
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Download the result of a given job ID
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    # Look up job type from DynamoDB to determine correct S3 prefix
    resp = table.get_item(Key={'user_id': cfg['identity_id'], 'job_id': job_id})
    item = resp.get('Item', {})
    job_type = item.get('job_type', 'register')

    if job_type == 'query':
        prefix = f"users/{cfg['identity_id']}/{job_id}/query_output/"
    elif job_type == 'map':
        # Map jobs write back into the source register job's registry_output
        source_job_id = item.get('source_job_id', job_id)
        prefix = f"users/{cfg['identity_id']}/{source_job_id}/registry_output/"
    else:
        # register (default)
        prefix = f"users/{cfg['identity_id']}/{job_id}/registry_output/"

    print(f"Downloading results for job [blue]{job_id}[/blue] (type: {job_type})...")

    # Fix: a single list_objects_v2 call returns at most 1000 keys, so
    # larger result sets were silently truncated. Use the paginator to
    # collect every object under the prefix.
    paginator = s3.get_paginator('list_objects_v2')
    objects = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        objects.extend(page.get('Contents', []))

    if not objects:
        print("[yellow]No results found yet. Job might still be processing or failed.[/yellow]")
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    for obj in track(objects, description="Downloading..."):
        s3_key = obj['Key']
        # Remove prefix from local path so files land relative to output_dir
        relative_path = s3_key[len(prefix):]
        if not relative_path:
            # Skip the "directory marker" object for the prefix itself
            continue

        local_path = output_dir / relative_path
        local_path.parent.mkdir(parents=True, exist_ok=True)

        s3.download_file(bucket, s3_key, str(local_path))

    print(f"[green]Download complete![/green] Results saved to [blue]{output_dir}[/blue]")
@@ -0,0 +1,47 @@
1
+ import typer
2
+ import boto3
3
+ from rich import print
4
+ from rich.table import Table
5
+ from chemsift_cloud import config
6
+ from chemsift_cloud.register import get_session
7
+
8
+ app = typer.Typer()
9
+
10
@app.command()
def list(table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")):
    """
    Get list of job IDs for the current user
    """
    cfg = config.load()
    session = get_session(cfg)
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    print(f"Retrieving jobs for {cfg['username']}...")

    # Fix: a single query() call returns at most 1 MB of items; follow
    # LastEvaluatedKey so long job histories are not silently truncated.
    key_condition = boto3.dynamodb.conditions.Key('user_id').eq(cfg['identity_id'])
    query_kwargs = {'KeyConditionExpression': key_condition}
    items = []
    while True:
        resp = table.query(**query_kwargs)
        items.extend(resp.get('Items', []))
        last_key = resp.get('LastEvaluatedKey')
        if not last_key:
            break
        query_kwargs['ExclusiveStartKey'] = last_key

    if not items:
        print("[yellow]No jobs found.[/yellow]")
        return

    table_ui = Table(title="Your Jobs")
    table_ui.add_column("Job ID", style="cyan")
    table_ui.add_column("Type", style="yellow")
    table_ui.add_column("Status", style="green")
    table_ui.add_column("Source Job", style="dim")
    table_ui.add_column("Created At", style="magenta")

    # Older items may predate some attributes; fall back to sensible defaults
    for item in items:
        table_ui.add_row(
            item['job_id'],
            item.get('job_type', 'register'),
            item.get('status', 'N/A'),
            item.get('source_job_id', '-'),
            item.get('created_at', 'N/A')
        )

    print(table_ui)
@@ -0,0 +1,172 @@
1
+ import typer
2
+ import boto3
3
+ import requests
4
+ import webbrowser
5
+ import hashlib
6
+ import base64
7
+ import secrets
8
+ import threading
9
+ import json
10
+ from urllib.parse import urlparse, parse_qs
11
+ from http.server import BaseHTTPRequestHandler, HTTPServer
12
+ from rich import print
13
+ from chemsift_cloud import config
14
+
15
app = typer.Typer()

# Static configuration for now (could be passed as options)
# NOTE(review): these IDs are baked into the published package; every value
# below can be overridden via the corresponding `login` CLI option.
USER_POOL_ID = "eu-west-1_7ZI3fBR1w" # Placeholder
CLIENT_ID = "71ginl5pbckv6vrfltid7lkimi" # Placeholder
IDENTITY_POOL_ID = "eu-west-1:2d13c2cd-06f1-498b-9fcf-2fa6f345b8b7" # Placeholder
REGION = "eu-west-1"
COGNITO_DOMAIN = "chemsift-register-pipeline-prod.auth.eu-west-1.amazoncognito.com" # Placeholder
CALLBACK_PORT = 53090
# Loopback redirect URI the local HTTP server listens on during login;
# presumably registered as an allowed callback on the Cognito app client.
CALLBACK_URL = f"http://localhost:{CALLBACK_PORT}/callback"
25
+
26
class OAuthCallbackHandler(BaseHTTPRequestHandler):
    """One-shot HTTP handler that captures the OAuth redirect.

    Stores the received authorization code and state on the server
    instance (``server.auth_code`` / ``server.returned_state``) and then
    shuts the server down so the caller's ``serve_forever()`` returns.
    """

    def do_GET(self):
        """Handle the browser redirect carrying ``code`` and ``state``."""
        # Parse ?code=...&state=... out of the redirect URL
        query_components = parse_qs(urlparse(self.path).query)
        if 'code' in query_components and 'state' in query_components:
            self.server.auth_code = query_components['code'][0]
            self.server.returned_state = query_components['state'][0]
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(b"<html><body><h1>Login successful!</h1><p>You can close this window and return to the CLI.</p></body></html>")
        else:
            self.send_response(400)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(b"<html><body><h1>Login failed</h1><p>Missing code or state.</p></body></html>")

        # Shutdown server after handling request.
        # Must run on a separate thread: shutdown() blocks until
        # serve_forever() exits, which would deadlock if called here.
        threading.Thread(target=self.server.shutdown).start()

    def log_message(self, format, *args):
        # Suppress standard HTTP server logging
        pass
48
+
49
def generate_pkce_pair() -> tuple[str, str]:
    """Create an OAuth 2.0 PKCE verifier/challenge pair (RFC 7636, S256).

    Returns:
        ``(code_verifier, code_challenge)`` where the challenge is the
        unpadded url-safe base64 encoding of SHA-256(verifier).
    """
    # 64 random bytes yield an 86-character url-safe verifier string
    verifier = secrets.token_urlsafe(64)
    digest = hashlib.sha256(verifier.encode('utf-8')).digest()
    challenge = base64.urlsafe_b64encode(digest).decode('utf-8').rstrip('=')
    return verifier, challenge
55
+
56
def decode_jwt_payload(token: str) -> dict:
    """Best-effort decode of a JWT's payload segment.

    Does NOT verify the signature — only splits the token, re-pads the
    middle base64url segment, and parses it as JSON. Returns ``{}`` for
    anything that does not look like a three-part, decodable JWT.
    """
    segments = token.split('.')
    if len(segments) != 3:
        return {}
    # JWTs strip base64 padding; restore it to a multiple of 4 chars
    payload = segments[1]
    padding = '=' * (-len(payload) % 4)
    try:
        raw = base64.urlsafe_b64decode(payload + padding)
        return json.loads(raw.decode('utf-8'))
    except Exception:
        return {}
67
+
68
@app.command()
def login(
    user_pool_id: str = typer.Option(USER_POOL_ID, help="Cognito User Pool ID"),
    client_id: str = typer.Option(CLIENT_ID, help="Cognito App Client ID"),
    identity_pool_id: str = typer.Option(IDENTITY_POOL_ID, help="Cognito Identity Pool ID"),
    region: str = typer.Option(REGION, help="AWS Region"),
    cognito_domain: str = typer.Option(COGNITO_DOMAIN, help="Cognito Domain")
):
    """
    Login using browser-based OAuth 2.0 flow
    """
    # PKCE protects the code exchange; a random state guards against CSRF
    code_verifier, code_challenge = generate_pkce_pair()
    state = secrets.token_urlsafe(32)

    auth_url = (
        f"https://{cognito_domain}/login?"
        f"response_type=code&"
        f"client_id={client_id}&"
        f"redirect_uri={CALLBACK_URL}&"
        f"state={state}&"
        f"scope=email+openid+profile&"
        f"code_challenge_method=S256&"
        f"code_challenge={code_challenge}"
    )

    print("[yellow]Opening browser for authentication...[/yellow]")
    webbrowser.open_new(auth_url)

    # Start local server to listen for callback.
    # serve_forever() returns once OAuthCallbackHandler calls shutdown().
    server = HTTPServer(('localhost', CALLBACK_PORT), OAuthCallbackHandler)
    server.auth_code = None
    server.returned_state = None
    server.serve_forever()

    if not server.auth_code or not server.returned_state:
        print("[red]Login failed: Did not receive authorization code or state.[/red]")
        raise typer.Exit(code=1)

    if server.returned_state != state:
        print("[red]Login failed: State mismatch. Potential CSRF attack.[/red]")
        raise typer.Exit(code=1)

    print("[green]Authorization code received. Exchanging for tokens...[/green]")

    # Exchange code for tokens (authorization_code grant with PKCE verifier)
    token_url = f"https://{cognito_domain}/oauth2/token"
    token_data = {
        'grant_type': 'authorization_code',
        'client_id': client_id,
        'code': server.auth_code,
        'redirect_uri': CALLBACK_URL,
        'code_verifier': code_verifier
    }

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    # NOTE(review): no timeout is set on this request — consider adding one
    token_resp = requests.post(token_url, data=token_data, headers=headers)

    if token_resp.status_code != 200:
        print(f"[red]Failed to exchange code for tokens: {token_resp.text}[/red]")
        raise typer.Exit(code=1)

    tokens = token_resp.json()
    id_token = tokens.get('id_token')

    if not id_token:
        print("[red]Login failed: Missing id_token in response.[/red]")
        raise typer.Exit(code=1)

    try:
        # Get Identity ID from Identity Pool
        identity_client = boto3.client('cognito-identity', region_name=region)
        id_resp = identity_client.get_id(
            IdentityPoolId=identity_pool_id,
            Logins={
                f'cognito-idp.{region}.amazonaws.com/{user_pool_id}': id_token
            }
        )
        identity_id = id_resp['IdentityId']

        # Display name comes from the (unverified) id_token payload
        payload = decode_jwt_payload(id_token)
        username = payload.get('email', 'Unknown User')

        # Store configuration (id_token/refresh_token are saved in plain
        # text under ~/.chemsift — see config.save)
        config_data = {
            "username": username,
            "user_pool_id": user_pool_id,
            "client_id": client_id,
            "identity_pool_id": identity_pool_id,
            "region": region,
            "identity_id": identity_id,
            "id_token": id_token,
            "refresh_token": tokens.get('refresh_token'),
            "cognito_domain": cognito_domain
        }

        config.save(config_data)
        print(f"[green]Successfully logged in as {username}[/green]")
        print(f"Identity ID: [blue]{identity_id}[/blue]")

    except Exception as e:
        print(f"[red]Failed to get AWS credentials: {str(e)}[/red]")
        raise typer.Exit(code=1)
@@ -0,0 +1,22 @@
1
import typer
from chemsift_cloud.login import app as login_app
from chemsift_cloud.register import app as register_app
from chemsift_cloud.map import app as map_app
from chemsift_cloud.query import app as query_app
from chemsift_cloud.status import app as status_app
from chemsift_cloud.download import app as download_app
from chemsift_cloud.list import app as list_app

# Root CLI application; shows help when invoked with no arguments
app = typer.Typer(no_args_is_help=True)

# Each sub-module exposes a single-command Typer app; mount them all here
app.add_typer(login_app)
app.add_typer(register_app)
app.add_typer(map_app)
app.add_typer(query_app)
app.add_typer(status_app)
app.add_typer(download_app)
app.add_typer(list_app)


if __name__ == "__main__":
    app()
@@ -0,0 +1,102 @@
1
+ import typer
2
+ import uuid
3
+ import datetime
4
+ import json
5
+ from rich import print
6
+ from chemsift_cloud import config
7
+ from chemsift_cloud.register import get_session
8
+
9
+ app = typer.Typer()
10
+
11
+
12
def discover_s3_prefixes(s3, bucket, prefix):
    """Return the immediate sub-folder names beneath *prefix* in *bucket*.

    Uses a paginated ``list_objects_v2`` with ``'/'`` as delimiter, so only
    one level of "directories" (CommonPrefixes) is reported.
    """
    pages = s3.get_paginator('list_objects_v2').paginate(
        Bucket=bucket, Prefix=prefix, Delimiter='/'
    )
    return [
        entry['Prefix'].rstrip('/').split('/')[-1]
        for page in pages
        for entry in page.get('CommonPrefixes', [])
    ]
21
+
22
+
23
@app.command()
def map(
    register_job_id: str = typer.Argument(help="Register job ID to run mapping on"),
    reference_library: str = typer.Argument(help="Name of the reference library in registry_output"),
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name"),
):
    """
    Trigger the mapped pipeline against a completed register job.
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    identity_id = cfg['identity_id']

    # Look up source register job
    resp = table.get_item(Key={'user_id': identity_id, 'job_id': register_job_id})
    source_item = resp.get('Item')
    if not source_item:
        print(f"[red]Register job {register_job_id} not found.[/red]")
        raise typer.Exit(code=1)

    # Mapping only runs on top of a register job's output
    if source_item.get('job_type', 'register') != 'register':
        print(f"[red]Job {register_job_id} is not a register job (type: {source_item.get('job_type')}).[/red]")
        raise typer.Exit(code=1)

    if source_item.get('status') != 'SUCCEEDED':
        print(f"[yellow]Warning: source job status is '{source_item.get('status')}'. Proceeding anyway.[/yellow]")

    # Discover libraries and feature_spaces from registry_output
    # NOTE(review): the top-level listing likely also includes the
    # 'feature_spaces' folder itself among `libraries` — confirm the
    # downstream pipeline tolerates that entry.
    registry_output_prefix = f"users/{identity_id}/{register_job_id}/registry_output/"
    libraries = discover_s3_prefixes(s3, bucket, registry_output_prefix)
    feature_spaces_prefix = f"{registry_output_prefix}feature_spaces/"
    feature_spaces = discover_s3_prefixes(s3, bucket, feature_spaces_prefix)

    if not libraries:
        print(f"[red]No libraries found under {registry_output_prefix}. Is the register job complete?[/red]")
        raise typer.Exit(code=1)

    job_id = str(uuid.uuid4())
    # Map results are written back into the source job's registry_output
    output_prefix = f"users/{identity_id}/{register_job_id}/registry_output"

    # Register in DynamoDB
    table.put_item(Item={
        'user_id': identity_id,
        'job_id': job_id,
        'username': cfg['username'],
        'status': 'SUBMITTED',
        'job_type': 'map',
        'source_job_id': register_job_id,
        'created_at': datetime.datetime.utcnow().isoformat(),
        's3_path': f"s3://{bucket}/{output_prefix}"
    })

    sfn_input = {
        'user_id': identity_id,
        'job_id': job_id,
        's3_bucket': bucket,
        'output_prefix': output_prefix,
        'libraries': [{'name': lib} for lib in libraries],
        'feature_spaces': [{'name': fs} for fs in feature_spaces],
        'reference_library': reference_library,
    }

    # Upload trigger file — EventBridge watches for this key and invokes the map Lambda
    map_trigger_key = f"users/{identity_id}/{job_id}/map_input/map.json"
    s3.put_object(
        Bucket=bucket,
        Key=map_trigger_key,
        Body=json.dumps(sfn_input),
        ContentType="application/json",
    )

    print(f"[green]Map job submitted![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")
    print(f"Libraries to map: {libraries}")
    print(f"Reference library: {reference_library}")
@@ -0,0 +1,128 @@
1
+ import typer
2
+ import uuid
3
+ import datetime
4
+ import json
5
+ from typing_extensions import Annotated
6
+ from rich import print
7
+ from rich.progress import track
8
+ from pathlib import Path
9
+ from chemsift_cloud import config
10
+ from chemsift_cloud.register import get_session
11
+
12
+ app = typer.Typer()
13
+
14
+
15
def discover_s3_prefixes(s3, bucket, prefix):
    """Collect the names of the one-level-deep "folders" under *prefix*.

    Paginates ``list_objects_v2`` with a ``'/'`` delimiter so S3 groups
    keys into CommonPrefixes; the trailing path component of each is the
    folder name.
    """
    paginator = s3.get_paginator('list_objects_v2')
    names = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        common = page.get('CommonPrefixes', [])
        names.extend(entry['Prefix'].rstrip('/').split('/')[-1] for entry in common)
    return names
24
+
25
+
26
@app.command()
def query(
    register_job_id: str = typer.Argument(help="Register job ID whose registry_output to query"),
    molecules: Annotated[Path, typer.Option(
        "--molecules", help="Local path to query molecules CSV",
        exists=True, file_okay=True, dir_okay=False, resolve_path=True
    )] = ...,
    yaml: Annotated[Path, typer.Option(
        "--yaml", help="Local path to query YAML config",
        exists=True, file_okay=True, dir_okay=False, resolve_path=True
    )] = ...,
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name"),
):
    """
    Submit a query against a completed register job's registry_output.
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    identity_id = cfg['identity_id']

    # Look up source register job
    resp = table.get_item(Key={'user_id': identity_id, 'job_id': register_job_id})
    source_item = resp.get('Item')
    if not source_item:
        print(f"[red]Register job {register_job_id} not found.[/red]")
        raise typer.Exit(code=1)

    # Queries only run against a register job's output
    if source_item.get('job_type', 'register') != 'register':
        print(f"[red]Job {register_job_id} is not a register job (type: {source_item.get('job_type')}).[/red]")
        raise typer.Exit(code=1)

    if source_item.get('status') != 'SUCCEEDED':
        print(f"[yellow]Warning: source job status is '{source_item.get('status')}'. Proceeding anyway.[/yellow]")

    job_id = str(uuid.uuid4())

    # Upload query inputs to S3 under the new job's query_input folder
    molecules_key = f"users/{identity_id}/{job_id}/query_input/molecules.csv"
    yaml_key = f"users/{identity_id}/{job_id}/query_input/query.yml"

    print(f"Uploading query inputs for job [blue]{job_id}[/blue]...")
    for local_path, s3_key in track(
        [(str(molecules), molecules_key), (str(yaml), yaml_key)],
        description="Uploading..."
    ):
        s3.upload_file(local_path, bucket, s3_key)

    # Discover feature_spaces from local query YAML.
    # Aliased import: the `yaml` name is taken by the CLI option above.
    import yaml as pyyaml
    with open(yaml, 'r') as f:
        query_config = pyyaml.safe_load(f)

    # NOTE(review): assumes every rule has a 'feature_space' key — a missing
    # key raises KeyError here, after the inputs were already uploaded.
    rules = query_config.get('rules', [])
    feature_spaces = list(set([rule['feature_space'] for rule in rules]))

    registry_output_prefix = f"users/{identity_id}/{register_job_id}/registry_output"
    registry_input_prefix = f"users/{identity_id}/{register_job_id}/registry_input"

    print(f"Discovered feature spaces from query YAML: {feature_spaces}")

    output_prefix = f"users/{identity_id}/{job_id}/query_output"

    # Register in DynamoDB
    table.put_item(Item={
        'user_id': identity_id,
        'job_id': job_id,
        'username': cfg['username'],
        'status': 'SUBMITTED',
        'job_type': 'query',
        'source_job_id': register_job_id,
        'created_at': datetime.datetime.utcnow().isoformat(),
        's3_path': f"s3://{bucket}/{output_prefix}"
    })

    sfn_input = {
        'user_id': identity_id,
        'job_id': job_id,
        's3_bucket': bucket,
        'registry_input_prefix': registry_input_prefix,
        'registry_output_prefix': registry_output_prefix,
        'molecules_key': molecules_key,
        'yaml_key': yaml_key,
        'output_prefix': output_prefix,
        'feature_spaces': [{'name': fs} for fs in feature_spaces],
    }

    # Upload trigger file last — EventBridge watches for this key and invokes the query Lambda
    query_trigger_key = f"users/{identity_id}/{job_id}/query_input/query.json"
    s3.put_object(
        Bucket=bucket,
        Key=query_trigger_key,
        Body=json.dumps(sfn_input),
        ContentType="application/json",
    )

    print(f"[green]Query job submitted![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")
    print(f"Feature spaces found: {feature_spaces}")
@@ -0,0 +1,111 @@
1
+ import typer
2
+ import boto3
3
+ from rich import print
4
+ from rich.progress import track
5
+ from chemsift_cloud import config
6
+ from pathlib import Path
7
+ import uuid
8
+ import datetime
9
+
10
+ app = typer.Typer()
11
+
12
def get_session(cfg):
    """Get temporary AWS credentials from Identity Pool.

    Exchanges the stored Cognito ``id_token`` for short-lived credentials
    and returns a boto3 Session scoped to the configured region.
    """
    provider = f"cognito-idp.{cfg['region']}.amazonaws.com/{cfg['user_pool_id']}"
    identity = boto3.client('cognito-identity', region_name=cfg['region'])
    resp = identity.get_credentials_for_identity(
        IdentityId=cfg['identity_id'],
        Logins={provider: cfg['id_token']}
    )
    credentials = resp['Credentials']
    return boto3.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretKey'],
        aws_session_token=credentials['SessionToken'],
        region_name=cfg['region']
    )
28
+
29
@app.command()
def register(
    input_dir: Path = typer.Argument(
        help="Directory to submit for processing",
        exists=True,
        file_okay=False,
        resolve_path=True
    ),
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Submit input (folder) to S3 and register job in DynamoDB
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    job_id = str(uuid.uuid4())
    identity_id = cfg['identity_id']
    username = cfg['username']

    # Path: users/{identity_id}/{job_id}/...
    base_path = f"users/{identity_id}/{job_id}"
    input_path = f"{base_path}/registry_input"

    print(f"Registering job [blue]{job_id}[/blue] for user [blue]{username}[/blue]...")

    # Auto-detect if user pointed to parent of registry_input
    if (input_dir / "registry_input").exists() and (input_dir / "registry_input").is_dir():
        print(f"[yellow]Detected 'registry_input' subfolder. using {input_dir}/registry_input as root.[/yellow]")
        upload_root = input_dir / "registry_input"
    else:
        upload_root = input_dir

    # Validate structure
    if not (upload_root / "libraries").exists():
        print(f"[red]Error: 'libraries' folder not found in {upload_root}. Structure should be libraries/ and feature_spaces/.[/red]")
        raise typer.Exit(code=1)

    # Recursively collect files, skipping macOS Finder metadata
    files = [
        p for p in upload_root.rglob("*")
        if p.is_file() and p.name != ".DS_Store"
    ]

    # 1. Upload files to S3
    for file_path in track(files, description="Uploading files..."):
        relative_path = file_path.relative_to(upload_root)
        s3_key = f"{input_path}/{relative_path}"

        with open(file_path, 'rb') as f:
            s3.put_object(
                Bucket=bucket,
                Key=s3_key,
                Body=f
            )

    # 2. Upload trigger file (input.json) if it doesn't exist in the list
    # Assuming input_dir *is* the registry_input content
    # NOTE(review): presumably this key fires an S3 event that starts the
    # pipeline, which is why it is written after all data files — confirm.
    trigger_key = f"{input_path}/input.json"
    s3.put_object(
        Bucket=bucket,
        Key=trigger_key,
        Body="{}" # Simple trigger
    )

    # 3. Register job in DynamoDB
    table.put_item(
        Item={
            'user_id': identity_id,
            'job_id': job_id,
            'username': username,
            'status': 'SUBMITTED',
            'job_type': 'register',
            'created_at': datetime.datetime.utcnow().isoformat(),
            's3_path': f"s3://{bucket}/{base_path}"
        }
    )

    print("[green]Job registered successfully![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")
@@ -0,0 +1,50 @@
1
+ import typer
2
+ from typing_extensions import Annotated
3
+ import boto3
4
+ from rich import print
5
+ from rich.panel import Panel
6
+ from chemsift_cloud import config
7
+ from chemsift_cloud.register import get_session
8
+
9
+ app = typer.Typer()
10
+
11
+
12
@app.command()
def status(
    job_id: Annotated[str, typer.Argument(help="ID of the job to get the status of")],
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Check status of a job ID
    """
    cfg = config.load()
    session = get_session(cfg)
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    # Jobs are keyed by (user_id, job_id), so a user can only read their own
    resp = table.get_item(
        Key={
            'user_id': cfg['identity_id'],
            'job_id': job_id
        }
    )

    item = resp.get('Item')
    if not item:
        print(f"[red]Job {job_id} not found.[/red]")
        return

    # Only map/query jobs carry a source_job_id; omit the line otherwise
    source_job_line = ""
    if item.get('source_job_id'):
        source_job_line = f"\n[bold]Source Job:[/bold] {item['source_job_id']}"

    print(Panel(
        f"[bold]Job ID:[/bold] {item['job_id']}\n"
        f"[bold]Type:[/bold] {item.get('job_type', 'register')}\n"
        f"[bold]Status:[/bold] {item.get('status', 'N/A')}"
        f"{source_job_line}\n"
        f"[bold]Created At:[/bold] {item.get('created_at', 'N/A')}\n"
        f"[bold]S3 Path:[/bold] {item.get('s3_path', 'N/A')}",
        title=f"Job Status: {job_id}",
        expand=False
    ))
@@ -0,0 +1,110 @@
1
+ import typer
2
+ import boto3
3
+ from rich import print
4
+ from rich.progress import track
5
+ from chemsift_cloud import config
6
+ from pathlib import Path
7
+ import uuid
8
+ import datetime
9
+
10
+ app = typer.Typer()
11
+
12
def get_session(cfg):
    """Get temporary AWS credentials from Identity Pool.

    Trades the saved Cognito ``id_token`` for scoped, short-lived AWS
    credentials and wraps them in a boto3 Session for the stored region.
    """
    region = cfg['region']
    logins = {
        f"cognito-idp.{region}.amazonaws.com/{cfg['user_pool_id']}": cfg['id_token']
    }
    response = boto3.client('cognito-identity', region_name=region).get_credentials_for_identity(
        IdentityId=cfg['identity_id'],
        Logins=logins
    )
    creds = response['Credentials']
    return boto3.Session(
        aws_access_key_id=creds['AccessKeyId'],
        aws_secret_access_key=creds['SecretKey'],
        aws_session_token=creds['SessionToken'],
        region_name=region
    )
28
+
29
@app.command()
def submit(
    input_dir: Path = typer.Argument(
        help="Directory to submit for processing",
        exists=True,
        file_okay=False,
        resolve_path=True
    ),
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Submit input (folder) to S3 and register job in DynamoDB
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    job_id = str(uuid.uuid4())
    identity_id = cfg['identity_id']
    username = cfg['username']

    # Path: users/{identity_id}/{job_id}/...
    base_path = f"users/{identity_id}/{job_id}"
    input_path = f"{base_path}/registry_input"

    print(f"Submitting job [blue]{job_id}[/blue] for user [blue]{username}[/blue]...")

    # Auto-detect if user pointed to parent of registry_input
    if (input_dir / "registry_input").exists() and (input_dir / "registry_input").is_dir():
        print(f"[yellow]Detected 'registry_input' subfolder. using {input_dir}/registry_input as root.[/yellow]")
        upload_root = input_dir / "registry_input"
    else:
        upload_root = input_dir

    # Validate structure
    if not (upload_root / "libraries").exists():
        print(f"[red]Error: 'libraries' folder not found in {upload_root}. Structure should be libraries/ and feature_spaces/.[/red]")
        raise typer.Exit(code=1)

    # Recursively collect files, skipping macOS Finder metadata
    files = [
        p for p in upload_root.rglob("*")
        if p.is_file() and p.name != ".DS_Store"
    ]

    # 1. Upload files to S3
    for file_path in track(files, description="Uploading files..."):
        relative_path = file_path.relative_to(upload_root)
        s3_key = f"{input_path}/{relative_path}"

        with open(file_path, 'rb') as f:
            s3.put_object(
                Bucket=bucket,
                Key=s3_key,
                Body=f
            )

    # 2. Upload trigger file (input.json) if it doesn't exist in the list
    # Assuming input_dir *is* the registry_input content
    trigger_key = f"{input_path}/input.json"
    s3.put_object(
        Bucket=bucket,
        Key=trigger_key,
        Body="{}"  # Simple trigger
    )

    # 3. Register job in DynamoDB.
    # Fix: record 'job_type' explicitly, matching register.py — consumers
    # (status/list/download) previously relied on their 'register' default.
    table.put_item(
        Item={
            'user_id': identity_id,
            'job_id': job_id,
            'username': username,
            'status': 'SUBMITTED',
            'job_type': 'register',
            'created_at': datetime.datetime.utcnow().isoformat(),
            's3_path': f"s3://{bucket}/{base_path}"
        }
    )

    print("[green]Job submitted successfully![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")