chemsift 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.4
2
+ Name: chemsift
3
+ Version: 1.1.0
4
+ Summary:
5
+ Author: Dave Martinez
6
+ Author-email: 34991082+dkm-coder@users.noreply.github.com
7
+ Requires-Python: >=3.9
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Requires-Dist: boto3 (>=1.34.135,<2.0.0)
16
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
17
+ Requires-Dist: requests (>=2.32.5,<3.0.0)
18
+ Requires-Dist: rich (>=13.7.0,<14.0.0)
19
+ Requires-Dist: typer (>=0.21.0,<0.22.0)
20
+ Requires-Dist: urllib3 (<2)
21
+ Description-Content-Type: text/markdown
22
+
23
+ # chemsift-cloud
24
+
25
+ ## Commands
26
+
27
+ - `chemsift-cloud login --api-key <key>`: Store credentials in configuration file
28
+ - `chemsift-cloud submit <input-dir>`: Submit input to be processed
29
+ - `chemsift-cloud status <job-id>`: Check status of work for a given ID
30
+ - `chemsift-cloud download <job-id> <output-dir>`: Download the result of the given job ID
31
+ - `chemsift-cloud list`: Get list of job IDs for the user
32
+
33
+ ## Development
34
+
35
+ This project uses [Poetry](https://python-poetry.org/) for dependency management and packaging.
36
+
37
+ ### Bumping the version
38
+
39
+ It is possible to bump the version of the project using the `poetry version` command with one of the following values as an argument:
40
+
41
+ - `patch`
42
+ - `minor`
43
+ - `major`
44
+
45
+ ### Publishing to PyPI
46
+
47
+ #### PyPI configuration (just once)
48
+
49
+ From your [PyPI](https://pypi.org/) account, configure a new [Trusted Publisher](https://pypi.org/manage/project/python-bap/settings/publishing/) by providing the following information:
50
+
51
+ - Specify Owner
52
+ - Specify repository name
53
+ - Specify the workflow name
54
+
55
+ #### Publish a new version
56
+
57
+ To publish a new version to PyPI, create a new tag whose name follows this format:
58
+
59
+ ```
60
+ cli-vX.X.X
61
+ ```
62
+
63
+ > **Important**: The version specified when creating the tag needs to match the one in `pyproject.toml` which is the same one that will be returned when running `poetry version`.
64
+
@@ -0,0 +1,41 @@
1
+ # chemsift-cloud
2
+
3
+ ## Commands
4
+
5
+ - `chemsift-cloud login --api-key <key>`: Store credentials in configuration file
6
+ - `chemsift-cloud submit <input-dir>`: Submit input to be processed
7
+ - `chemsift-cloud status <job-id>`: Check status of work for a given ID
8
+ - `chemsift-cloud download <job-id> <output-dir>`: Download the result of the given job ID
9
+ - `chemsift-cloud list`: Get list of job IDs for the user
10
+
11
+ ## Development
12
+
13
+ This project uses [Poetry](https://python-poetry.org/) for dependency management and packaging.
14
+
15
+ ### Bumping the version
16
+
17
+ It is possible to bump the version of the project using the `poetry version` command with one of the following values as an argument:
18
+
19
+ - `patch`
20
+ - `minor`
21
+ - `major`
22
+
23
+ ### Publishing to PyPI
24
+
25
+ #### PyPI configuration (just once)
26
+
27
+ From your [PyPI](https://pypi.org/) account, configure a new [Trusted Publisher](https://pypi.org/manage/project/python-bap/settings/publishing/) by providing the following information:
28
+
29
+ - Specify Owner
30
+ - Specify repository name
31
+ - Specify the workflow name
32
+
33
+ #### Publish a new version
34
+
35
+ To publish a new version to PyPI, create a new tag whose name follows this format:
36
+
37
+ ```
38
+ cli-vX.X.X
39
+ ```
40
+
41
+ > **Important**: The version specified when creating the tag needs to match the one in `pyproject.toml` which is the same one that will be returned when running `poetry version`.
@@ -0,0 +1,27 @@
1
+ [project]
2
+ name = "chemsift"
3
+ version = "1.1.0"
4
+ description = ""
5
+ authors = [
6
+ {name = "Dave Martinez",email = "34991082+dkm-coder@users.noreply.github.com"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.9"
10
+ dependencies = [
11
+ "typer (>=0.21.0,<0.22.0)",
12
+ "urllib3 (<2)",
13
+ "requests (>=2.32.5,<3.0.0)",
14
+ "boto3 (>=1.34.135,<2.0.0)",
15
+ "rich (>=13.7.0,<14.0.0)",
16
+ "pyyaml (>=6.0.2,<7.0.0)",
17
+ ]
18
+
19
+ [tool.poetry]
20
+ packages = [{include = "chemsift_cloud", from = "src"}]
21
+
22
+ [tool.poetry.scripts]
23
+ chemsift = "chemsift_cloud.main:app"
24
+
25
+ [build-system]
26
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
27
+ build-backend = "poetry.core.masonry.api"
File without changes
@@ -0,0 +1,33 @@
1
+ import json
2
+ from pathlib import Path
3
+ import typer
4
+ from rich import print
5
+
6
def get_config_path():
    """Return the path of the CLI configuration file (~/.chemsift/config.json)."""
    return Path.home() / ".chemsift" / "config.json"
10
+
11
def load():
    """Read and return the stored configuration as a dict.

    Prints an error and exits with code 1 when no configuration file
    has been written yet (i.e. the user has not logged in).
    """
    path = get_config_path()
    if path.exists():
        with open(path, 'r') as f:
            return json.load(f)
    print("[red][bold]Error[/bold]: Configuration not found. Please run [/red][blue]`chemsift-cloud login`[/blue]")
    raise typer.Exit(code=1)
19
+
20
def save(config_data: dict):
    """Persist *config_data* as pretty-printed JSON.

    Creates the ~/.chemsift directory on first use, then reports the
    destination path to the user.
    """
    path = get_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'w') as f:
        json.dump(config_data, f, indent=4)
    print(f"[green]Configuration successfully written to:[/green] [blue]{path}[/blue]")
28
+
29
# For backward compatibility / simple check
def check_existance():
    """Exit with code 1 (after printing an error) unless a configuration file exists."""
    if get_config_path().exists():
        return
    print("[red][bold]Error[/bold]: Configuration not found. Please run [/red][blue]`chemsift-cloud login`[/blue]")
    raise typer.Exit(code=1)
@@ -0,0 +1,63 @@
1
+ import typer
2
+ from typing_extensions import Annotated
3
+ import boto3
4
+ from rich import print
5
+ from rich.progress import track
6
+ from chemsift_cloud import config
7
+ from chemsift_cloud.register import get_session
8
+ from pathlib import Path
9
+
10
+ app = typer.Typer()
11
+
12
@app.command()
def download(
    job_id: Annotated[str, typer.Argument(help="ID of the job to download the results of")],
    output_dir: Annotated[Path, typer.Argument(help="Directory in which to download the job result")],
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Download the result of a given job ID
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    # Look up job type from DynamoDB to determine correct S3 prefix
    resp = table.get_item(Key={'user_id': cfg['identity_id'], 'job_id': job_id})
    item = resp.get('Item', {})
    job_type = item.get('job_type', 'register')

    if job_type == 'query':
        prefix = f"users/{cfg['identity_id']}/{job_id}/query_output/"
    elif job_type == 'map':
        # Map jobs write back into the source register job's registry_output
        source_job_id = item.get('source_job_id', job_id)
        prefix = f"users/{cfg['identity_id']}/{source_job_id}/registry_output/"
    else:
        # register (default)
        prefix = f"users/{cfg['identity_id']}/{job_id}/registry_output/"

    print(f"Downloading results for job [blue]{job_id}[/blue] (type: {job_type})...")

    # Fix: a single list_objects_v2 call returns at most 1000 keys, so
    # larger result sets were silently truncated. Use the paginator to
    # collect every object under the prefix.
    paginator = s3.get_paginator('list_objects_v2')
    objects = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        objects.extend(page.get('Contents', []))

    if not objects:
        print("[yellow]No results found yet. Job might still be processing or failed.[/yellow]")
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    for obj in track(objects, description="Downloading..."):
        s3_key = obj['Key']
        # Remove prefix from local path so files land relative to output_dir
        relative_path = s3_key[len(prefix):]
        if not relative_path:
            # Skip the "directory marker" object for the prefix itself
            continue

        local_path = output_dir / relative_path
        local_path.parent.mkdir(parents=True, exist_ok=True)

        s3.download_file(bucket, s3_key, str(local_path))

    print(f"[green]Download complete![/green] Results saved to [blue]{output_dir}[/blue]")
@@ -0,0 +1,47 @@
1
+ import typer
2
+ import boto3
3
+ from rich import print
4
+ from rich.table import Table
5
+ from chemsift_cloud import config
6
+ from chemsift_cloud.register import get_session
7
+
8
+ app = typer.Typer()
9
+
10
@app.command()
def list(table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")):
    """
    Get list of job IDs for the current user
    """
    cfg = config.load()
    session = get_session(cfg)
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    print(f"Retrieving jobs for {cfg['username']}...")

    # Fix: a single query() call returns at most 1 MB of items; follow
    # LastEvaluatedKey so long job histories are not silently truncated.
    key_condition = boto3.dynamodb.conditions.Key('user_id').eq(cfg['identity_id'])
    query_kwargs = {'KeyConditionExpression': key_condition}
    items = []
    while True:
        resp = table.query(**query_kwargs)
        items.extend(resp.get('Items', []))
        last_key = resp.get('LastEvaluatedKey')
        if not last_key:
            break
        query_kwargs['ExclusiveStartKey'] = last_key

    if not items:
        print("[yellow]No jobs found.[/yellow]")
        return

    table_ui = Table(title="Your Jobs")
    table_ui.add_column("Job ID", style="cyan")
    table_ui.add_column("Type", style="yellow")
    table_ui.add_column("Status", style="green")
    table_ui.add_column("Source Job", style="dim")
    table_ui.add_column("Created At", style="magenta")

    # Older items may predate some attributes; fall back to sensible defaults
    for item in items:
        table_ui.add_row(
            item['job_id'],
            item.get('job_type', 'register'),
            item.get('status', 'N/A'),
            item.get('source_job_id', '-'),
            item.get('created_at', 'N/A')
        )

    print(table_ui)
@@ -0,0 +1,172 @@
1
+ import typer
2
+ import boto3
3
+ import requests
4
+ import webbrowser
5
+ import hashlib
6
+ import base64
7
+ import secrets
8
+ import threading
9
+ import json
10
+ from urllib.parse import urlparse, parse_qs
11
+ from http.server import BaseHTTPRequestHandler, HTTPServer
12
+ from rich import print
13
+ from chemsift_cloud import config
14
+
15
app = typer.Typer()

# Static configuration for now (could be passed as options)
# NOTE(review): these IDs are baked into the published package; every value
# below can be overridden via the corresponding `login` CLI option.
USER_POOL_ID = "eu-west-1_7ZI3fBR1w" # Placeholder
CLIENT_ID = "71ginl5pbckv6vrfltid7lkimi" # Placeholder
IDENTITY_POOL_ID = "eu-west-1:2d13c2cd-06f1-498b-9fcf-2fa6f345b8b7" # Placeholder
REGION = "eu-west-1"
COGNITO_DOMAIN = "chemsift-register-pipeline-prod.auth.eu-west-1.amazoncognito.com" # Placeholder
CALLBACK_PORT = 53090
# Loopback redirect URI the local HTTP server listens on during login;
# presumably registered as an allowed callback on the Cognito app client.
CALLBACK_URL = f"http://localhost:{CALLBACK_PORT}/callback"
25
+
26
class OAuthCallbackHandler(BaseHTTPRequestHandler):
    """One-shot HTTP handler that captures the OAuth redirect.

    Stores the received authorization code and state on the server
    instance (``server.auth_code`` / ``server.returned_state``) and then
    shuts the server down so the caller's ``serve_forever()`` returns.
    """

    def do_GET(self):
        """Handle the browser redirect carrying ``code`` and ``state``."""
        # Parse ?code=...&state=... out of the redirect URL
        query_components = parse_qs(urlparse(self.path).query)
        if 'code' in query_components and 'state' in query_components:
            self.server.auth_code = query_components['code'][0]
            self.server.returned_state = query_components['state'][0]
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(b"<html><body><h1>Login successful!</h1><p>You can close this window and return to the CLI.</p></body></html>")
        else:
            self.send_response(400)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(b"<html><body><h1>Login failed</h1><p>Missing code or state.</p></body></html>")

        # Shutdown server after handling request.
        # Must run on a separate thread: shutdown() blocks until
        # serve_forever() exits, which would deadlock if called here.
        threading.Thread(target=self.server.shutdown).start()

    def log_message(self, format, *args):
        # Suppress standard HTTP server logging
        pass
48
+
49
def generate_pkce_pair() -> tuple[str, str]:
    """Create an OAuth 2.0 PKCE verifier/challenge pair (RFC 7636, S256).

    Returns:
        ``(code_verifier, code_challenge)`` where the challenge is the
        unpadded url-safe base64 encoding of SHA-256(verifier).
    """
    # 64 random bytes yield an 86-character url-safe verifier string
    verifier = secrets.token_urlsafe(64)
    digest = hashlib.sha256(verifier.encode('utf-8')).digest()
    challenge = base64.urlsafe_b64encode(digest).decode('utf-8').rstrip('=')
    return verifier, challenge
55
+
56
def decode_jwt_payload(token: str) -> dict:
    """Best-effort decode of a JWT's payload segment.

    Does NOT verify the signature — only splits the token, re-pads the
    middle base64url segment, and parses it as JSON. Returns ``{}`` for
    anything that does not look like a three-part, decodable JWT.
    """
    segments = token.split('.')
    if len(segments) != 3:
        return {}
    # JWTs strip base64 padding; restore it to a multiple of 4 chars
    payload = segments[1]
    padding = '=' * (-len(payload) % 4)
    try:
        raw = base64.urlsafe_b64decode(payload + padding)
        return json.loads(raw.decode('utf-8'))
    except Exception:
        return {}
67
+
68
@app.command()
def login(
    user_pool_id: str = typer.Option(USER_POOL_ID, help="Cognito User Pool ID"),
    client_id: str = typer.Option(CLIENT_ID, help="Cognito App Client ID"),
    identity_pool_id: str = typer.Option(IDENTITY_POOL_ID, help="Cognito Identity Pool ID"),
    region: str = typer.Option(REGION, help="AWS Region"),
    cognito_domain: str = typer.Option(COGNITO_DOMAIN, help="Cognito Domain")
):
    """
    Login using browser-based OAuth 2.0 flow
    """
    # PKCE protects the code exchange; a random state guards against CSRF
    code_verifier, code_challenge = generate_pkce_pair()
    state = secrets.token_urlsafe(32)

    auth_url = (
        f"https://{cognito_domain}/login?"
        f"response_type=code&"
        f"client_id={client_id}&"
        f"redirect_uri={CALLBACK_URL}&"
        f"state={state}&"
        f"scope=email+openid+profile&"
        f"code_challenge_method=S256&"
        f"code_challenge={code_challenge}"
    )

    print("[yellow]Opening browser for authentication...[/yellow]")
    webbrowser.open_new(auth_url)

    # Start local server to listen for callback.
    # serve_forever() returns once OAuthCallbackHandler calls shutdown().
    server = HTTPServer(('localhost', CALLBACK_PORT), OAuthCallbackHandler)
    server.auth_code = None
    server.returned_state = None
    server.serve_forever()

    if not server.auth_code or not server.returned_state:
        print("[red]Login failed: Did not receive authorization code or state.[/red]")
        raise typer.Exit(code=1)

    if server.returned_state != state:
        print("[red]Login failed: State mismatch. Potential CSRF attack.[/red]")
        raise typer.Exit(code=1)

    print("[green]Authorization code received. Exchanging for tokens...[/green]")

    # Exchange code for tokens (authorization_code grant with PKCE verifier)
    token_url = f"https://{cognito_domain}/oauth2/token"
    token_data = {
        'grant_type': 'authorization_code',
        'client_id': client_id,
        'code': server.auth_code,
        'redirect_uri': CALLBACK_URL,
        'code_verifier': code_verifier
    }

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    # NOTE(review): no timeout is set on this request — consider adding one
    token_resp = requests.post(token_url, data=token_data, headers=headers)

    if token_resp.status_code != 200:
        print(f"[red]Failed to exchange code for tokens: {token_resp.text}[/red]")
        raise typer.Exit(code=1)

    tokens = token_resp.json()
    id_token = tokens.get('id_token')

    if not id_token:
        print("[red]Login failed: Missing id_token in response.[/red]")
        raise typer.Exit(code=1)

    try:
        # Get Identity ID from Identity Pool
        identity_client = boto3.client('cognito-identity', region_name=region)
        id_resp = identity_client.get_id(
            IdentityPoolId=identity_pool_id,
            Logins={
                f'cognito-idp.{region}.amazonaws.com/{user_pool_id}': id_token
            }
        )
        identity_id = id_resp['IdentityId']

        # Display name comes from the (unverified) id_token payload
        payload = decode_jwt_payload(id_token)
        username = payload.get('email', 'Unknown User')

        # Store configuration (id_token/refresh_token are saved in plain
        # text under ~/.chemsift — see config.save)
        config_data = {
            "username": username,
            "user_pool_id": user_pool_id,
            "client_id": client_id,
            "identity_pool_id": identity_pool_id,
            "region": region,
            "identity_id": identity_id,
            "id_token": id_token,
            "refresh_token": tokens.get('refresh_token'),
            "cognito_domain": cognito_domain
        }

        config.save(config_data)
        print(f"[green]Successfully logged in as {username}[/green]")
        print(f"Identity ID: [blue]{identity_id}[/blue]")

    except Exception as e:
        print(f"[red]Failed to get AWS credentials: {str(e)}[/red]")
        raise typer.Exit(code=1)
@@ -0,0 +1,22 @@
1
import typer
from chemsift_cloud.login import app as login_app
from chemsift_cloud.register import app as register_app
from chemsift_cloud.map import app as map_app
from chemsift_cloud.query import app as query_app
from chemsift_cloud.status import app as status_app
from chemsift_cloud.download import app as download_app
from chemsift_cloud.list import app as list_app

# Root CLI application; shows help when invoked with no arguments
app = typer.Typer(no_args_is_help=True)

# Each sub-module exposes a single-command Typer app; mount them all here
app.add_typer(login_app)
app.add_typer(register_app)
app.add_typer(map_app)
app.add_typer(query_app)
app.add_typer(status_app)
app.add_typer(download_app)
app.add_typer(list_app)


if __name__ == "__main__":
    app()
@@ -0,0 +1,102 @@
1
+ import typer
2
+ import uuid
3
+ import datetime
4
+ import json
5
+ from rich import print
6
+ from chemsift_cloud import config
7
+ from chemsift_cloud.register import get_session
8
+
9
+ app = typer.Typer()
10
+
11
+
12
def discover_s3_prefixes(s3, bucket, prefix):
    """Return the immediate sub-folder names beneath *prefix* in *bucket*.

    Uses a paginated ``list_objects_v2`` with ``'/'`` as delimiter, so only
    one level of "directories" (CommonPrefixes) is reported.
    """
    pages = s3.get_paginator('list_objects_v2').paginate(
        Bucket=bucket, Prefix=prefix, Delimiter='/'
    )
    return [
        entry['Prefix'].rstrip('/').split('/')[-1]
        for page in pages
        for entry in page.get('CommonPrefixes', [])
    ]
21
+
22
+
23
@app.command()
def map(
    register_job_id: str = typer.Argument(help="Register job ID to run mapping on"),
    reference_library: str = typer.Argument(help="Name of the reference library in registry_output"),
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name"),
):
    """
    Trigger the mapped pipeline against a completed register job.
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    identity_id = cfg['identity_id']

    # Look up source register job
    resp = table.get_item(Key={'user_id': identity_id, 'job_id': register_job_id})
    source_item = resp.get('Item')
    if not source_item:
        print(f"[red]Register job {register_job_id} not found.[/red]")
        raise typer.Exit(code=1)

    # Mapping only runs on top of a register job's output
    if source_item.get('job_type', 'register') != 'register':
        print(f"[red]Job {register_job_id} is not a register job (type: {source_item.get('job_type')}).[/red]")
        raise typer.Exit(code=1)

    if source_item.get('status') != 'SUCCEEDED':
        print(f"[yellow]Warning: source job status is '{source_item.get('status')}'. Proceeding anyway.[/yellow]")

    # Discover libraries and feature_spaces from registry_output
    # NOTE(review): the top-level listing likely also includes the
    # 'feature_spaces' folder itself among `libraries` — confirm the
    # downstream pipeline tolerates that entry.
    registry_output_prefix = f"users/{identity_id}/{register_job_id}/registry_output/"
    libraries = discover_s3_prefixes(s3, bucket, registry_output_prefix)
    feature_spaces_prefix = f"{registry_output_prefix}feature_spaces/"
    feature_spaces = discover_s3_prefixes(s3, bucket, feature_spaces_prefix)

    if not libraries:
        print(f"[red]No libraries found under {registry_output_prefix}. Is the register job complete?[/red]")
        raise typer.Exit(code=1)

    job_id = str(uuid.uuid4())
    # Map results are written back into the source job's registry_output
    output_prefix = f"users/{identity_id}/{register_job_id}/registry_output"

    # Register in DynamoDB
    table.put_item(Item={
        'user_id': identity_id,
        'job_id': job_id,
        'username': cfg['username'],
        'status': 'SUBMITTED',
        'job_type': 'map',
        'source_job_id': register_job_id,
        'created_at': datetime.datetime.utcnow().isoformat(),
        's3_path': f"s3://{bucket}/{output_prefix}"
    })

    sfn_input = {
        'user_id': identity_id,
        'job_id': job_id,
        's3_bucket': bucket,
        'output_prefix': output_prefix,
        'libraries': [{'name': lib} for lib in libraries],
        'feature_spaces': [{'name': fs} for fs in feature_spaces],
        'reference_library': reference_library,
    }

    # Upload trigger file — EventBridge watches for this key and invokes the map Lambda
    map_trigger_key = f"users/{identity_id}/{job_id}/map_input/map.json"
    s3.put_object(
        Bucket=bucket,
        Key=map_trigger_key,
        Body=json.dumps(sfn_input),
        ContentType="application/json",
    )

    print(f"[green]Map job submitted![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")
    print(f"Libraries to map: {libraries}")
    print(f"Reference library: {reference_library}")
@@ -0,0 +1,128 @@
1
+ import typer
2
+ import uuid
3
+ import datetime
4
+ import json
5
+ from typing_extensions import Annotated
6
+ from rich import print
7
+ from rich.progress import track
8
+ from pathlib import Path
9
+ from chemsift_cloud import config
10
+ from chemsift_cloud.register import get_session
11
+
12
+ app = typer.Typer()
13
+
14
+
15
def discover_s3_prefixes(s3, bucket, prefix):
    """Collect the names of the one-level-deep "folders" under *prefix*.

    Paginates ``list_objects_v2`` with a ``'/'`` delimiter so S3 groups
    keys into CommonPrefixes; the trailing path component of each is the
    folder name.
    """
    paginator = s3.get_paginator('list_objects_v2')
    names = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        common = page.get('CommonPrefixes', [])
        names.extend(entry['Prefix'].rstrip('/').split('/')[-1] for entry in common)
    return names
24
+
25
+
26
@app.command()
def query(
    register_job_id: str = typer.Argument(help="Register job ID whose registry_output to query"),
    molecules: Annotated[Path, typer.Option(
        "--molecules", help="Local path to query molecules CSV",
        exists=True, file_okay=True, dir_okay=False, resolve_path=True
    )] = ...,
    yaml: Annotated[Path, typer.Option(
        "--yaml", help="Local path to query YAML config",
        exists=True, file_okay=True, dir_okay=False, resolve_path=True
    )] = ...,
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name"),
):
    """
    Submit a query against a completed register job's registry_output.
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    identity_id = cfg['identity_id']

    # Look up source register job
    resp = table.get_item(Key={'user_id': identity_id, 'job_id': register_job_id})
    source_item = resp.get('Item')
    if not source_item:
        print(f"[red]Register job {register_job_id} not found.[/red]")
        raise typer.Exit(code=1)

    # Queries only run against a register job's output
    if source_item.get('job_type', 'register') != 'register':
        print(f"[red]Job {register_job_id} is not a register job (type: {source_item.get('job_type')}).[/red]")
        raise typer.Exit(code=1)

    if source_item.get('status') != 'SUCCEEDED':
        print(f"[yellow]Warning: source job status is '{source_item.get('status')}'. Proceeding anyway.[/yellow]")

    job_id = str(uuid.uuid4())

    # Upload query inputs to S3 under the new job's query_input folder
    molecules_key = f"users/{identity_id}/{job_id}/query_input/molecules.csv"
    yaml_key = f"users/{identity_id}/{job_id}/query_input/query.yml"

    print(f"Uploading query inputs for job [blue]{job_id}[/blue]...")
    for local_path, s3_key in track(
        [(str(molecules), molecules_key), (str(yaml), yaml_key)],
        description="Uploading..."
    ):
        s3.upload_file(local_path, bucket, s3_key)

    # Discover feature_spaces from local query YAML.
    # Aliased import: the `yaml` name is taken by the CLI option above.
    import yaml as pyyaml
    with open(yaml, 'r') as f:
        query_config = pyyaml.safe_load(f)

    # NOTE(review): assumes every rule has a 'feature_space' key — a missing
    # key raises KeyError here, after the inputs were already uploaded.
    rules = query_config.get('rules', [])
    feature_spaces = list(set([rule['feature_space'] for rule in rules]))

    registry_output_prefix = f"users/{identity_id}/{register_job_id}/registry_output"
    registry_input_prefix = f"users/{identity_id}/{register_job_id}/registry_input"

    print(f"Discovered feature spaces from query YAML: {feature_spaces}")

    output_prefix = f"users/{identity_id}/{job_id}/query_output"

    # Register in DynamoDB
    table.put_item(Item={
        'user_id': identity_id,
        'job_id': job_id,
        'username': cfg['username'],
        'status': 'SUBMITTED',
        'job_type': 'query',
        'source_job_id': register_job_id,
        'created_at': datetime.datetime.utcnow().isoformat(),
        's3_path': f"s3://{bucket}/{output_prefix}"
    })

    sfn_input = {
        'user_id': identity_id,
        'job_id': job_id,
        's3_bucket': bucket,
        'registry_input_prefix': registry_input_prefix,
        'registry_output_prefix': registry_output_prefix,
        'molecules_key': molecules_key,
        'yaml_key': yaml_key,
        'output_prefix': output_prefix,
        'feature_spaces': [{'name': fs} for fs in feature_spaces],
    }

    # Upload trigger file last — EventBridge watches for this key and invokes the query Lambda
    query_trigger_key = f"users/{identity_id}/{job_id}/query_input/query.json"
    s3.put_object(
        Bucket=bucket,
        Key=query_trigger_key,
        Body=json.dumps(sfn_input),
        ContentType="application/json",
    )

    print(f"[green]Query job submitted![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")
    print(f"Feature spaces found: {feature_spaces}")
@@ -0,0 +1,111 @@
1
+ import typer
2
+ import boto3
3
+ from rich import print
4
+ from rich.progress import track
5
+ from chemsift_cloud import config
6
+ from pathlib import Path
7
+ import uuid
8
+ import datetime
9
+
10
+ app = typer.Typer()
11
+
12
def get_session(cfg):
    """Get temporary AWS credentials from Identity Pool.

    Exchanges the stored Cognito ``id_token`` for short-lived credentials
    and returns a boto3 Session scoped to the configured region.
    """
    provider = f"cognito-idp.{cfg['region']}.amazonaws.com/{cfg['user_pool_id']}"
    identity = boto3.client('cognito-identity', region_name=cfg['region'])
    resp = identity.get_credentials_for_identity(
        IdentityId=cfg['identity_id'],
        Logins={provider: cfg['id_token']}
    )
    credentials = resp['Credentials']
    return boto3.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretKey'],
        aws_session_token=credentials['SessionToken'],
        region_name=cfg['region']
    )
28
+
29
@app.command()
def register(
    input_dir: Path = typer.Argument(
        help="Directory to submit for processing",
        exists=True,
        file_okay=False,
        resolve_path=True
    ),
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Submit input (folder) to S3 and register job in DynamoDB
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    job_id = str(uuid.uuid4())
    identity_id = cfg['identity_id']
    username = cfg['username']

    # Path: users/{identity_id}/{job_id}/...
    base_path = f"users/{identity_id}/{job_id}"
    input_path = f"{base_path}/registry_input"

    print(f"Registering job [blue]{job_id}[/blue] for user [blue]{username}[/blue]...")

    # Auto-detect if user pointed to parent of registry_input
    if (input_dir / "registry_input").exists() and (input_dir / "registry_input").is_dir():
        print(f"[yellow]Detected 'registry_input' subfolder. using {input_dir}/registry_input as root.[/yellow]")
        upload_root = input_dir / "registry_input"
    else:
        upload_root = input_dir

    # Validate structure
    if not (upload_root / "libraries").exists():
        print(f"[red]Error: 'libraries' folder not found in {upload_root}. Structure should be libraries/ and feature_spaces/.[/red]")
        raise typer.Exit(code=1)

    # Recursively collect files, skipping macOS Finder metadata
    files = [
        p for p in upload_root.rglob("*")
        if p.is_file() and p.name != ".DS_Store"
    ]

    # 1. Upload files to S3
    for file_path in track(files, description="Uploading files..."):
        relative_path = file_path.relative_to(upload_root)
        s3_key = f"{input_path}/{relative_path}"

        with open(file_path, 'rb') as f:
            s3.put_object(
                Bucket=bucket,
                Key=s3_key,
                Body=f
            )

    # 2. Upload trigger file (input.json) if it doesn't exist in the list
    # Assuming input_dir *is* the registry_input content
    # NOTE(review): presumably this key fires an S3 event that starts the
    # pipeline, which is why it is written after all data files — confirm.
    trigger_key = f"{input_path}/input.json"
    s3.put_object(
        Bucket=bucket,
        Key=trigger_key,
        Body="{}" # Simple trigger
    )

    # 3. Register job in DynamoDB
    table.put_item(
        Item={
            'user_id': identity_id,
            'job_id': job_id,
            'username': username,
            'status': 'SUBMITTED',
            'job_type': 'register',
            'created_at': datetime.datetime.utcnow().isoformat(),
            's3_path': f"s3://{bucket}/{base_path}"
        }
    )

    print("[green]Job registered successfully![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")
@@ -0,0 +1,50 @@
1
+ import typer
2
+ from typing_extensions import Annotated
3
+ import boto3
4
+ from rich import print
5
+ from rich.panel import Panel
6
+ from chemsift_cloud import config
7
+ from chemsift_cloud.register import get_session
8
+
9
+ app = typer.Typer()
10
+
11
+
12
@app.command()
def status(
    job_id: Annotated[str, typer.Argument(help="ID of the job to get the status of")],
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Check status of a job ID
    """
    cfg = config.load()
    session = get_session(cfg)
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    # Jobs are keyed by (user_id, job_id), so a user can only read their own
    resp = table.get_item(
        Key={
            'user_id': cfg['identity_id'],
            'job_id': job_id
        }
    )

    item = resp.get('Item')
    if not item:
        print(f"[red]Job {job_id} not found.[/red]")
        return

    # Only map/query jobs carry a source_job_id; omit the line otherwise
    source_job_line = ""
    if item.get('source_job_id'):
        source_job_line = f"\n[bold]Source Job:[/bold] {item['source_job_id']}"

    print(Panel(
        f"[bold]Job ID:[/bold] {item['job_id']}\n"
        f"[bold]Type:[/bold] {item.get('job_type', 'register')}\n"
        f"[bold]Status:[/bold] {item.get('status', 'N/A')}"
        f"{source_job_line}\n"
        f"[bold]Created At:[/bold] {item.get('created_at', 'N/A')}\n"
        f"[bold]S3 Path:[/bold] {item.get('s3_path', 'N/A')}",
        title=f"Job Status: {job_id}",
        expand=False
    ))
@@ -0,0 +1,110 @@
1
+ import typer
2
+ import boto3
3
+ from rich import print
4
+ from rich.progress import track
5
+ from chemsift_cloud import config
6
+ from pathlib import Path
7
+ import uuid
8
+ import datetime
9
+
10
+ app = typer.Typer()
11
+
12
def get_session(cfg):
    """Get temporary AWS credentials from Identity Pool.

    Trades the saved Cognito ``id_token`` for scoped, short-lived AWS
    credentials and wraps them in a boto3 Session for the stored region.
    """
    region = cfg['region']
    logins = {
        f"cognito-idp.{region}.amazonaws.com/{cfg['user_pool_id']}": cfg['id_token']
    }
    response = boto3.client('cognito-identity', region_name=region).get_credentials_for_identity(
        IdentityId=cfg['identity_id'],
        Logins=logins
    )
    creds = response['Credentials']
    return boto3.Session(
        aws_access_key_id=creds['AccessKeyId'],
        aws_secret_access_key=creds['SecretKey'],
        aws_session_token=creds['SessionToken'],
        region_name=region
    )
28
+
29
@app.command()
def submit(
    input_dir: Path = typer.Argument(
        help="Directory to submit for processing",
        exists=True,
        file_okay=False,
        resolve_path=True
    ),
    bucket: str = typer.Option("chemsift-register-pipeline", help="S3 Bucket name"),
    table_name: str = typer.Option("chemsift-jobs", help="DynamoDB Jobs table name")
):
    """
    Submit input (folder) to S3 and register job in DynamoDB
    """
    cfg = config.load()
    session = get_session(cfg)
    s3 = session.client('s3')
    db = session.resource('dynamodb')
    table = db.Table(table_name)

    job_id = str(uuid.uuid4())
    identity_id = cfg['identity_id']
    username = cfg['username']

    # Path: users/{identity_id}/{job_id}/...
    base_path = f"users/{identity_id}/{job_id}"
    input_path = f"{base_path}/registry_input"

    print(f"Submitting job [blue]{job_id}[/blue] for user [blue]{username}[/blue]...")

    # Auto-detect if user pointed to parent of registry_input
    if (input_dir / "registry_input").exists() and (input_dir / "registry_input").is_dir():
        print(f"[yellow]Detected 'registry_input' subfolder. using {input_dir}/registry_input as root.[/yellow]")
        upload_root = input_dir / "registry_input"
    else:
        upload_root = input_dir

    # Validate structure
    if not (upload_root / "libraries").exists():
        print(f"[red]Error: 'libraries' folder not found in {upload_root}. Structure should be libraries/ and feature_spaces/.[/red]")
        raise typer.Exit(code=1)

    # Recursively collect files, skipping macOS Finder metadata
    files = [
        p for p in upload_root.rglob("*")
        if p.is_file() and p.name != ".DS_Store"
    ]

    # 1. Upload files to S3
    for file_path in track(files, description="Uploading files..."):
        relative_path = file_path.relative_to(upload_root)
        s3_key = f"{input_path}/{relative_path}"

        with open(file_path, 'rb') as f:
            s3.put_object(
                Bucket=bucket,
                Key=s3_key,
                Body=f
            )

    # 2. Upload trigger file (input.json) if it doesn't exist in the list
    # Assuming input_dir *is* the registry_input content
    trigger_key = f"{input_path}/input.json"
    s3.put_object(
        Bucket=bucket,
        Key=trigger_key,
        Body="{}"  # Simple trigger
    )

    # 3. Register job in DynamoDB.
    # Fix: record 'job_type' explicitly, matching register.py — consumers
    # (status/list/download) previously relied on their 'register' default.
    table.put_item(
        Item={
            'user_id': identity_id,
            'job_id': job_id,
            'username': username,
            'status': 'SUBMITTED',
            'job_type': 'register',
            'created_at': datetime.datetime.utcnow().isoformat(),
            's3_path': f"s3://{bucket}/{base_path}"
        }
    )

    print("[green]Job submitted successfully![/green]")
    print(f"Job ID: [bold]{job_id}[/bold]")