chalk-remote-call-python 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. chalk_remote_call_python-0.0.0/PKG-INFO +193 -0
  2. chalk_remote_call_python-0.0.0/README.md +160 -0
  3. chalk_remote_call_python-0.0.0/chalk_remote_call/__init__.py +6 -0
  4. chalk_remote_call_python-0.0.0/chalk_remote_call/__main__.py +3 -0
  5. chalk_remote_call_python-0.0.0/chalk_remote_call/_gen/__init__.py +0 -0
  6. chalk_remote_call_python-0.0.0/chalk_remote_call/_gen/chalk/__init__.py +0 -0
  7. chalk_remote_call_python-0.0.0/chalk_remote_call/_gen/chalk/runtime/__init__.py +0 -0
  8. chalk_remote_call_python-0.0.0/chalk_remote_call/_gen/chalk/runtime/v1/__init__.py +0 -0
  9. chalk_remote_call_python-0.0.0/chalk_remote_call/_gen/chalk/runtime/v1/remote_python_call_pb2.py +31 -0
  10. chalk_remote_call_python-0.0.0/chalk_remote_call/_gen/chalk/runtime/v1/remote_python_call_pb2_grpc.py +66 -0
  11. chalk_remote_call_python-0.0.0/chalk_remote_call/_native.pyi +11 -0
  12. chalk_remote_call_python-0.0.0/chalk_remote_call/arrow_utils.py +17 -0
  13. chalk_remote_call_python-0.0.0/chalk_remote_call/cli.py +86 -0
  14. chalk_remote_call_python-0.0.0/chalk_remote_call/handler_loader.py +46 -0
  15. chalk_remote_call_python-0.0.0/chalk_remote_call/input_transform.py +32 -0
  16. chalk_remote_call_python-0.0.0/chalk_remote_call/server.py +44 -0
  17. chalk_remote_call_python-0.0.0/chalk_remote_call/servicer.py +82 -0
  18. chalk_remote_call_python-0.0.0/chalk_remote_call_python.egg-info/PKG-INFO +193 -0
  19. chalk_remote_call_python-0.0.0/chalk_remote_call_python.egg-info/SOURCES.txt +29 -0
  20. chalk_remote_call_python-0.0.0/chalk_remote_call_python.egg-info/dependency_links.txt +1 -0
  21. chalk_remote_call_python-0.0.0/chalk_remote_call_python.egg-info/entry_points.txt +2 -0
  22. chalk_remote_call_python-0.0.0/chalk_remote_call_python.egg-info/requires.txt +11 -0
  23. chalk_remote_call_python-0.0.0/chalk_remote_call_python.egg-info/top_level.txt +1 -0
  24. chalk_remote_call_python-0.0.0/pyproject.toml +96 -0
  25. chalk_remote_call_python-0.0.0/setup.cfg +4 -0
  26. chalk_remote_call_python-0.0.0/setup.py +12 -0
  27. chalk_remote_call_python-0.0.0/tests/test_arrow_utils.py +37 -0
  28. chalk_remote_call_python-0.0.0/tests/test_end_to_end.py +113 -0
  29. chalk_remote_call_python-0.0.0/tests/test_error_paths.py +141 -0
  30. chalk_remote_call_python-0.0.0/tests/test_input_transform.py +62 -0
  31. chalk_remote_call_python-0.0.0/tests/test_servicer.py +121 -0
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: chalk-remote-call-python
3
+ Version: 0.0.0
4
+ Summary: Chalk remote call Python runtime interface client
5
+ Author: Chalk AI, Inc.
6
+ Project-URL: Homepage, https://chalk.ai
7
+ Project-URL: Documentation, https://docs.chalk.ai
8
+ Project-URL: Changelog, https://docs.chalk.ai/docs/changelog
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python
15
+ Classifier: Typing :: Typed
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
18
+ Classifier: Topic :: Software Development :: Code Generators
19
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: <3.14,>=3.10
22
+ Description-Content-Type: text/markdown
23
+ Requires-Dist: pyarrow>=14.0.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: setuptools-rust>=1.7; extra == "dev"
26
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
27
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
28
+ Requires-Dist: grpcio>=1.60.0; extra == "dev"
29
+ Requires-Dist: grpcio-health-checking>=1.60.0; extra == "dev"
30
+ Requires-Dist: grpcio-reflection>=1.60.0; extra == "dev"
31
+ Requires-Dist: protobuf>=4.25.0; extra == "dev"
32
+ Requires-Dist: grpcio-tools>=1.60.0; extra == "dev"
33
+
34
+ # chalk-remote-call-python
35
+
36
+ A Python runtime interface for Chalk's RemoteCallService. This package provides a gRPC server backed by a Rust implementation (tonic + PyO3) that lets you define a `handler(event, context)` function to process incoming Arrow-serialized requests.
37
+
38
+ The server receives Arrow IPC record batches over a bidirectional gRPC stream, transforms them into a Python dict, invokes your handler, and streams the results back as Arrow IPC.
39
+
40
+ ## Requirements
41
+
42
+ - Python >= 3.10, < 3.14
43
+ - Rust toolchain (for building the native extension)
44
+ - `pyarrow >= 14.0.0`
45
+
46
+ ## Usage
47
+
48
+ ### 1. Write a handler
49
+
50
+ Create a Python module with a `handler` function:
51
+
52
+ ```python
53
+ # my_handler.py
54
+ import pyarrow as pa
55
+ import pyarrow.compute as pc
56
+
57
+
58
+ def on_startup():
59
+ """Optional -- runs once before the server starts accepting requests."""
60
+ print("Loading model weights...")
61
+
62
+
63
+ def handler(event: dict[str, pa.Array], context: dict) -> pa.Array:
64
+ """Called for each incoming request.
65
+
66
+ Args:
67
+ event: dict mapping column names to pyarrow.Array values.
68
+ context: dict with request metadata:
69
+ - "peer": remote address string
70
+ - "metadata": dict of gRPC headers
71
+
72
+ Returns:
73
+ A pyarrow.Array, pyarrow.RecordBatch, pyarrow.Table, list, dict, or scalar.
74
+ The framework auto-converts the result to Arrow IPC for the response.
75
+ """
76
+ return pc.multiply(event["x"], event["y"])
77
+ ```
78
+
79
+ ### 2. Start the server
80
+
81
+ ```bash
82
+ chalk-remote-call --handler my_handler.handler
83
+ ```
84
+
85
+ Or via `python -m`:
86
+
87
+ ```bash
88
+ python -m chalk_remote_call --handler my_handler.handler
89
+ ```
90
+
91
+ ### CLI options
92
+
93
+ | Flag | Env var | Default | Description |
94
+ |------|---------|---------|-------------|
95
+ | `--handler` | | *(required)* | Dotted path to handler function |
96
+ | `--port` | `CHALK_REMOTE_CALL_PORT` | `6666` | Port to listen on |
97
+ | `--host` | `CHALK_REMOTE_CALL_HOST` | `[::]` | Host to bind to |
98
+ | `--workers` | `CHALK_REMOTE_CALL_WORKERS` | `10` | Tokio runtime worker threads |
99
+ | `--on-startup` | | | Dotted path to a startup function |
100
+ | `--log-level` | | `INFO` | `DEBUG`, `INFO`, `WARNING`, or `ERROR` |
101
+
102
+ ### Environment variables
103
+
104
+ | Variable | Description |
105
+ |----------|-------------|
106
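+ | `CHALK_INPUT_ARGS` | Comma-separated list of column names (e.g. `x,y,z`). Renames incoming RecordBatch columns by index, so the number of names must equal the number of columns in the batch; a mismatch raises a `ValueError`. If unset, the original column names are used. |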
107
+
108
+ ### 3. Docker image
109
+
110
+ ```dockerfile
111
+ FROM python:3.11-slim
112
+
113
+ WORKDIR /app
114
+
115
+ RUN pip install chalk-remote-call-python
116
+
117
+ # Copy your handler code
118
+ COPY my_handler.py .
119
+
120
+ # The server listens on port 6666 by default
121
+ EXPOSE 6666
122
+
123
+ ENTRYPOINT ["chalk-remote-call", "--handler", "my_handler.handler"]
124
+ ```
125
+
126
+ Build and run:
127
+
128
+ ```bash
129
+ docker build -t my-chalk-handler .
130
+ docker run -p 6666:6666 -e CHALK_INPUT_ARGS="x,y" my-chalk-handler
131
+ ```
132
+
133
+ ### Programmatic usage
134
+
135
+ You can also start the server from Python:
136
+
137
+ ```python
138
+ from chalk_remote_call import serve
139
+
140
+ def handler(event, context):
141
+ return list(event.values())[0]
142
+
143
+ serve(handler=handler, port=8080)
144
+ ```
145
+
146
+ ## Local Testing
147
+
148
+ Use `grpcurl` to verify the server is running:
149
+
150
+ ```bash
151
+ # Check health
152
+ grpcurl -plaintext localhost:6666 grpc.health.v1.Health/Check
153
+
154
+ # List services via reflection
155
+ grpcurl -plaintext localhost:6666 list
156
+ ```
157
+
158
+ ## Architecture
159
+
160
+ The server uses a Rust backend via PyO3:
161
+
162
+ - **tonic** — gRPC server with health checking and reflection
163
+ - **prost** — protobuf message handling
164
+ - **arrow-rs** — Arrow IPC validation
165
+ - **PyO3** — FFI bridge to call Python handler functions
166
+
167
+ The Rust server runs on a tokio async runtime. When a request arrives, Arrow IPC bytes are passed to Python via PyO3 (`spawn_blocking` + GIL acquisition), where the handler is invoked. Results are passed back as IPC bytes.
168
+
169
+ ## Development
170
+
171
+ ### Setup
172
+
173
+ ```bash
174
+ cd chalk-remote-call-python
175
+ uv venv --python 3.11
176
+ uv pip install -e ".[dev]"
177
+ ```
178
+
179
+ ### Running tests
180
+
181
+ ```bash
182
+ pytest tests/ -v
183
+ ```
184
+
185
+ ### Rust code
186
+
187
+ Rust source lives in `chalk-remote-call-rs/`.
188
+
189
+ To check the Rust code independently:
190
+
191
+ ```bash
192
+ PYO3_PYTHON=python3 cargo check --manifest-path chalk-remote-call-rs/chalk-remote-call-server/Cargo.toml
193
+ ```
@@ -0,0 +1,160 @@
1
+ # chalk-remote-call-python
2
+
3
+ A Python runtime interface for Chalk's RemoteCallService. This package provides a gRPC server backed by a Rust implementation (tonic + PyO3) that lets you define a `handler(event, context)` function to process incoming Arrow-serialized requests.
4
+
5
+ The server receives Arrow IPC record batches over a bidirectional gRPC stream, transforms them into a Python dict, invokes your handler, and streams the results back as Arrow IPC.
6
+
7
+ ## Requirements
8
+
9
+ - Python >= 3.10, < 3.14
10
+ - Rust toolchain (for building the native extension)
11
+ - `pyarrow >= 14.0.0`
12
+
13
+ ## Usage
14
+
15
+ ### 1. Write a handler
16
+
17
+ Create a Python module with a `handler` function:
18
+
19
+ ```python
20
+ # my_handler.py
21
+ import pyarrow as pa
22
+ import pyarrow.compute as pc
23
+
24
+
25
+ def on_startup():
26
+ """Optional -- runs once before the server starts accepting requests."""
27
+ print("Loading model weights...")
28
+
29
+
30
+ def handler(event: dict[str, pa.Array], context: dict) -> pa.Array:
31
+ """Called for each incoming request.
32
+
33
+ Args:
34
+ event: dict mapping column names to pyarrow.Array values.
35
+ context: dict with request metadata:
36
+ - "peer": remote address string
37
+ - "metadata": dict of gRPC headers
38
+
39
+ Returns:
40
+ A pyarrow.Array, pyarrow.RecordBatch, pyarrow.Table, list, dict, or scalar.
41
+ The framework auto-converts the result to Arrow IPC for the response.
42
+ """
43
+ return pc.multiply(event["x"], event["y"])
44
+ ```
45
+
46
+ ### 2. Start the server
47
+
48
+ ```bash
49
+ chalk-remote-call --handler my_handler.handler
50
+ ```
51
+
52
+ Or via `python -m`:
53
+
54
+ ```bash
55
+ python -m chalk_remote_call --handler my_handler.handler
56
+ ```
57
+
58
+ ### CLI options
59
+
60
+ | Flag | Env var | Default | Description |
61
+ |------|---------|---------|-------------|
62
+ | `--handler` | | *(required)* | Dotted path to handler function |
63
+ | `--port` | `CHALK_REMOTE_CALL_PORT` | `6666` | Port to listen on |
64
+ | `--host` | `CHALK_REMOTE_CALL_HOST` | `[::]` | Host to bind to |
65
+ | `--workers` | `CHALK_REMOTE_CALL_WORKERS` | `10` | Tokio runtime worker threads |
66
+ | `--on-startup` | | | Dotted path to a startup function |
67
+ | `--log-level` | | `INFO` | `DEBUG`, `INFO`, `WARNING`, or `ERROR` |
68
+
69
+ ### Environment variables
70
+
71
+ | Variable | Description |
72
+ |----------|-------------|
73
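+ | `CHALK_INPUT_ARGS` | Comma-separated list of column names (e.g. `x,y,z`). Renames incoming RecordBatch columns by index, so the number of names must equal the number of columns in the batch; a mismatch raises a `ValueError`. If unset, the original column names are used. |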
74
+
75
+ ### 3. Docker image
76
+
77
+ ```dockerfile
78
+ FROM python:3.11-slim
79
+
80
+ WORKDIR /app
81
+
82
+ RUN pip install chalk-remote-call-python
83
+
84
+ # Copy your handler code
85
+ COPY my_handler.py .
86
+
87
+ # The server listens on port 6666 by default
88
+ EXPOSE 6666
89
+
90
+ ENTRYPOINT ["chalk-remote-call", "--handler", "my_handler.handler"]
91
+ ```
92
+
93
+ Build and run:
94
+
95
+ ```bash
96
+ docker build -t my-chalk-handler .
97
+ docker run -p 6666:6666 -e CHALK_INPUT_ARGS="x,y" my-chalk-handler
98
+ ```
99
+
100
+ ### Programmatic usage
101
+
102
+ You can also start the server from Python:
103
+
104
+ ```python
105
+ from chalk_remote_call import serve
106
+
107
+ def handler(event, context):
108
+ return list(event.values())[0]
109
+
110
+ serve(handler=handler, port=8080)
111
+ ```
112
+
113
+ ## Local Testing
114
+
115
+ Use `grpcurl` to verify the server is running:
116
+
117
+ ```bash
118
+ # Check health
119
+ grpcurl -plaintext localhost:6666 grpc.health.v1.Health/Check
120
+
121
+ # List services via reflection
122
+ grpcurl -plaintext localhost:6666 list
123
+ ```
124
+
125
+ ## Architecture
126
+
127
+ The server uses a Rust backend via PyO3:
128
+
129
+ - **tonic** — gRPC server with health checking and reflection
130
+ - **prost** — protobuf message handling
131
+ - **arrow-rs** — Arrow IPC validation
132
+ - **PyO3** — FFI bridge to call Python handler functions
133
+
134
+ The Rust server runs on a tokio async runtime. When a request arrives, Arrow IPC bytes are passed to Python via PyO3 (`spawn_blocking` + GIL acquisition), where the handler is invoked. Results are passed back as IPC bytes.
135
+
136
+ ## Development
137
+
138
+ ### Setup
139
+
140
+ ```bash
141
+ cd chalk-remote-call-python
142
+ uv venv --python 3.11
143
+ uv pip install -e ".[dev]"
144
+ ```
145
+
146
+ ### Running tests
147
+
148
+ ```bash
149
+ pytest tests/ -v
150
+ ```
151
+
152
+ ### Rust code
153
+
154
+ Rust source lives in `chalk-remote-call-rs/`.
155
+
156
+ To check the Rust code independently:
157
+
158
+ ```bash
159
+ PYO3_PYTHON=python3 cargo check --manifest-path chalk-remote-call-rs/chalk-remote-call-server/Cargo.toml
160
+ ```
@@ -0,0 +1,6 @@
1
+ from importlib.metadata import version
2
+
3
+ from chalk_remote_call.server import serve
4
+
5
+ __version__ = version("chalk-remote-call-python")
6
+ __all__ = ["serve"]
@@ -0,0 +1,3 @@
1
+ from chalk_remote_call.cli import main
2
+
3
+ main()
@@ -0,0 +1,31 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: chalk/runtime/v1/remote_python_call.proto
4
+ # Protobuf Python Version: 4.25.3
5
+ """Generated protocol buffer code."""
6
+ from google.protobuf import descriptor as _descriptor
7
+ from google.protobuf import descriptor_pool as _descriptor_pool
8
+ from google.protobuf import symbol_database as _symbol_database
9
+ from google.protobuf.internal import builder as _builder
10
+ # @@protoc_insertion_point(imports)
11
+
12
+ _sym_db = _symbol_database.Default()
13
+
14
+
15
+
16
+
17
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n)chalk/runtime/v1/remote_python_call.proto\x12\x10\x63halk.runtime.v1\"P\n\x13\x43\x61llFunctionRequest\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12%\n\x0e\x66\x65\x61ther_stream\x18\x02 \x01(\x0cR\rfeatherStream\"=\n\x14\x43\x61llFunctionResponse\x12%\n\x0e\x66\x65\x61ther_stream\x18\x01 \x01(\x0cR\rfeatherStream2x\n\x11RemoteCallService\x12\x63\n\x0c\x43\x61llFunction\x12%.chalk.runtime.v1.CallFunctionRequest\x1a&.chalk.runtime.v1.CallFunctionResponse\"\x00(\x01\x30\x01\x42\x8f\x01\n\x14\x63om.chalk.runtime.v1B\x15RemotePythonCallProtoP\x01\xa2\x02\x03\x43RX\xaa\x02\x10\x43halk.Runtime.V1\xca\x02\x10\x43halk\\Runtime\\V1\xe2\x02\x1c\x43halk\\Runtime\\V1\\GPBMetadata\xea\x02\x12\x43halk::Runtime::V1b\x06proto3')
18
+
19
+ _globals = globals()
20
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
21
+ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chalk.runtime.v1.remote_python_call_pb2', _globals)
22
+ if _descriptor._USE_C_DESCRIPTORS == False:
23
+ _globals['DESCRIPTOR']._options = None
24
+ _globals['DESCRIPTOR']._serialized_options = b'\n\024com.chalk.runtime.v1B\025RemotePythonCallProtoP\001\242\002\003CRX\252\002\020Chalk.Runtime.V1\312\002\020Chalk\\Runtime\\V1\342\002\034Chalk\\Runtime\\V1\\GPBMetadata\352\002\022Chalk::Runtime::V1'
25
+ _globals['_CALLFUNCTIONREQUEST']._serialized_start=63
26
+ _globals['_CALLFUNCTIONREQUEST']._serialized_end=143
27
+ _globals['_CALLFUNCTIONRESPONSE']._serialized_start=145
28
+ _globals['_CALLFUNCTIONRESPONSE']._serialized_end=206
29
+ _globals['_REMOTECALLSERVICE']._serialized_start=208
30
+ _globals['_REMOTECALLSERVICE']._serialized_end=328
31
+ # @@protoc_insertion_point(module_scope)
@@ -0,0 +1,66 @@
1
+ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
2
+ """Client and server classes corresponding to protobuf-defined services."""
3
+ import grpc
4
+
5
+ from chalk_remote_call._gen.chalk.runtime.v1 import remote_python_call_pb2 as chalk_dot_runtime_dot_v1_dot_remote__python__call__pb2
6
+
7
+
8
+ class RemoteCallServiceStub(object):
9
+ """Missing associated documentation comment in .proto file."""
10
+
11
+ def __init__(self, channel):
12
+ """Constructor.
13
+
14
+ Args:
15
+ channel: A grpc.Channel.
16
+ """
17
+ self.CallFunction = channel.stream_stream(
18
+ '/chalk.runtime.v1.RemoteCallService/CallFunction',
19
+ request_serializer=chalk_dot_runtime_dot_v1_dot_remote__python__call__pb2.CallFunctionRequest.SerializeToString,
20
+ response_deserializer=chalk_dot_runtime_dot_v1_dot_remote__python__call__pb2.CallFunctionResponse.FromString,
21
+ )
22
+
23
+
24
+ class RemoteCallServiceServicer(object):
25
+ """Missing associated documentation comment in .proto file."""
26
+
27
+ def CallFunction(self, request_iterator, context):
28
+ """Missing associated documentation comment in .proto file."""
29
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
30
+ context.set_details('Method not implemented!')
31
+ raise NotImplementedError('Method not implemented!')
32
+
33
+
34
+ def add_RemoteCallServiceServicer_to_server(servicer, server):
35
+ rpc_method_handlers = {
36
+ 'CallFunction': grpc.stream_stream_rpc_method_handler(
37
+ servicer.CallFunction,
38
+ request_deserializer=chalk_dot_runtime_dot_v1_dot_remote__python__call__pb2.CallFunctionRequest.FromString,
39
+ response_serializer=chalk_dot_runtime_dot_v1_dot_remote__python__call__pb2.CallFunctionResponse.SerializeToString,
40
+ ),
41
+ }
42
+ generic_handler = grpc.method_handlers_generic_handler(
43
+ 'chalk.runtime.v1.RemoteCallService', rpc_method_handlers)
44
+ server.add_generic_rpc_handlers((generic_handler,))
45
+
46
+
47
+ # This class is part of an EXPERIMENTAL API.
48
+ class RemoteCallService(object):
49
+ """Missing associated documentation comment in .proto file."""
50
+
51
+ @staticmethod
52
+ def CallFunction(request_iterator,
53
+ target,
54
+ options=(),
55
+ channel_credentials=None,
56
+ call_credentials=None,
57
+ insecure=False,
58
+ compression=None,
59
+ wait_for_ready=None,
60
+ timeout=None,
61
+ metadata=None):
62
+ return grpc.experimental.stream_stream(request_iterator, target, '/chalk.runtime.v1.RemoteCallService/CallFunction',
63
+ chalk_dot_runtime_dot_v1_dot_remote__python__call__pb2.CallFunctionRequest.SerializeToString,
64
+ chalk_dot_runtime_dot_v1_dot_remote__python__call__pb2.CallFunctionResponse.FromString,
65
+ options, channel_credentials,
66
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@@ -0,0 +1,11 @@
1
+ from collections.abc import Callable
2
+ from typing import Any
3
+
4
+ def start_server(  # type stub for the native extension's entry point; presumably launches the Rust-backed gRPC server — confirm against the extension source
5
+ handler: Callable[..., Any],  # user-supplied handler callable
6
+ process_fn: Callable[[bytes, Callable[..., Any], list[str] | None, dict[str, Any]], list[bytes]],  # called with (bytes, handler, optional name list, dict) and returns a list of bytes; presumably Arrow IPC in and out — TODO confirm
7
+ host: str,  # presumably the address to bind to — verify against caller
8
+ port: int,  # presumably the TCP port to listen on — verify against caller
9
+ workers: int,  # presumably the worker-thread count for the native runtime — TODO confirm
10
+ arg_names: list[str] | None = None,  # optional list of names; defaults to None
11
+ ) -> None: ...
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ import pyarrow as pa
4
+ import pyarrow.ipc as ipc
5
+
6
+
7
def decode_ipc_stream(data: bytes) -> list[pa.RecordBatch]:
    """Read every RecordBatch out of a buffer of Arrow IPC stream bytes.

    The buffer is opened with ``pyarrow.ipc.open_stream`` and the resulting
    reader is drained eagerly, so callers receive a plain list.
    """
    reader = ipc.open_stream(data)
    return [*reader]
10
+
11
+
12
def encode_record_batch(batch: pa.RecordBatch) -> bytes:
    """Serialize one RecordBatch into Arrow IPC stream bytes.

    A stream writer is created from the batch's own schema on top of an
    in-memory output buffer; once the batch has been written and the writer
    closed, the buffer contents are returned as ``bytes``.
    """
    buffer = pa.BufferOutputStream()
    stream_writer = ipc.new_stream(buffer, batch.schema)
    try:
        stream_writer.write_batch(batch)
    finally:
        # Closing finalizes the stream before the buffer is read back.
        stream_writer.close()
    return buffer.getvalue().to_pybytes()
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ import os
6
+ import sys
7
+
8
+ from chalk_remote_call.handler_loader import load_handler, load_startup_function
9
+ from chalk_remote_call.input_transform import parse_input_args
10
+ from chalk_remote_call.server import serve
11
+
12
+
13
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``chalk-remote-call`` command."""
    parser = argparse.ArgumentParser(
        prog="chalk-remote-call",
        description="Start a gRPC server implementing the Chalk RemoteCallService.",
    )
    parser.add_argument(
        "--handler",
        required=True,
        help="Dotted path to the handler function (e.g. my_module.handler)",
    )
    # Environment-derived defaults are handed to argparse as *strings*.
    # argparse runs ``type=int`` on a string default only when the flag is
    # absent from the command line, so a malformed environment variable
    # produces a normal usage error instead of a ValueError traceback, and it
    # is ignored entirely when the flag is given explicitly.
    parser.add_argument(
        "--port",
        type=int,
        default=os.environ.get("CHALK_REMOTE_CALL_PORT", "6666"),
        help="Port to listen on (default: 6666, env: CHALK_REMOTE_CALL_PORT)",
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("CHALK_REMOTE_CALL_HOST", "[::]"),
        help="Host to bind to (default: [::], env: CHALK_REMOTE_CALL_HOST)",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=os.environ.get("CHALK_REMOTE_CALL_WORKERS", "10"),
        help="Number of worker threads (default: 10, env: CHALK_REMOTE_CALL_WORKERS)",
    )
    parser.add_argument(
        "--on-startup",
        default=None,
        help="Dotted path to a startup function (e.g. my_module.setup)",
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Log level (default: INFO)",
    )
    return parser


def main() -> None:
    """Command-line entry point: parse arguments, load the handler, serve.

    Steps, in order:

    1. Parse ``sys.argv`` (argparse exits with status 2 on invalid input,
       including a non-integer ``CHALK_REMOTE_CALL_PORT`` /
       ``CHALK_REMOTE_CALL_WORKERS`` that is actually used as a default).
    2. Configure root logging at the requested level.
    3. Import the handler named by ``--handler``.
    4. Pick the startup function: an explicit ``--on-startup`` wins,
       otherwise the ``on_startup`` auto-discovered next to the handler.
    5. Read the column names from ``CHALK_INPUT_ARGS`` and call ``serve``.

    Raises:
        SystemExit: with status 1 when the handler or the startup function
            cannot be loaded (the error is printed to stderr), or with
            status 2 when argparse rejects the arguments.
    """
    args = _build_parser().parse_args()

    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    # Load handler. This is the top-level CLI boundary, so any failure while
    # importing user code is reported as a one-line error and a non-zero exit.
    try:
        handler, auto_startup = load_handler(args.handler)
    except Exception as e:
        print(f"Error loading handler: {e}", file=sys.stderr)
        sys.exit(1)

    # Determine startup function: explicit flag first, auto-discovery second.
    on_startup = None
    if args.on_startup:
        try:
            on_startup = load_startup_function(args.on_startup)
        except Exception as e:
            print(f"Error loading startup function: {e}", file=sys.stderr)
            sys.exit(1)
    elif auto_startup is not None:
        on_startup = auto_startup

    # Parse input args
    arg_names = parse_input_args()

    serve(
        handler=handler,
        host=args.host,
        port=args.port,
        workers=args.workers,
        on_startup=on_startup,
        arg_names=arg_names,
    )
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ from collections.abc import Callable
5
+ from typing import Any
6
+
7
+
8
def load_handler(dotted_path: str) -> tuple[Callable[..., Any], Callable[[], None] | None]:
    """Import and return the handler named by ``dotted_path``.

    The path is split at its last dot into a module path and an attribute
    name. The module is imported and the attribute is looked up on it.

    Returns:
        A ``(handler, on_startup)`` pair. ``on_startup`` is the module's
        top-level ``on_startup`` attribute when it exists and is callable,
        otherwise ``None``.

    Raises:
        ValueError: ``dotted_path`` contains no dot.
        AttributeError: the module has no such attribute (or it is ``None``).
        TypeError: the attribute exists but is not callable.
    """
    module_path, dot, func_name = dotted_path.rpartition(".")
    if not dot:
        raise ValueError(f"Handler path must be a dotted path like 'my_module.handler', got: {dotted_path!r}")

    loaded_module = importlib.import_module(module_path)

    target = getattr(loaded_module, func_name, None)
    if target is None:
        raise AttributeError(f"Module {module_path!r} has no attribute {func_name!r}")
    if not callable(target):
        raise TypeError(f"{dotted_path!r} is not callable")

    # A non-callable ``on_startup`` is treated the same as a missing one.
    startup_hook = getattr(loaded_module, "on_startup", None)
    return target, (startup_hook if callable(startup_hook) else None)
31
+
32
+
33
def load_startup_function(dotted_path: str) -> Callable[[], None]:
    """Import and return the startup callable named by ``dotted_path``.

    The path is split at its last dot into a module path and an attribute
    name; the module is imported and the attribute is returned if callable.

    Raises:
        ValueError: ``dotted_path`` contains no dot.
        AttributeError: the module has no such attribute (or it is ``None``).
        TypeError: the attribute exists but is not callable.
    """
    module_path, dot, func_name = dotted_path.rpartition(".")
    if dot == "":
        raise ValueError(f"Startup path must be a dotted path like 'my_module.setup', got: {dotted_path!r}")

    startup = getattr(importlib.import_module(module_path), func_name, None)
    if startup is None:
        raise AttributeError(f"Module {module_path!r} has no attribute {func_name!r}")
    if callable(startup):
        return startup
    raise TypeError(f"{dotted_path!r} is not callable")
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ import pyarrow as pa
6
+
7
+
8
def parse_input_args() -> list[str] | None:
    """Read the ``CHALK_INPUT_ARGS`` environment variable as column names.

    The value is split on commas; each piece is stripped of surrounding
    whitespace and blank pieces are dropped.

    Returns:
        ``None`` when the variable is unset or contains only whitespace,
        otherwise the list of names. A value made up solely of separators
        (for example ``",,"``) yields an empty list rather than ``None``.
    """
    env_value = os.environ.get("CHALK_INPUT_ARGS", "").strip()
    if env_value == "":
        return None
    pieces = map(str.strip, env_value.split(","))
    return [piece for piece in pieces if piece]
17
+
18
+
19
def transform(batch: pa.RecordBatch, arg_names: list[str] | None) -> dict[str, pa.Array]:
    """Turn a RecordBatch into a mapping from column name to column.

    With ``arg_names`` set to ``None`` each column is keyed by the name of
    its schema field. Otherwise the i-th column is keyed by ``arg_names[i]``
    and the batch's own field names are ignored.

    Raises:
        ValueError: ``arg_names`` is given but its length differs from the
            number of columns in ``batch``.
    """
    column_count = batch.num_columns

    if arg_names is None:
        return {batch.schema.field(idx).name: batch.column(idx) for idx in range(column_count)}

    if len(arg_names) != column_count:
        raise ValueError(
            f"CHALK_INPUT_ARGS specifies {len(arg_names)} names but RecordBatch has {batch.num_columns} columns"
        )

    renamed = {}
    for idx, name in enumerate(arg_names):
        renamed[name] = batch.column(idx)
    return renamed