pglearned 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: pglearned
3
+ Version: 0.1.0
4
+ Summary: Python bindings for pglearned extension
5
+ Project-URL: Homepage, https://github.com/RMTT/pglearned
6
+ Requires-Python: >=3.7
7
+ Requires-Dist: psycopg[binary]>=3.0.0
8
+ Requires-Dist: grpcio>=1.50.0
9
+ Requires-Dist: grpcio-reflection>=1.50.0
10
+ Requires-Dist: protobuf>=4.0.0
@@ -0,0 +1,77 @@
1
+ # pglearned python
2
+
3
+ Python bindings and framework for interacting with the `pglearned` PostgreSQL extension. This library allows you to collect query execution data for training and to implement custom query planning logic using Python.
4
+
5
+ ## Installation
6
+
7
+ Navigate to the `frameworks/python` directory and install the package:
8
+
9
+ ```bash
10
+ pip install .
11
+ ```
12
+
13
+ Ensure you have the `pglearned` extension installed and configured in your PostgreSQL database.
14
+
15
+ ## Usage
16
+
17
+ ### 1. Data Collection
18
+
19
+ Use `PglClient` to connect to your PostgreSQL database and collect query execution data (dataset) generated by the extension.
20
+
21
+ ```python
22
+ from pgl import PglClient
23
+
24
+ # Connect to the database
25
+ # ensure the connection string is correct for your setup
26
+ db_url = "postgresql://user:password@localhost:5432/dbname"
27
+ client = PglClient(db_url)
28
+
29
+ # Iterate over collected query plans from a named dataset
30
+ # 'training_set' should match the dataset name used in your SQL queries
31
+ for query_id, plan_data in client.qdataset_collect("training_set"):
32
+ print(f"Query ID: {query_id}")
33
+ print(f"Plan Structure: {plan_data.keys()}")
34
+ # plan_data is a dictionary containing the JSON plan representation
35
+ ```
36
+
37
+ ### 2. Custom Adapter
38
+
39
+ To implement custom query processing logic (e.g., a machine learning model that selects the best plan), create a class that inherits from `PglAdapter` and serve it using `run_server`.
40
+
41
+ ```python
42
+ from typing import List, Dict, Any
43
+ from pgl import PglAdapter, run_server
44
+
45
+ class MySmartAdapter(PglAdapter):
46
+ def choose_plan(self, plans: List[Dict[str, Any]]) -> int:
47
+ """
48
+ Choose the best plan from a list of candidates.
49
+
50
+ Args:
51
+ plans: A list of candidate query plans (parsed as dictionaries).
52
+
53
+ Returns:
54
+ The index of the chosen plan (0-based).
55
+ """
56
+ best_score = -1
57
+ best_index = 0
58
+
59
+ for i, plan in enumerate(plans):
60
+ # Implement your scoring logic here
61
+ # For demonstration, we just print the plan cost if available
62
+ print(f"Plan {i} cost: {plan.get('Total Cost', 'N/A')}")
63
+
64
+ # Simple heuristic: select plan based on some criteria
65
+ # ...
66
+
67
+ return best_index
68
+
69
+ if __name__ == "__main__":
70
+ # Initialize your adapter
71
+ adapter = MySmartAdapter()
72
+
73
+ # Start the gRPC server
74
+ # The pglearned extension will connect to this server to request plan selections
75
+ print("Starting PglAdapter server on port 50051...")
76
+ run_server(adapter, host="0.0.0.0", port=50051)
77
+ ```
@@ -0,0 +1,5 @@
1
+ from .client import PglClient
2
+ from .adapter import PglAdapter
3
+ from .server import run_server
4
+
5
+ __all__ = ["PglClient", "PglAdapter", "run_server"]
@@ -0,0 +1,21 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Dict, Any
3
+
4
+
5
class PglAdapter(ABC):
    """Interface that every pglearned plan-selection adapter implements.

    Concrete adapters subclass this type and override :meth:`choose_plan`;
    the class itself cannot be instantiated.
    """

    @abstractmethod
    def choose_plan(self, plans: List[Dict[str, Any]]) -> int:
        """Pick one plan out of the supplied candidates.

        Args:
            plans: Candidate query plans, each already parsed into a dictionary.

        Returns:
            The 0-based position of the selected plan within ``plans``.
        """
        ...
@@ -0,0 +1,52 @@
1
+ import psycopg
2
+ from typing import Iterator, Tuple, Dict, Any
3
+
4
+
5
class PglClient:
    """Client that pulls collected query plans out of a pglearned-enabled database."""

    def __init__(self, dburl: str):
        """
        Initialize the PglClient with a database connection string.

        No connection is opened here; the string is only stored.

        Args:
            dburl: libpq connection string (e.g., "postgresql://user:password@host:port/dbname")
        """
        self.dburl = dburl

    def qdataset_collect(
        self,
        dataset_name: str,
        offset: int = -1,
        limit: int = 10,
        method: str = "default",
        arm: int = -1,
    ) -> Iterator[Tuple[int, Dict[str, Any]]]:
        """
        Call the pgl_qdataset_collect UDF and stream back the query plans.

        This is a generator: the database connection is opened on the first
        iteration and stays open until the generator is exhausted or closed.

        Args:
            dataset_name: Name of the dataset to run.
            offset: Offset in the dataset (default -1 uses current_pos).
            limit: Number of queries to run (default 10).
            method: Planner method to use (default 'default').
            arm: Planner arm to use (default -1 for all/iterator).

        Yields:
            Tuple of (id, plan). The plan is whatever psycopg decodes the
            ``plan`` column to -- presumably a dict when the UDF returns
            json/jsonb; confirm against the extension's SQL definition.
        """
        with psycopg.connect(self.dburl) as conn:
            # Autocommit so CREATE EXTENSION and the UDF call are not wrapped
            # in one implicit transaction.
            conn.autocommit = True
            with conn.cursor() as cur:
                # Look for the same extension name that is installed below.
                # The previous check queried 'pgl', which never matches the
                # 'pglearned' extension, so CREATE EXTENSION ran on every call.
                cur.execute(
                    "SELECT 1 FROM pg_extension WHERE extname = 'pglearned'"
                )
                if cur.fetchone() is None:
                    cur.execute("CREATE EXTENSION IF NOT EXISTS pglearned")

                cur.execute(
                    "SELECT id, plan FROM pgl_qdataset_collect(%s, %s, %s, %s, %s)",
                    (dataset_name, offset, limit, method, arm),
                )

                # Stream rows straight off the cursor instead of buffering.
                for query_id, plan in cur:
                    yield query_id, plan
File without changes
@@ -0,0 +1,40 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # NO CHECKED-IN PROTOBUF GENCODE
4
+ # source: pgl_rpc.proto
5
+ # Protobuf Python Version: 6.31.1
6
+ """Generated protocol buffer code."""
7
+ from google.protobuf import descriptor as _descriptor
8
+ from google.protobuf import descriptor_pool as _descriptor_pool
9
+ from google.protobuf import runtime_version as _runtime_version
10
+ from google.protobuf import symbol_database as _symbol_database
11
+ from google.protobuf.internal import builder as _builder
12
# Check the installed protobuf runtime against the version this file was
# generated with (6.31.1, no suffix) before any descriptors are built.
_runtime_version.ValidateProtobufRuntimeVersion(
    _runtime_version.Domain.PUBLIC,
    6,
    31,
    1,
    '',
    'pgl_rpc.proto'
)
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




# Serialized FileDescriptorProto for pgl_rpc.proto (package "pgl_rpc", proto3).
# It declares:
#   message ChoosePlanRequest  { repeated string plans = 1; }
#   message ChoosePlanResponse { int32 chosen_plan_index = 1; }
#   service PglRemote { rpc ChoosePlan(ChoosePlanRequest) returns (ChoosePlanResponse); }
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rpgl_rpc.proto\x12\x07pgl_rpc\"\"\n\x11\x43hoosePlanRequest\x12\r\n\x05plans\x18\x01 \x03(\t\"/\n\x12\x43hoosePlanResponse\x12\x19\n\x11\x63hosen_plan_index\x18\x01 \x01(\x05\x32R\n\tPglRemote\x12\x45\n\nChoosePlan\x12\x1a.pgl_rpc.ChoosePlanRequest\x1a\x1b.pgl_rpc.ChoosePlanResponseb\x06proto3')

# The builder calls below populate this module's globals with the generated
# descriptors and message classes.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'pgl_rpc_pb2', _globals)
# NOTE(review): nesting under this `if` is reconstructed from protoc's usual
# layout -- confirm against a freshly generated file.
if not _descriptor._USE_C_DESCRIPTORS:
  DESCRIPTOR._loaded_options = None
  # Start/end byte offsets of each descriptor inside the serialized file above.
  _globals['_CHOOSEPLANREQUEST']._serialized_start=26
  _globals['_CHOOSEPLANREQUEST']._serialized_end=60
  _globals['_CHOOSEPLANRESPONSE']._serialized_start=62
  _globals['_CHOOSEPLANRESPONSE']._serialized_end=109
  _globals['_PGLREMOTE']._serialized_start=111
  _globals['_PGLREMOTE']._serialized_end=193
# @@protoc_insertion_point(module_scope)
@@ -0,0 +1,97 @@
1
+ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
2
+ """Client and server classes corresponding to protobuf-defined services."""
3
+ import grpc
4
+ import warnings
5
+
6
+ from . import pgl_rpc_pb2 as pgl__rpc__pb2
7
+
8
# Version of the grpc tooling that produced this file; the installed grpcio
# must not be older than this.
GRPC_GENERATED_VERSION = '1.78.1'
GRPC_VERSION = grpc.__version__
_version_not_supported = False

try:
    from grpc._utilities import first_version_is_lower
    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
    # The comparison helper is unavailable in this grpc install, so the
    # installed version is treated as unsupported.
    _version_not_supported = True

# Fail at import time rather than on the first RPC when the runtime is too old.
if _version_not_supported:
    raise RuntimeError(
        f'The grpc package installed is at version {GRPC_VERSION},'
        + ' but the generated code in pgl_rpc_pb2_grpc.py depends on'
        + f' grpcio>={GRPC_GENERATED_VERSION}.'
        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
    )
26
+
27
+
28
class PglRemoteStub(object):
    """Client-side stub for the pgl_rpc.PglRemote service."""

    def __init__(self, channel):
        """Constructor.

        Binds ``self.ChoosePlan`` to a unary-unary callable for the
        ``/pgl_rpc.PglRemote/ChoosePlan`` method on the given channel.

        Args:
            channel: A grpc.Channel.
        """
        # Requests are serialized as ChoosePlanRequest and replies are parsed
        # as ChoosePlanResponse.
        self.ChoosePlan = channel.unary_unary(
                '/pgl_rpc.PglRemote/ChoosePlan',
                request_serializer=pgl__rpc__pb2.ChoosePlanRequest.SerializeToString,
                response_deserializer=pgl__rpc__pb2.ChoosePlanResponse.FromString,
                _registered_method=True)
42
+
43
+
44
class PglRemoteServicer(object):
    """Server-side base class for the pgl_rpc.PglRemote service.

    The default handler below only reports UNIMPLEMENTED; a concrete servicer
    is expected to override ``ChoosePlan``.
    """

    def ChoosePlan(self, request, context):
        """Default handler: mark the call UNIMPLEMENTED and raise NotImplementedError."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')
52
+
53
+
54
def add_PglRemoteServicer_to_server(servicer, server):
    """Register ``servicer`` on ``server`` as the handler for pgl_rpc.PglRemote.

    Args:
        servicer: Object providing a ``ChoosePlan(request, context)`` method.
        server: The gRPC server to attach the handlers to.
    """
    # Map each RPC name to a handler that parses ChoosePlanRequest and
    # serializes ChoosePlanResponse.
    rpc_method_handlers = {
            'ChoosePlan': grpc.unary_unary_rpc_method_handler(
                    servicer.ChoosePlan,
                    request_deserializer=pgl__rpc__pb2.ChoosePlanRequest.FromString,
                    response_serializer=pgl__rpc__pb2.ChoosePlanResponse.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'pgl_rpc.PglRemote', rpc_method_handlers)
    # The same handlers are registered twice: once as a generic handler and
    # once as registered-method handlers.
    server.add_generic_rpc_handlers((generic_handler,))
    server.add_registered_method_handlers('pgl_rpc.PglRemote', rpc_method_handlers)
66
+
67
+
68
 # This class is part of an EXPERIMENTAL API.
class PglRemote(object):
    """Static one-shot helpers for calling pgl_rpc.PglRemote without building a stub."""

    @staticmethod
    def ChoosePlan(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Invoke ``/pgl_rpc.PglRemote/ChoosePlan`` on ``target`` via grpc.experimental.unary_unary.

        Every argument is forwarded unchanged to ``grpc.experimental.unary_unary``,
        whose result is returned as-is.
        """
        # The arguments are passed positionally, and `insecure` precedes
        # `call_credentials` here, unlike in this method's own signature.
        # Do not reorder.
        return grpc.experimental.unary_unary(
            request,
            target,
            '/pgl_rpc.PglRemote/ChoosePlan',
            pgl__rpc__pb2.ChoosePlanRequest.SerializeToString,
            pgl__rpc__pb2.ChoosePlanResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)
@@ -0,0 +1,90 @@
1
+ import json
2
+ import logging
3
+ from concurrent import futures
4
+
5
+ import grpc
6
+ from grpc_reflection.v1alpha import reflection
7
+
8
+ # Note: These imports assume the protobuf code has been generated.
9
+ # Run: python -m grpc_tools.protoc -I../../proto --python_out=./pgl/proto --grpc_python_out=./pgl/proto ../../proto/pgl_rpc.proto
10
+ from .proto import pgl_rpc_pb2
11
+ from .proto import pgl_rpc_pb2_grpc
12
+
13
+ from .adapter import PglAdapter
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class PglRemoteAdapter(pgl_rpc_pb2_grpc.PglRemoteServicer):
    """
    Internal gRPC servicer that adapts the PglAdapter to the PglRemote service.

    It decodes the JSON plans carried by a request, delegates the choice to
    the wrapped adapter, and validates the answer before replying.
    """

    def __init__(self, adapter: PglAdapter):
        """Store the user adapter whose ``choose_plan`` answers each request."""
        self.adapter = adapter

    @staticmethod
    def _parse_plans(raw_plans):
        """Decode each JSON plan string; a plan that fails to decode becomes ``{}``."""
        parsed_plans = []
        for i, plan_str in enumerate(raw_plans):
            try:
                parsed_plans.append(json.loads(plan_str))
            except json.JSONDecodeError as e:
                logger.error("Failed to parse plan at index %s: %s", i, e)
                # Keep a placeholder so list positions still line up with the
                # request's plan indices.
                parsed_plans.append({})
        return parsed_plans

    @staticmethod
    def _checked_index(chosen_index, plan_count):
        """Return ``chosen_index`` as an int in ``[0, plan_count)``, else fall back to 0."""
        import operator

        try:
            # Accepts int and other integer-like objects (anything defining
            # __index__) but rejects None, str and float, which would
            # otherwise raise later and fail the RPC.
            index = operator.index(chosen_index)
        except TypeError:
            logger.error("Non-integer plan index returned: %r", chosen_index)
            return 0

        if not (0 <= index < plan_count):
            logger.error(
                "Invalid plan index returned: %s. Max: %s", index, plan_count - 1
            )
            return 0
        return index

    def ChoosePlan(self, request, context):
        """
        Handle a ChoosePlan RPC.

        Args:
            request: ChoosePlanRequest whose ``plans`` are JSON strings.
            context: gRPC servicer context, used to abort on adapter failure.

        Returns:
            A ChoosePlanResponse carrying the chosen plan index. Index 0 is
            returned when the request has no plans or the adapter's answer is
            not a valid index.
        """
        if not request.plans:
            return pgl_rpc_pb2.ChoosePlanResponse(chosen_plan_index=0)

        parsed_plans = self._parse_plans(request.plans)

        try:
            chosen_index = self.adapter.choose_plan(parsed_plans)
        except Exception as e:
            logger.exception("Error in user adapter logic")
            context.abort(grpc.StatusCode.INTERNAL, f"Adapter error: {e}")
            # abort() raises on a real gRPC context; this return only matters
            # if a context implementation does not.
            return

        chosen_index = self._checked_index(chosen_index, len(request.plans))
        return pgl_rpc_pb2.ChoosePlanResponse(chosen_plan_index=chosen_index)
54
+
55
+
56
def run_server(
    adapter: PglAdapter, host: str = "0.0.0.0", port: int = 50051, max_workers: int = 10
):
    """
    Serve ``adapter`` over gRPC and block until the server terminates.

    The server listens on an insecure port, exposes the PglRemote service and
    has server reflection enabled. A KeyboardInterrupt while waiting calls
    ``server.stop(0)``.

    Args:
        adapter: An instance of a PglAdapter subclass.
        host: Host to bind to.
        port: Port to bind to.
        max_workers: Number of worker threads for the gRPC server.
    """
    worker_pool = futures.ThreadPoolExecutor(max_workers=max_workers)
    server = grpc.server(worker_pool)

    # Wrap the user adapter in the generated servicer interface and register it.
    pgl_rpc_pb2_grpc.add_PglRemoteServicer_to_server(PglRemoteAdapter(adapter), server)

    # Reflection advertises the PglRemote service alongside the reflection
    # service itself.
    pgl_remote = pgl_rpc_pb2.DESCRIPTOR.services_by_name["PglRemote"]
    reflection.enable_server_reflection(
        (pgl_remote.full_name, reflection.SERVICE_NAME), server
    )

    address = f"{host}:{port}"
    server.add_insecure_port(address)

    print(f"PglServer starting on {address}...")
    server.start()
    try:
        server.wait_for_termination()
    except KeyboardInterrupt:
        server.stop(0)
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: pglearned
3
+ Version: 0.1.0
4
+ Summary: Python bindings for pglearned extension
5
+ Project-URL: Homepage, https://github.com/RMTT/pglearned
6
+ Requires-Python: >=3.7
7
+ Requires-Dist: psycopg[binary]>=3.0.0
8
+ Requires-Dist: grpcio>=1.50.0
9
+ Requires-Dist: grpcio-reflection>=1.50.0
10
+ Requires-Dist: protobuf>=4.0.0
@@ -0,0 +1,14 @@
1
+ README.md
2
+ pyproject.toml
3
+ pgl/__init__.py
4
+ pgl/adapter.py
5
+ pgl/client.py
6
+ pgl/server.py
7
+ pgl/proto/__init__.py
8
+ pgl/proto/pgl_rpc_pb2.py
9
+ pgl/proto/pgl_rpc_pb2_grpc.py
10
+ pglearned.egg-info/PKG-INFO
11
+ pglearned.egg-info/SOURCES.txt
12
+ pglearned.egg-info/dependency_links.txt
13
+ pglearned.egg-info/requires.txt
14
+ pglearned.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ psycopg[binary]>=3.0.0
2
+ grpcio>=1.50.0
3
+ grpcio-reflection>=1.50.0
4
+ protobuf>=4.0.0
@@ -0,0 +1 @@
1
+ pgl
@@ -0,0 +1,18 @@
1
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pglearned"
version = "0.1.0"
description = "Python bindings for pglearned extension"
# NOTE(review): the grpcio/protobuf floors below probably imply a newer
# minimum Python than 3.7 -- confirm against their release notes and raise
# this bound to match.
requires-python = ">=3.7"
dependencies = [
    "psycopg[binary]>=3.0.0",
    # pgl/proto/pgl_rpc_pb2_grpc.py raises RuntimeError at import time when the
    # installed grpcio is older than the version it was generated with (1.78.1).
    "grpcio>=1.78.1",
    "grpcio-reflection>=1.50.0",
    # pgl/proto/pgl_rpc_pb2.py imports google.protobuf.runtime_version and
    # validates the runtime against gencode version 6.31.1.
    "protobuf>=6.31.1",
]

[project.urls]
"Homepage" = "https://github.com/RMTT/pglearned"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+