PyPI - duckdb - Versions diffs - 1.5.0.dev37__cp312-cp312-macosx_10_13_universal2.whl → 1.5.0.dev94__cp312-cp312-macosx_10_13_universal2.whl - Mend

duckdb 1.5.0.dev37__cp312-cp312-macosx_10_13_universal2.whl → 1.5.0.dev94__cp312-cp312-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of duckdb might be problematic. Click here for more details.

Files changed (56)

_duckdb-stubs/__init__.pyi +1443 -0
_duckdb-stubs/_func.pyi +46 -0
_duckdb-stubs/_sqltypes.pyi +75 -0
_duckdb.cpython-312-darwin.so +0 -0
adbc_driver_duckdb/__init__.py +49 -0
adbc_driver_duckdb/dbapi.py +115 -0
duckdb/__init__.py +341 -435
duckdb/_dbapi_type_object.py +231 -0
duckdb/_version.py +22 -0
duckdb/bytes_io_wrapper.py +12 -9
duckdb/experimental/__init__.py +2 -1
duckdb/experimental/spark/__init__.py +3 -4
duckdb/experimental/spark/_globals.py +8 -8
duckdb/experimental/spark/_typing.py +7 -9
duckdb/experimental/spark/conf.py +16 -15
duckdb/experimental/spark/context.py +60 -44
duckdb/experimental/spark/errors/__init__.py +33 -35
duckdb/experimental/spark/errors/error_classes.py +1 -1
duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
duckdb/experimental/spark/errors/exceptions/base.py +39 -88
duckdb/experimental/spark/errors/utils.py +11 -16
duckdb/experimental/spark/exception.py +9 -6
duckdb/experimental/spark/sql/__init__.py +5 -5
duckdb/experimental/spark/sql/_typing.py +8 -15
duckdb/experimental/spark/sql/catalog.py +21 -20
duckdb/experimental/spark/sql/column.py +48 -55
duckdb/experimental/spark/sql/conf.py +9 -8
duckdb/experimental/spark/sql/dataframe.py +185 -233
duckdb/experimental/spark/sql/functions.py +1222 -1248
duckdb/experimental/spark/sql/group.py +56 -52
duckdb/experimental/spark/sql/readwriter.py +80 -94
duckdb/experimental/spark/sql/session.py +64 -59
duckdb/experimental/spark/sql/streaming.py +9 -10
duckdb/experimental/spark/sql/type_utils.py +67 -65
duckdb/experimental/spark/sql/types.py +309 -345
duckdb/experimental/spark/sql/udf.py +6 -6
duckdb/filesystem.py +26 -16
duckdb/func/__init__.py +3 -0
duckdb/functional/__init__.py +12 -16
duckdb/polars_io.py +130 -83
duckdb/query_graph/__main__.py +91 -96
duckdb/sqltypes/__init__.py +63 -0
duckdb/typing/__init__.py +18 -8
duckdb/udf.py +10 -5
duckdb/value/__init__.py +1 -0
duckdb/value/constant/__init__.py +62 -60
{duckdb-1.5.0.dev37.dist-info → duckdb-1.5.0.dev94.dist-info}/METADATA +12 -4
duckdb-1.5.0.dev94.dist-info/RECORD +52 -0
duckdb/__init__.pyi +0 -713
duckdb/functional/__init__.pyi +0 -31
duckdb/typing/__init__.pyi +0 -36
duckdb/value/constant/__init__.pyi +0 -115
duckdb-1.5.0.dev37.dist-info/RECORD +0 -47
/duckdb/{value/__init__.pyi → py.typed} +0 -0
{duckdb-1.5.0.dev37.dist-info → duckdb-1.5.0.dev94.dist-info}/WHEEL +0 -0
{duckdb-1.5.0.dev37.dist-info → duckdb-1.5.0.dev94.dist-info}/licenses/LICENSE +0 -0

duckdb/query_graph/__main__.py CHANGED Viewed

@@ -1,10 +1,9 @@
+import argparse  # noqa: D100
 import json
-import os
-import sys
 import re
 import webbrowser
 from functools import reduce
-import argparse
+from pathlib import Path
 qgraph_css = """
 .styled-table {
@@ -57,7 +56,7 @@ qgraph_css = """
   text-align: center;
   padding: 0px;
   border-radius: 1px;
   /* Positioning */
   position: absolute;
   z-index: 1;
@@ -65,7 +64,7 @@ qgraph_css = """
   left: 50%;
   transform: translateX(-50%);
   margin-bottom: 8px;
   /* Tooltip Arrow */
   width: 400px;
 }
@@ -76,124 +75,128 @@ qgraph_css = """
 """
-class NodeTiming:
-    def __init__(self, phase: str, time: float) -> object:
+class NodeTiming:  # noqa: D101
+    def __init__(self, phase: str, time: float) -> None:  # noqa: D107
         self.phase = phase
         self.time = time
         # percentage is determined later.
         self.percentage = 0
-    def calculate_percentage(self, total_time: float) -> None:
+    def calculate_percentage(self, total_time: float) -> None:  # noqa: D102
         self.percentage = self.time / total_time
-    def combine_timing(l: object, r: object) -> object:
-        # TODO: can only add timings for same-phase nodes
-        total_time = l.time + r.time
-        return NodeTiming(l.phase, total_time)
+    def combine_timing(self, r: "NodeTiming") -> "NodeTiming":  # noqa: D102
+        # TODO: can only add timings for same-phase nodes  # noqa: TD002, TD003
+        total_time = self.time + r.time
+        return NodeTiming(self.phase, total_time)
-class AllTimings:
-    def __init__(self):
+class AllTimings:  # noqa: D101
+    def __init__(self) -> None:  # noqa: D107
         self.phase_to_timings = {}
-    def add_node_timing(self, node_timing: NodeTiming):
+    def add_node_timing(self, node_timing: NodeTiming) -> None:  # noqa: D102
         if node_timing.phase in self.phase_to_timings:
             self.phase_to_timings[node_timing.phase].append(node_timing)
-            return
-        self.phase_to_timings[node_timing.phase] = [node_timing]
+        else:
+            self.phase_to_timings[node_timing.phase] = [node_timing]
-    def get_phase_timings(self, phase: str):
+    def get_phase_timings(self, phase: str) -> list[NodeTiming]:  # noqa: D102
         return self.phase_to_timings[phase]
-    def get_summary_phase_timings(self, phase: str):
+    def get_summary_phase_timings(self, phase: str) -> NodeTiming:  # noqa: D102
         return reduce(NodeTiming.combine_timing, self.phase_to_timings[phase])
-    def get_phases(self):
+    def get_phases(self) -> list[NodeTiming]:  # noqa: D102
         phases = list(self.phase_to_timings.keys())
         phases.sort(key=lambda x: (self.get_summary_phase_timings(x)).time)
         phases.reverse()
         return phases
-    def get_sum_of_all_timings(self):
+    def get_sum_of_all_timings(self) -> float:  # noqa: D102
         total_timing_sum = 0
-        for phase in self.phase_to_timings.keys():
+        for phase in self.phase_to_timings:
             total_timing_sum += self.get_summary_phase_timings(phase).time
         return total_timing_sum
-def open_utf8(fpath: str, flags: str) -> object:
-    return open(fpath, flags, encoding="utf8")
+def open_utf8(fpath: str, flags: str) -> object:  # noqa: D103
+    return Path(fpath).open(mode=flags, encoding="utf8")
-def get_child_timings(top_node: object, query_timings: object) -> str:
-    node_timing = NodeTiming(top_node['operator_type'], float(top_node['operator_timing']))
+def get_child_timings(top_node: object, query_timings: object) -> str:  # noqa: D103
+    node_timing = NodeTiming(top_node["operator_type"], float(top_node["operator_timing"]))
     query_timings.add_node_timing(node_timing)
-    for child in top_node['children']:
+    for child in top_node["children"]:
         get_child_timings(child, query_timings)
-def get_pink_shade_hex(fraction: float):
+def get_pink_shade_hex(fraction: float) -> str:  # noqa: D103
     fraction = max(0, min(1, fraction))
     # Define the RGB values for very light pink (almost white) and dark pink
     light_pink = (255, 250, 250)  # Very light pink
-    dark_pink = (255, 20, 147)    # Dark pink
+    dark_pink = (255, 20, 147)  # Dark pink
     # Calculate the RGB values for the given fraction
     r = int(light_pink[0] + (dark_pink[0] - light_pink[0]) * fraction)
     g = int(light_pink[1] + (dark_pink[1] - light_pink[1]) * fraction)
     b = int(light_pink[2] + (dark_pink[2] - light_pink[2]) * fraction)
     # Return as hexadecimal color code
     return f"#{r:02x}{g:02x}{b:02x}"
-def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str:
-    node_style = f"background-color: {get_pink_shade_hex(float(result)/cpu_time)};"
-    body = f"<span class=\"tf-nc custom-tooltip\" style=\"{node_style}\">"
-    body += "<div class=\"node-body\">"
+def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str:  # noqa: D103
+    node_style = f"background-color: {get_pink_shade_hex(float(result) / cpu_time)};"
+    body = f'<span class="tf-nc custom-tooltip" style="{node_style}">'
+    body += '<div class="node-body">'
     new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ")
     formatted_num = f"{float(result):.4f}"
     body += f"<p><b>{new_name}</b> </p><p>time: {formatted_num} seconds</p>"
-    body += f"<span class=\"tooltip-text\"> {extra_info} </span>"
-    if (width > 0):
+    body += f'<span class="tooltip-text"> {extra_info} </span>'
+    if width > 0:
         body += f"<p>cardinality: {card}</p>"
         body += f"<p>estimate: {est}</p>"
         body += f"<p>width: {width} bytes</p>"
-    # TODO: Expand on timing. Usually available from a detailed profiling
+    # TODO: Expand on timing. Usually available from a detailed profiling  # noqa: TD002, TD003
     body += "</div>"
     body += "</span>"
     return body
-def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:
+def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:  # noqa: D103
     node_prefix_html = "<li>"
     node_suffix_html = "</li>"
     extra_info = ""
     estimate = 0
-    for key in json_graph['extra_info']:
-        value = json_graph['extra_info'][key]
-        if (key == "Estimated Cardinality"):
+    for key in json_graph["extra_info"]:
+        value = json_graph["extra_info"][key]
+        if key == "Estimated Cardinality":
             estimate = int(value)
         else:
             extra_info += f"{key}: {value} <br>"
     cardinality = json_graph["operator_cardinality"]
-    width = int(json_graph["result_set_size"]/max(1,cardinality))
+    width = int(json_graph["result_set_size"] / max(1, cardinality))
     # get rid of some typically long names
     extra_info = re.sub(r"__internal_\s*", "__", extra_info)
     extra_info = re.sub(r"compress_integral\s*", "compress", extra_info)
-    node_body = get_node_body(json_graph["operator_type"],
-                              json_graph["operator_timing"],
-                              cpu_time, cardinality, estimate, width,
-                              re.sub(r",\s*", ", ", extra_info))
+    node_body = get_node_body(
+        json_graph["operator_type"],
+        json_graph["operator_timing"],
+        cpu_time,
+        cardinality,
+        estimate,
+        width,
+        re.sub(r",\s*", ", ", extra_info),
+    )
     children_html = ""
-    if len(json_graph['children']) >= 1:
+    if len(json_graph["children"]) >= 1:
         children_html += "<ul>"
         for child in json_graph["children"]:
             children_html += generate_tree_recursive(child, cpu_time)
@@ -202,12 +205,12 @@ def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:
 # For generating the table in the top left.
-def generate_timing_html(graph_json: object, query_timings: object) -> object:
+def generate_timing_html(graph_json: object, query_timings: object) -> object:  # noqa: D103
     json_graph = json.loads(graph_json)
     gather_timing_information(json_graph, query_timings)
-    total_time = float(json_graph.get('operator_timing') or json_graph.get('latency'))
+    total_time = float(json_graph.get("operator_timing") or json_graph.get("latency"))
     table_head = """
-	<table class=\"styled-table\">
+	<table class=\"styled-table\">
 		<thead>
 			<tr>
 				<th>Phase</th>
@@ -224,7 +227,7 @@ def generate_timing_html(graph_json: object, query_timings: object) -> object:
     all_phases = query_timings.get_phases()
     query_timings.add_node_timing(NodeTiming("TOTAL TIME", total_time))
     query_timings.add_node_timing(NodeTiming("Execution Time", execution_time))
-    all_phases = ["TOTAL TIME", "Execution Time"] + all_phases
+    all_phases = ["TOTAL TIME", "Execution Time", *all_phases]
     for phase in all_phases:
         summarized_phase = query_timings.get_summary_phase_timings(phase)
         summarized_phase.calculate_percentage(total_time)
@@ -240,55 +243,48 @@ def generate_timing_html(graph_json: object, query_timings: object) -> object:
     return table_head + table_body
-def generate_tree_html(graph_json: object) -> str:
+def generate_tree_html(graph_json: object) -> str:  # noqa: D103
     json_graph = json.loads(graph_json)
-    cpu_time = float(json_graph['cpu_time'])
-    tree_prefix = "<div class=\"tf-tree tf-gap-sm\"> \n <ul>"
+    cpu_time = float(json_graph["cpu_time"])
+    tree_prefix = '<div class="tf-tree tf-gap-sm"> \n <ul>'
     tree_suffix = "</ul> </div>"
     # first level of json is general overview
-    # FIXME: make sure json output first level always has only 1 level
-    tree_body = generate_tree_recursive(json_graph['children'][0], cpu_time)
+    # TODO: make sure json output first level always has only 1 level  # noqa: TD002, TD003
+    tree_body = generate_tree_recursive(json_graph["children"][0], cpu_time)
     return tree_prefix + tree_body + tree_suffix
-def generate_ipython(json_input: str) -> str:
+def generate_ipython(json_input: str) -> str:  # noqa: D103
     from IPython.core.display import HTML
-    html_output = generate_html(json_input, False)
+    html_output = generate_html(json_input, False)  # noqa: F821
-    return HTML(("\n"
-                 "	${CSS}\n"
-                 "	${LIBRARIES}\n"
-                 "	<div class=\"chart\" id=\"query-profile\"></div>\n"
-                 "	${CHART_SCRIPT}\n"
-                 "	").replace("${CSS}", html_output['css']).replace('${CHART_SCRIPT}',
-                                                                       html_output['chart_script']).replace(
-        '${LIBRARIES}', html_output['libraries']))
+    return HTML(
+        ('\n	${CSS}\n	${LIBRARIES}\n	<div class="chart" id="query-profile"></div>\n	${CHART_SCRIPT}\n	')
+        .replace("${CSS}", html_output["css"])
+        .replace("${CHART_SCRIPT}", html_output["chart_script"])
+        .replace("${LIBRARIES}", html_output["libraries"])
+    )
-def generate_style_html(graph_json: str, include_meta_info: bool) -> None:
-    treeflex_css = "<link rel=\"stylesheet\" href=\"https://unpkg.com/treeflex/dist/css/treeflex.css\">\n"
+def generate_style_html(graph_json: str, include_meta_info: bool) -> None:  # noqa: D103, FBT001
+    treeflex_css = '<link rel="stylesheet" href="https://unpkg.com/treeflex/dist/css/treeflex.css">\n'
     css = "<style>\n"
     css += qgraph_css + "\n"
     css += "</style>\n"
-    return {
-        'treeflex_css': treeflex_css,
-        'duckdb_css': css,
-        'libraries': '',
-        'chart_script': ''
-    }
+    return {"treeflex_css": treeflex_css, "duckdb_css": css, "libraries": "", "chart_script": ""}
-def gather_timing_information(json: str, query_timings: object) -> None:
+def gather_timing_information(json: str, query_timings: object) -> None:  # noqa: D103
     # add up all of the times
     # measure each time as a percentage of the total time.
     # then you can return a list of [phase, time, percentage]
-    get_child_timings(json['children'][0], query_timings)
+    get_child_timings(json["children"][0], query_timings)
-def translate_json_to_html(input_file: str, output_file: str) -> None:
+def translate_json_to_html(input_file: str, output_file: str) -> None:  # noqa: D103
     query_timings = AllTimings()
-    with open_utf8(input_file, 'r') as f:
+    with open_utf8(input_file, "r") as f:
         text = f.read()
     html_output = generate_style_html(text, True)
@@ -317,23 +313,22 @@ def translate_json_to_html(input_file: str, output_file: str) -> None:
 </body>
 </html>
 """
-        html = html.replace("${TREEFLEX_CSS}", html_output['treeflex_css'])
-        html = html.replace("${DUCKDB_CSS}", html_output['duckdb_css'])
+        html = html.replace("${TREEFLEX_CSS}", html_output["treeflex_css"])
+        html = html.replace("${DUCKDB_CSS}", html_output["duckdb_css"])
         html = html.replace("${TIMING_TABLE}", timing_table)
-        html = html.replace('${TREE}', tree_output)
+        html = html.replace("${TREE}", tree_output)
         f.write(html)
-def main() -> None:
-    if sys.version_info[0] < 3:
-        print("Please use python3")
-        exit(1)
+def main() -> None:  # noqa: D103
     parser = argparse.ArgumentParser(
-        prog='Query Graph Generator',
-        description='Given a json profile output, generate a html file showing the query graph and timings of operators')
-    parser.add_argument('profile_input', help='profile input in json')
-    parser.add_argument('--out', required=False, default=False)
-    parser.add_argument('--open', required=False, action='store_true', default=True)
+        prog="Query Graph Generator",
+        description="""Given a json profile output, generate a html file showing the query graph and
+        timings of operators""",
+    )
+    parser.add_argument("profile_input", help="profile input in json")
+    parser.add_argument("--out", required=False, default=False)
+    parser.add_argument("--open", required=False, action="store_true", default=True)
     args = parser.parse_args()
     input = args.profile_input
@@ -356,8 +351,8 @@ def main() -> None:
     translate_json_to_html(input, output)
     if open_output:
-        webbrowser.open('file://' + os.path.abspath(output), new=2)
+        webbrowser.open(f"file://{Path(output).resolve()}", new=2)
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

duckdb/sqltypes/__init__.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""DuckDB's SQL types."""
+from _duckdb._sqltypes import (
+    BIGINT,
+    BIT,
+    BLOB,
+    BOOLEAN,
+    DATE,
+    DOUBLE,
+    FLOAT,
+    HUGEINT,
+    INTEGER,
+    INTERVAL,
+    SMALLINT,
+    SQLNULL,
+    TIME,
+    TIME_TZ,
+    TIMESTAMP,
+    TIMESTAMP_MS,
+    TIMESTAMP_NS,
+    TIMESTAMP_S,
+    TIMESTAMP_TZ,
+    TINYINT,
+    UBIGINT,
+    UHUGEINT,
+    UINTEGER,
+    USMALLINT,
+    UTINYINT,
+    UUID,
+    VARCHAR,
+    DuckDBPyType,
+)
+__all__ = [
+    "BIGINT",
+    "BIT",
+    "BLOB",
+    "BOOLEAN",
+    "DATE",
+    "DOUBLE",
+    "FLOAT",
+    "HUGEINT",
+    "INTEGER",
+    "INTERVAL",
+    "SMALLINT",
+    "SQLNULL",
+    "TIME",
+    "TIMESTAMP",
+    "TIMESTAMP_MS",
+    "TIMESTAMP_NS",
+    "TIMESTAMP_S",
+    "TIMESTAMP_TZ",
+    "TIME_TZ",
+    "TINYINT",
+    "UBIGINT",
+    "UHUGEINT",
+    "UINTEGER",
+    "USMALLINT",
+    "UTINYINT",
+    "UUID",
+    "VARCHAR",
+    "DuckDBPyType",
+]

duckdb/typing/__init__.py CHANGED Viewed

@@ -1,5 +1,8 @@
-from _duckdb.typing import (
-    DuckDBPyType,
+"""DuckDB's SQL types. DEPRECATED. Please use `duckdb.sqltypes` instead."""
+import warnings
+from duckdb.sqltypes import (
     BIGINT,
     BIT,
     BLOB,
@@ -8,29 +11,29 @@ from _duckdb.typing import (
     DOUBLE,
     FLOAT,
     HUGEINT,
-    UHUGEINT,
     INTEGER,
     INTERVAL,
     SMALLINT,
     SQLNULL,
     TIME,
+    TIME_TZ,
     TIMESTAMP,
     TIMESTAMP_MS,
     TIMESTAMP_NS,
     TIMESTAMP_S,
     TIMESTAMP_TZ,
-    TIME_TZ,
     TINYINT,
     UBIGINT,
+    UHUGEINT,
     UINTEGER,
     USMALLINT,
     UTINYINT,
     UUID,
-    VARCHAR
+    VARCHAR,
+    DuckDBPyType,
 )
 __all__ = [
-    "DuckDBPyType",
     "BIGINT",
     "BIT",
     "BLOB",
@@ -39,7 +42,6 @@ __all__ = [
     "DOUBLE",
     "FLOAT",
     "HUGEINT",
-    "UHUGEINT",
     "INTEGER",
     "INTERVAL",
     "SMALLINT",
@@ -53,9 +55,17 @@ __all__ = [
     "TIME_TZ",
     "TINYINT",
     "UBIGINT",
+    "UHUGEINT",
     "UINTEGER",
     "USMALLINT",
     "UTINYINT",
     "UUID",
-    "VARCHAR"
+    "VARCHAR",
+    "DuckDBPyType",
 ]
+warnings.warn(
+    "`duckdb.typing` is deprecated and will be removed in a future version. Please use `duckdb.sqltypes` instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)

duckdb/udf.py CHANGED Viewed

@@ -1,9 +1,15 @@
-def vectorized(func):
-    """
-    Decorate a function with annotated function parameters, so DuckDB can infer that the function should be provided with pyarrow arrays and should expect pyarrow array(s) as output
+# ruff: noqa: D100
+import typing
+def vectorized(func: typing.Callable[..., typing.Any]) -> typing.Callable[..., typing.Any]:
+    """Decorate a function with annotated function parameters.
+    This allows DuckDB to infer that the function should be provided with pyarrow arrays and should expect
+    pyarrow array(s) as output.
     """
-    from inspect import signature
     import types
+    from inspect import signature
     new_func = types.FunctionType(func.__code__, func.__globals__, func.__name__, func.__defaults__, func.__closure__)
     # Construct the annotations:
@@ -11,7 +17,6 @@ def vectorized(func):
     new_annotations = {}
     sig = signature(func)
-    sig.parameters
     for param in sig.parameters:
         new_annotations[param] = pa.lib.ChunkedArray

duckdb/value/__init__.py CHANGED Viewed

@@ -0,0 +1 @@
+# noqa: D104