PyPI - duckdb - Versions diffs - 1.4.1.dev125__cp39-cp39-macosx_11_0_arm64.whl → 1.5.0.dev37__cp39-cp39-macosx_11_0_arm64.whl - Mend

duckdb 1.4.1.dev125__cp39-cp39-macosx_11_0_arm64.whl → 1.5.0.dev37__cp39-cp39-macosx_11_0_arm64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of duckdb might be problematic. Click here for more details.

Files changed (48) hide show

_duckdb.cpython-39-darwin.so +0 -0
duckdb/__init__.py +374 -373
duckdb/__init__.pyi +180 -604
duckdb/bytes_io_wrapper.py +7 -6
duckdb/experimental/__init__.py +1 -2
duckdb/experimental/spark/__init__.py +4 -3
duckdb/experimental/spark/_globals.py +8 -8
duckdb/experimental/spark/_typing.py +9 -7
duckdb/experimental/spark/conf.py +15 -16
duckdb/experimental/spark/context.py +44 -60
duckdb/experimental/spark/errors/__init__.py +35 -33
duckdb/experimental/spark/errors/error_classes.py +1 -1
duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
duckdb/experimental/spark/errors/exceptions/base.py +88 -39
duckdb/experimental/spark/errors/utils.py +16 -11
duckdb/experimental/spark/exception.py +6 -9
duckdb/experimental/spark/sql/__init__.py +5 -5
duckdb/experimental/spark/sql/_typing.py +15 -8
duckdb/experimental/spark/sql/catalog.py +20 -21
duckdb/experimental/spark/sql/column.py +54 -47
duckdb/experimental/spark/sql/conf.py +8 -9
duckdb/experimental/spark/sql/dataframe.py +233 -185
duckdb/experimental/spark/sql/functions.py +1248 -1222
duckdb/experimental/spark/sql/group.py +52 -56
duckdb/experimental/spark/sql/readwriter.py +94 -80
duckdb/experimental/spark/sql/session.py +59 -64
duckdb/experimental/spark/sql/streaming.py +10 -9
duckdb/experimental/spark/sql/type_utils.py +64 -66
duckdb/experimental/spark/sql/types.py +344 -308
duckdb/experimental/spark/sql/udf.py +6 -6
duckdb/filesystem.py +8 -13
duckdb/functional/__init__.py +16 -2
duckdb/polars_io.py +57 -66
duckdb/query_graph/__main__.py +96 -91
duckdb/typing/__init__.py +8 -8
duckdb/typing/__init__.pyi +2 -4
duckdb/udf.py +5 -10
duckdb/value/__init__.py +0 -1
duckdb/value/constant/__init__.py +59 -61
duckdb/value/constant/__init__.pyi +4 -3
duckdb-1.5.0.dev37.dist-info/METADATA +80 -0
duckdb-1.5.0.dev37.dist-info/RECORD +47 -0
adbc_driver_duckdb/__init__.py +0 -50
adbc_driver_duckdb/dbapi.py +0 -115
duckdb-1.4.1.dev125.dist-info/METADATA +0 -326
duckdb-1.4.1.dev125.dist-info/RECORD +0 -49
{duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev37.dist-info}/WHEEL +0 -0
{duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev37.dist-info}/licenses/LICENSE +0 -0

duckdb/query_graph/__main__.py CHANGED Viewed

@@ -1,9 +1,10 @@
-import argparse  # noqa: D100
 import json
+import os
+import sys
 import re
 import webbrowser
 from functools import reduce
-from pathlib import Path
+import argparse
 qgraph_css = """
 .styled-table {
@@ -56,7 +57,7 @@ qgraph_css = """
   text-align: center;
   padding: 0px;
   border-radius: 1px;
   /* Positioning */
   position: absolute;
   z-index: 1;
@@ -64,7 +65,7 @@ qgraph_css = """
   left: 50%;
   transform: translateX(-50%);
   margin-bottom: 8px;
   /* Tooltip Arrow */
   width: 400px;
 }
@@ -75,128 +76,124 @@ qgraph_css = """
 """
-class NodeTiming:  # noqa: D101
-    def __init__(self, phase: str, time: float) -> None:  # noqa: D107
+class NodeTiming:
+    def __init__(self, phase: str, time: float) -> object:
         self.phase = phase
         self.time = time
         # percentage is determined later.
         self.percentage = 0
-    def calculate_percentage(self, total_time: float) -> None:  # noqa: D102
+    def calculate_percentage(self, total_time: float) -> None:
         self.percentage = self.time / total_time
-    def combine_timing(self, r: "NodeTiming") -> "NodeTiming":  # noqa: D102
-        # TODO: can only add timings for same-phase nodes  # noqa: TD002, TD003
-        total_time = self.time + r.time
-        return NodeTiming(self.phase, total_time)
+    def combine_timing(l: object, r: object) -> object:
+        # TODO: can only add timings for same-phase nodes
+        total_time = l.time + r.time
+        return NodeTiming(l.phase, total_time)
-class AllTimings:  # noqa: D101
-    def __init__(self) -> None:  # noqa: D107
+class AllTimings:
+    def __init__(self):
         self.phase_to_timings = {}
-    def add_node_timing(self, node_timing: NodeTiming) -> None:  # noqa: D102
+    def add_node_timing(self, node_timing: NodeTiming):
         if node_timing.phase in self.phase_to_timings:
             self.phase_to_timings[node_timing.phase].append(node_timing)
-        else:
-            self.phase_to_timings[node_timing.phase] = [node_timing]
+            return
+        self.phase_to_timings[node_timing.phase] = [node_timing]
-    def get_phase_timings(self, phase: str) -> list[NodeTiming]:  # noqa: D102
+    def get_phase_timings(self, phase: str):
         return self.phase_to_timings[phase]
-    def get_summary_phase_timings(self, phase: str) -> NodeTiming:  # noqa: D102
+    def get_summary_phase_timings(self, phase: str):
         return reduce(NodeTiming.combine_timing, self.phase_to_timings[phase])
-    def get_phases(self) -> list[NodeTiming]:  # noqa: D102
+    def get_phases(self):
         phases = list(self.phase_to_timings.keys())
         phases.sort(key=lambda x: (self.get_summary_phase_timings(x)).time)
         phases.reverse()
         return phases
-    def get_sum_of_all_timings(self) -> float:  # noqa: D102
+    def get_sum_of_all_timings(self):
         total_timing_sum = 0
-        for phase in self.phase_to_timings:
+        for phase in self.phase_to_timings.keys():
             total_timing_sum += self.get_summary_phase_timings(phase).time
         return total_timing_sum
-def open_utf8(fpath: str, flags: str) -> object:  # noqa: D103
-    return Path(fpath).open(mode=flags, encoding="utf8")
+def open_utf8(fpath: str, flags: str) -> object:
+    return open(fpath, flags, encoding="utf8")
-def get_child_timings(top_node: object, query_timings: object) -> str:  # noqa: D103
-    node_timing = NodeTiming(top_node["operator_type"], float(top_node["operator_timing"]))
+def get_child_timings(top_node: object, query_timings: object) -> str:
+    node_timing = NodeTiming(top_node['operator_type'], float(top_node['operator_timing']))
     query_timings.add_node_timing(node_timing)
-    for child in top_node["children"]:
+    for child in top_node['children']:
         get_child_timings(child, query_timings)
-def get_pink_shade_hex(fraction: float) -> str:  # noqa: D103
+def get_pink_shade_hex(fraction: float):
     fraction = max(0, min(1, fraction))
     # Define the RGB values for very light pink (almost white) and dark pink
     light_pink = (255, 250, 250)  # Very light pink
-    dark_pink = (255, 20, 147)  # Dark pink
+    dark_pink = (255, 20, 147)    # Dark pink
     # Calculate the RGB values for the given fraction
     r = int(light_pink[0] + (dark_pink[0] - light_pink[0]) * fraction)
     g = int(light_pink[1] + (dark_pink[1] - light_pink[1]) * fraction)
     b = int(light_pink[2] + (dark_pink[2] - light_pink[2]) * fraction)
     # Return as hexadecimal color code
     return f"#{r:02x}{g:02x}{b:02x}"
+def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str:
+    node_style = f"background-color: {get_pink_shade_hex(float(result)/cpu_time)};"
-def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str:  # noqa: D103
-    node_style = f"background-color: {get_pink_shade_hex(float(result) / cpu_time)};"
-    body = f'<span class="tf-nc custom-tooltip" style="{node_style}">'
-    body += '<div class="node-body">'
+    body = f"<span class=\"tf-nc custom-tooltip\" style=\"{node_style}\">"
+    body += "<div class=\"node-body\">"
     new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ")
     formatted_num = f"{float(result):.4f}"
     body += f"<p><b>{new_name}</b> </p><p>time: {formatted_num} seconds</p>"
-    body += f'<span class="tooltip-text"> {extra_info} </span>'
-    if width > 0:
+    body += f"<span class=\"tooltip-text\"> {extra_info} </span>"
+    if (width > 0):
         body += f"<p>cardinality: {card}</p>"
         body += f"<p>estimate: {est}</p>"
         body += f"<p>width: {width} bytes</p>"
-    # TODO: Expand on timing. Usually available from a detailed profiling  # noqa: TD002, TD003
+    # TODO: Expand on timing. Usually available from a detailed profiling
     body += "</div>"
     body += "</span>"
     return body
-def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:  # noqa: D103
+def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:
     node_prefix_html = "<li>"
     node_suffix_html = "</li>"
     extra_info = ""
     estimate = 0
-    for key in json_graph["extra_info"]:
-        value = json_graph["extra_info"][key]
-        if key == "Estimated Cardinality":
+    for key in json_graph['extra_info']:
+        value = json_graph['extra_info'][key]
+        if (key == "Estimated Cardinality"):
             estimate = int(value)
         else:
             extra_info += f"{key}: {value} <br>"
     cardinality = json_graph["operator_cardinality"]
-    width = int(json_graph["result_set_size"] / max(1, cardinality))
+    width = int(json_graph["result_set_size"]/max(1,cardinality))
     # get rid of some typically long names
     extra_info = re.sub(r"__internal_\s*", "__", extra_info)
     extra_info = re.sub(r"compress_integral\s*", "compress", extra_info)
-    node_body = get_node_body(
-        json_graph["operator_type"],
-        json_graph["operator_timing"],
-        cpu_time,
-        cardinality,
-        estimate,
-        width,
-        re.sub(r",\s*", ", ", extra_info),
-    )
+    node_body = get_node_body(json_graph["operator_type"],
+                              json_graph["operator_timing"],
+                              cpu_time, cardinality, estimate, width,
+                              re.sub(r",\s*", ", ", extra_info))
     children_html = ""
-    if len(json_graph["children"]) >= 1:
+    if len(json_graph['children']) >= 1:
         children_html += "<ul>"
         for child in json_graph["children"]:
             children_html += generate_tree_recursive(child, cpu_time)
@@ -205,12 +202,12 @@ def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:  # noqa
 # For generating the table in the top left.
-def generate_timing_html(graph_json: object, query_timings: object) -> object:  # noqa: D103
+def generate_timing_html(graph_json: object, query_timings: object) -> object:
     json_graph = json.loads(graph_json)
     gather_timing_information(json_graph, query_timings)
-    total_time = float(json_graph.get("operator_timing") or json_graph.get("latency"))
+    total_time = float(json_graph.get('operator_timing') or json_graph.get('latency'))
     table_head = """
-	<table class=\"styled-table\">
+	<table class=\"styled-table\">
 		<thead>
 			<tr>
 				<th>Phase</th>
@@ -227,7 +224,7 @@ def generate_timing_html(graph_json: object, query_timings: object) -> object:
     all_phases = query_timings.get_phases()
     query_timings.add_node_timing(NodeTiming("TOTAL TIME", total_time))
     query_timings.add_node_timing(NodeTiming("Execution Time", execution_time))
-    all_phases = ["TOTAL TIME", "Execution Time", *all_phases]
+    all_phases = ["TOTAL TIME", "Execution Time"] + all_phases
     for phase in all_phases:
         summarized_phase = query_timings.get_summary_phase_timings(phase)
         summarized_phase.calculate_percentage(total_time)
@@ -243,48 +240,55 @@ def generate_timing_html(graph_json: object, query_timings: object) -> object:
     return table_head + table_body
-def generate_tree_html(graph_json: object) -> str:  # noqa: D103
+def generate_tree_html(graph_json: object) -> str:
     json_graph = json.loads(graph_json)
-    cpu_time = float(json_graph["cpu_time"])
-    tree_prefix = '<div class="tf-tree tf-gap-sm"> \n <ul>'
+    cpu_time = float(json_graph['cpu_time'])
+    tree_prefix = "<div class=\"tf-tree tf-gap-sm\"> \n <ul>"
     tree_suffix = "</ul> </div>"
     # first level of json is general overview
-    # TODO: make sure json output first level always has only 1 level  # noqa: TD002, TD003
-    tree_body = generate_tree_recursive(json_graph["children"][0], cpu_time)
+    # FIXME: make sure json output first level always has only 1 level
+    tree_body = generate_tree_recursive(json_graph['children'][0], cpu_time)
     return tree_prefix + tree_body + tree_suffix
-def generate_ipython(json_input: str) -> str:  # noqa: D103
+def generate_ipython(json_input: str) -> str:
     from IPython.core.display import HTML
-    html_output = generate_html(json_input, False)  # noqa: F821
+    html_output = generate_html(json_input, False)
-    return HTML(
-        ('\n	${CSS}\n	${LIBRARIES}\n	<div class="chart" id="query-profile"></div>\n	${CHART_SCRIPT}\n	')
-        .replace("${CSS}", html_output["css"])
-        .replace("${CHART_SCRIPT}", html_output["chart_script"])
-        .replace("${LIBRARIES}", html_output["libraries"])
-    )
+    return HTML(("\n"
+                 "	${CSS}\n"
+                 "	${LIBRARIES}\n"
+                 "	<div class=\"chart\" id=\"query-profile\"></div>\n"
+                 "	${CHART_SCRIPT}\n"
+                 "	").replace("${CSS}", html_output['css']).replace('${CHART_SCRIPT}',
+                                                                       html_output['chart_script']).replace(
+        '${LIBRARIES}', html_output['libraries']))
-def generate_style_html(graph_json: str, include_meta_info: bool) -> None:  # noqa: D103, FBT001
-    treeflex_css = '<link rel="stylesheet" href="https://unpkg.com/treeflex/dist/css/treeflex.css">\n'
+def generate_style_html(graph_json: str, include_meta_info: bool) -> None:
+    treeflex_css = "<link rel=\"stylesheet\" href=\"https://unpkg.com/treeflex/dist/css/treeflex.css\">\n"
     css = "<style>\n"
     css += qgraph_css + "\n"
     css += "</style>\n"
-    return {"treeflex_css": treeflex_css, "duckdb_css": css, "libraries": "", "chart_script": ""}
+    return {
+        'treeflex_css': treeflex_css,
+        'duckdb_css': css,
+        'libraries': '',
+        'chart_script': ''
+    }
-def gather_timing_information(json: str, query_timings: object) -> None:  # noqa: D103
+def gather_timing_information(json: str, query_timings: object) -> None:
     # add up all of the times
     # measure each time as a percentage of the total time.
     # then you can return a list of [phase, time, percentage]
-    get_child_timings(json["children"][0], query_timings)
+    get_child_timings(json['children'][0], query_timings)
-def translate_json_to_html(input_file: str, output_file: str) -> None:  # noqa: D103
+def translate_json_to_html(input_file: str, output_file: str) -> None:
     query_timings = AllTimings()
-    with open_utf8(input_file, "r") as f:
+    with open_utf8(input_file, 'r') as f:
         text = f.read()
     html_output = generate_style_html(text, True)
@@ -313,22 +317,23 @@ def translate_json_to_html(input_file: str, output_file: str) -> None:  # noqa:
 </body>
 </html>
 """
-        html = html.replace("${TREEFLEX_CSS}", html_output["treeflex_css"])
-        html = html.replace("${DUCKDB_CSS}", html_output["duckdb_css"])
+        html = html.replace("${TREEFLEX_CSS}", html_output['treeflex_css'])
+        html = html.replace("${DUCKDB_CSS}", html_output['duckdb_css'])
         html = html.replace("${TIMING_TABLE}", timing_table)
-        html = html.replace("${TREE}", tree_output)
+        html = html.replace('${TREE}', tree_output)
         f.write(html)
-def main() -> None:  # noqa: D103
+def main() -> None:
+    if sys.version_info[0] < 3:
+        print("Please use python3")
+        exit(1)
     parser = argparse.ArgumentParser(
-        prog="Query Graph Generator",
-        description="""Given a json profile output, generate a html file showing the query graph and
-        timings of operators""",
-    )
-    parser.add_argument("profile_input", help="profile input in json")
-    parser.add_argument("--out", required=False, default=False)
-    parser.add_argument("--open", required=False, action="store_true", default=True)
+        prog='Query Graph Generator',
+        description='Given a json profile output, generate a html file showing the query graph and timings of operators')
+    parser.add_argument('profile_input', help='profile input in json')
+    parser.add_argument('--out', required=False, default=False)
+    parser.add_argument('--open', required=False, action='store_true', default=True)
     args = parser.parse_args()
     input = args.profile_input
@@ -351,8 +356,8 @@ def main() -> None:  # noqa: D103
     translate_json_to_html(input, output)
     if open_output:
-        webbrowser.open(f"file://{Path(output).resolve()}", new=2)
+        webbrowser.open('file://' + os.path.abspath(output), new=2)
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()

duckdb/typing/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from _duckdb.typing import (  # noqa: D104
+from _duckdb.typing import (
+    DuckDBPyType,
     BIGINT,
     BIT,
     BLOB,
@@ -7,29 +8,29 @@ from _duckdb.typing import (  # noqa: D104
     DOUBLE,
     FLOAT,
     HUGEINT,
+    UHUGEINT,
     INTEGER,
     INTERVAL,
     SMALLINT,
     SQLNULL,
     TIME,
-    TIME_TZ,
     TIMESTAMP,
     TIMESTAMP_MS,
     TIMESTAMP_NS,
     TIMESTAMP_S,
     TIMESTAMP_TZ,
+    TIME_TZ,
     TINYINT,
     UBIGINT,
-    UHUGEINT,
     UINTEGER,
     USMALLINT,
     UTINYINT,
     UUID,
-    VARCHAR,
-    DuckDBPyType,
+    VARCHAR
 )
 __all__ = [
+    "DuckDBPyType",
     "BIGINT",
     "BIT",
     "BLOB",
@@ -38,6 +39,7 @@ __all__ = [
     "DOUBLE",
     "FLOAT",
     "HUGEINT",
+    "UHUGEINT",
     "INTEGER",
     "INTERVAL",
     "SMALLINT",
@@ -51,11 +53,9 @@ __all__ = [
     "TIME_TZ",
     "TINYINT",
     "UBIGINT",
-    "UHUGEINT",
     "UINTEGER",
     "USMALLINT",
     "UTINYINT",
     "UUID",
-    "VARCHAR",
-    "DuckDBPyType",
+    "VARCHAR"
 ]

duckdb/typing/__init__.pyi CHANGED Viewed

@@ -32,7 +32,5 @@ class DuckDBPyType:
     def __init__(self, type_str: str, connection: DuckDBPyConnection = ...) -> None: ...
     def __repr__(self) -> str: ...
     def __eq__(self, other) -> bool: ...
-    def __getattr__(self, name: str):
-        DuckDBPyType
-    def __getitem__(self, name: str):
-        DuckDBPyType
+    def __getattr__(self, name: str): DuckDBPyType
+    def __getitem__(self, name: str): DuckDBPyType

duckdb/udf.py CHANGED Viewed

@@ -1,15 +1,9 @@
-# ruff: noqa: D100
-from typing import Callable
-def vectorized(func: Callable) -> Callable:
-    """Decorate a function with annotated function parameters.
-    This allows DuckDB to infer that the function should be provided with pyarrow arrays and should expect
-    pyarrow array(s) as output.
+def vectorized(func):
+    """
+    Decorate a function with annotated function parameters, so DuckDB can infer that the function should be provided with pyarrow arrays and should expect pyarrow array(s) as output
     """
-    import types
     from inspect import signature
+    import types
     new_func = types.FunctionType(func.__code__, func.__globals__, func.__name__, func.__defaults__, func.__closure__)
     # Construct the annotations:
@@ -17,6 +11,7 @@ def vectorized(func: Callable) -> Callable:
     new_annotations = {}
     sig = signature(func)
+    sig.parameters
     for param in sig.parameters:
         new_annotations[param] = pa.lib.ChunkedArray

duckdb/value/__init__.py CHANGED Viewed

@@ -1 +0,0 @@
-# noqa: D104