duckdb 1.4.1.dev125__cp39-cp39-macosx_11_0_arm64.whl → 1.5.0.dev37__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckdb might be problematic. Click here for more details.
- _duckdb.cpython-39-darwin.so +0 -0
- duckdb/__init__.py +374 -373
- duckdb/__init__.pyi +180 -604
- duckdb/bytes_io_wrapper.py +7 -6
- duckdb/experimental/__init__.py +1 -2
- duckdb/experimental/spark/__init__.py +4 -3
- duckdb/experimental/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +9 -7
- duckdb/experimental/spark/conf.py +15 -16
- duckdb/experimental/spark/context.py +44 -60
- duckdb/experimental/spark/errors/__init__.py +35 -33
- duckdb/experimental/spark/errors/error_classes.py +1 -1
- duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
- duckdb/experimental/spark/errors/exceptions/base.py +88 -39
- duckdb/experimental/spark/errors/utils.py +16 -11
- duckdb/experimental/spark/exception.py +6 -9
- duckdb/experimental/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +15 -8
- duckdb/experimental/spark/sql/catalog.py +20 -21
- duckdb/experimental/spark/sql/column.py +54 -47
- duckdb/experimental/spark/sql/conf.py +8 -9
- duckdb/experimental/spark/sql/dataframe.py +233 -185
- duckdb/experimental/spark/sql/functions.py +1248 -1222
- duckdb/experimental/spark/sql/group.py +52 -56
- duckdb/experimental/spark/sql/readwriter.py +94 -80
- duckdb/experimental/spark/sql/session.py +59 -64
- duckdb/experimental/spark/sql/streaming.py +10 -9
- duckdb/experimental/spark/sql/type_utils.py +64 -66
- duckdb/experimental/spark/sql/types.py +344 -308
- duckdb/experimental/spark/sql/udf.py +6 -6
- duckdb/filesystem.py +8 -13
- duckdb/functional/__init__.py +16 -2
- duckdb/polars_io.py +57 -66
- duckdb/query_graph/__main__.py +96 -91
- duckdb/typing/__init__.py +8 -8
- duckdb/typing/__init__.pyi +2 -4
- duckdb/udf.py +5 -10
- duckdb/value/__init__.py +0 -1
- duckdb/value/constant/__init__.py +59 -61
- duckdb/value/constant/__init__.pyi +4 -3
- duckdb-1.5.0.dev37.dist-info/METADATA +80 -0
- duckdb-1.5.0.dev37.dist-info/RECORD +47 -0
- adbc_driver_duckdb/__init__.py +0 -50
- adbc_driver_duckdb/dbapi.py +0 -115
- duckdb-1.4.1.dev125.dist-info/METADATA +0 -326
- duckdb-1.4.1.dev125.dist-info/RECORD +0 -49
- {duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev37.dist-info}/WHEEL +0 -0
- {duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev37.dist-info}/licenses/LICENSE +0 -0
duckdb/query_graph/__main__.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import argparse # noqa: D100
|
|
2
1
|
import json
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
3
4
|
import re
|
|
4
5
|
import webbrowser
|
|
5
6
|
from functools import reduce
|
|
6
|
-
|
|
7
|
+
import argparse
|
|
7
8
|
|
|
8
9
|
qgraph_css = """
|
|
9
10
|
.styled-table {
|
|
@@ -56,7 +57,7 @@ qgraph_css = """
|
|
|
56
57
|
text-align: center;
|
|
57
58
|
padding: 0px;
|
|
58
59
|
border-radius: 1px;
|
|
59
|
-
|
|
60
|
+
|
|
60
61
|
/* Positioning */
|
|
61
62
|
position: absolute;
|
|
62
63
|
z-index: 1;
|
|
@@ -64,7 +65,7 @@ qgraph_css = """
|
|
|
64
65
|
left: 50%;
|
|
65
66
|
transform: translateX(-50%);
|
|
66
67
|
margin-bottom: 8px;
|
|
67
|
-
|
|
68
|
+
|
|
68
69
|
/* Tooltip Arrow */
|
|
69
70
|
width: 400px;
|
|
70
71
|
}
|
|
@@ -75,128 +76,124 @@ qgraph_css = """
|
|
|
75
76
|
"""
|
|
76
77
|
|
|
77
78
|
|
|
78
|
-
class NodeTiming:
|
|
79
|
-
|
|
79
|
+
class NodeTiming:
|
|
80
|
+
|
|
81
|
+
def __init__(self, phase: str, time: float) -> object:
|
|
80
82
|
self.phase = phase
|
|
81
83
|
self.time = time
|
|
82
84
|
# percentage is determined later.
|
|
83
85
|
self.percentage = 0
|
|
84
86
|
|
|
85
|
-
def calculate_percentage(self, total_time: float) -> None:
|
|
87
|
+
def calculate_percentage(self, total_time: float) -> None:
|
|
86
88
|
self.percentage = self.time / total_time
|
|
87
89
|
|
|
88
|
-
def combine_timing(
|
|
89
|
-
# TODO: can only add timings for same-phase nodes
|
|
90
|
-
total_time =
|
|
91
|
-
return NodeTiming(
|
|
90
|
+
def combine_timing(l: object, r: object) -> object:
|
|
91
|
+
# TODO: can only add timings for same-phase nodes
|
|
92
|
+
total_time = l.time + r.time
|
|
93
|
+
return NodeTiming(l.phase, total_time)
|
|
92
94
|
|
|
93
95
|
|
|
94
|
-
class AllTimings:
|
|
95
|
-
|
|
96
|
+
class AllTimings:
|
|
97
|
+
|
|
98
|
+
def __init__(self):
|
|
96
99
|
self.phase_to_timings = {}
|
|
97
100
|
|
|
98
|
-
def add_node_timing(self, node_timing: NodeTiming)
|
|
101
|
+
def add_node_timing(self, node_timing: NodeTiming):
|
|
99
102
|
if node_timing.phase in self.phase_to_timings:
|
|
100
103
|
self.phase_to_timings[node_timing.phase].append(node_timing)
|
|
101
|
-
|
|
102
|
-
|
|
104
|
+
return
|
|
105
|
+
self.phase_to_timings[node_timing.phase] = [node_timing]
|
|
103
106
|
|
|
104
|
-
def get_phase_timings(self, phase: str)
|
|
107
|
+
def get_phase_timings(self, phase: str):
|
|
105
108
|
return self.phase_to_timings[phase]
|
|
106
109
|
|
|
107
|
-
def get_summary_phase_timings(self, phase: str)
|
|
110
|
+
def get_summary_phase_timings(self, phase: str):
|
|
108
111
|
return reduce(NodeTiming.combine_timing, self.phase_to_timings[phase])
|
|
109
112
|
|
|
110
|
-
def get_phases(self)
|
|
113
|
+
def get_phases(self):
|
|
111
114
|
phases = list(self.phase_to_timings.keys())
|
|
112
115
|
phases.sort(key=lambda x: (self.get_summary_phase_timings(x)).time)
|
|
113
116
|
phases.reverse()
|
|
114
117
|
return phases
|
|
115
118
|
|
|
116
|
-
def get_sum_of_all_timings(self)
|
|
119
|
+
def get_sum_of_all_timings(self):
|
|
117
120
|
total_timing_sum = 0
|
|
118
|
-
for phase in self.phase_to_timings:
|
|
121
|
+
for phase in self.phase_to_timings.keys():
|
|
119
122
|
total_timing_sum += self.get_summary_phase_timings(phase).time
|
|
120
123
|
return total_timing_sum
|
|
121
124
|
|
|
122
125
|
|
|
123
|
-
def open_utf8(fpath: str, flags: str) -> object:
|
|
124
|
-
return
|
|
126
|
+
def open_utf8(fpath: str, flags: str) -> object:
|
|
127
|
+
return open(fpath, flags, encoding="utf8")
|
|
125
128
|
|
|
126
129
|
|
|
127
|
-
def get_child_timings(top_node: object, query_timings: object) -> str:
|
|
128
|
-
node_timing = NodeTiming(top_node[
|
|
130
|
+
def get_child_timings(top_node: object, query_timings: object) -> str:
|
|
131
|
+
node_timing = NodeTiming(top_node['operator_type'], float(top_node['operator_timing']))
|
|
129
132
|
query_timings.add_node_timing(node_timing)
|
|
130
|
-
for child in top_node[
|
|
133
|
+
for child in top_node['children']:
|
|
131
134
|
get_child_timings(child, query_timings)
|
|
132
135
|
|
|
133
136
|
|
|
134
|
-
def get_pink_shade_hex(fraction: float)
|
|
137
|
+
def get_pink_shade_hex(fraction: float):
|
|
135
138
|
fraction = max(0, min(1, fraction))
|
|
136
|
-
|
|
139
|
+
|
|
137
140
|
# Define the RGB values for very light pink (almost white) and dark pink
|
|
138
141
|
light_pink = (255, 250, 250) # Very light pink
|
|
139
|
-
dark_pink = (255, 20, 147)
|
|
140
|
-
|
|
142
|
+
dark_pink = (255, 20, 147) # Dark pink
|
|
143
|
+
|
|
141
144
|
# Calculate the RGB values for the given fraction
|
|
142
145
|
r = int(light_pink[0] + (dark_pink[0] - light_pink[0]) * fraction)
|
|
143
146
|
g = int(light_pink[1] + (dark_pink[1] - light_pink[1]) * fraction)
|
|
144
147
|
b = int(light_pink[2] + (dark_pink[2] - light_pink[2]) * fraction)
|
|
145
|
-
|
|
148
|
+
|
|
146
149
|
# Return as hexadecimal color code
|
|
147
150
|
return f"#{r:02x}{g:02x}{b:02x}"
|
|
148
151
|
|
|
152
|
+
def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str:
|
|
153
|
+
node_style = f"background-color: {get_pink_shade_hex(float(result)/cpu_time)};"
|
|
149
154
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
body = f'<span class="tf-nc custom-tooltip" style="{node_style}">'
|
|
154
|
-
body += '<div class="node-body">'
|
|
155
|
+
body = f"<span class=\"tf-nc custom-tooltip\" style=\"{node_style}\">"
|
|
156
|
+
body += "<div class=\"node-body\">"
|
|
155
157
|
new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ")
|
|
156
158
|
formatted_num = f"{float(result):.4f}"
|
|
157
159
|
body += f"<p><b>{new_name}</b> </p><p>time: {formatted_num} seconds</p>"
|
|
158
|
-
body += f
|
|
159
|
-
if width > 0:
|
|
160
|
+
body += f"<span class=\"tooltip-text\"> {extra_info} </span>"
|
|
161
|
+
if (width > 0):
|
|
160
162
|
body += f"<p>cardinality: {card}</p>"
|
|
161
163
|
body += f"<p>estimate: {est}</p>"
|
|
162
164
|
body += f"<p>width: {width} bytes</p>"
|
|
163
|
-
# TODO: Expand on timing. Usually available from a detailed profiling
|
|
165
|
+
# TODO: Expand on timing. Usually available from a detailed profiling
|
|
164
166
|
body += "</div>"
|
|
165
167
|
body += "</span>"
|
|
166
168
|
return body
|
|
167
169
|
|
|
168
170
|
|
|
169
|
-
def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:
|
|
171
|
+
def generate_tree_recursive(json_graph: object, cpu_time: float) -> str:
|
|
170
172
|
node_prefix_html = "<li>"
|
|
171
173
|
node_suffix_html = "</li>"
|
|
172
174
|
|
|
173
175
|
extra_info = ""
|
|
174
176
|
estimate = 0
|
|
175
|
-
for key in json_graph[
|
|
176
|
-
value = json_graph[
|
|
177
|
-
if key == "Estimated Cardinality":
|
|
177
|
+
for key in json_graph['extra_info']:
|
|
178
|
+
value = json_graph['extra_info'][key]
|
|
179
|
+
if (key == "Estimated Cardinality"):
|
|
178
180
|
estimate = int(value)
|
|
179
181
|
else:
|
|
180
182
|
extra_info += f"{key}: {value} <br>"
|
|
181
183
|
cardinality = json_graph["operator_cardinality"]
|
|
182
|
-
width = int(json_graph["result_set_size"]
|
|
184
|
+
width = int(json_graph["result_set_size"]/max(1,cardinality))
|
|
183
185
|
|
|
184
186
|
# get rid of some typically long names
|
|
185
187
|
extra_info = re.sub(r"__internal_\s*", "__", extra_info)
|
|
186
188
|
extra_info = re.sub(r"compress_integral\s*", "compress", extra_info)
|
|
187
189
|
|
|
188
|
-
node_body = get_node_body(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
cardinality,
|
|
193
|
-
estimate,
|
|
194
|
-
width,
|
|
195
|
-
re.sub(r",\s*", ", ", extra_info),
|
|
196
|
-
)
|
|
190
|
+
node_body = get_node_body(json_graph["operator_type"],
|
|
191
|
+
json_graph["operator_timing"],
|
|
192
|
+
cpu_time, cardinality, estimate, width,
|
|
193
|
+
re.sub(r",\s*", ", ", extra_info))
|
|
197
194
|
|
|
198
195
|
children_html = ""
|
|
199
|
-
if len(json_graph[
|
|
196
|
+
if len(json_graph['children']) >= 1:
|
|
200
197
|
children_html += "<ul>"
|
|
201
198
|
for child in json_graph["children"]:
|
|
202
199
|
children_html += generate_tree_recursive(child, cpu_time)
|
|
@@ -205,12 +202,12 @@ def generate_tree_recursive(json_graph: object, cpu_time: float) -> str: # noqa
|
|
|
205
202
|
|
|
206
203
|
|
|
207
204
|
# For generating the table in the top left.
|
|
208
|
-
def generate_timing_html(graph_json: object, query_timings: object) -> object:
|
|
205
|
+
def generate_timing_html(graph_json: object, query_timings: object) -> object:
|
|
209
206
|
json_graph = json.loads(graph_json)
|
|
210
207
|
gather_timing_information(json_graph, query_timings)
|
|
211
|
-
total_time = float(json_graph.get(
|
|
208
|
+
total_time = float(json_graph.get('operator_timing') or json_graph.get('latency'))
|
|
212
209
|
table_head = """
|
|
213
|
-
<table class=\"styled-table\">
|
|
210
|
+
<table class=\"styled-table\">
|
|
214
211
|
<thead>
|
|
215
212
|
<tr>
|
|
216
213
|
<th>Phase</th>
|
|
@@ -227,7 +224,7 @@ def generate_timing_html(graph_json: object, query_timings: object) -> object:
|
|
|
227
224
|
all_phases = query_timings.get_phases()
|
|
228
225
|
query_timings.add_node_timing(NodeTiming("TOTAL TIME", total_time))
|
|
229
226
|
query_timings.add_node_timing(NodeTiming("Execution Time", execution_time))
|
|
230
|
-
all_phases = ["TOTAL TIME", "Execution Time"
|
|
227
|
+
all_phases = ["TOTAL TIME", "Execution Time"] + all_phases
|
|
231
228
|
for phase in all_phases:
|
|
232
229
|
summarized_phase = query_timings.get_summary_phase_timings(phase)
|
|
233
230
|
summarized_phase.calculate_percentage(total_time)
|
|
@@ -243,48 +240,55 @@ def generate_timing_html(graph_json: object, query_timings: object) -> object:
|
|
|
243
240
|
return table_head + table_body
|
|
244
241
|
|
|
245
242
|
|
|
246
|
-
def generate_tree_html(graph_json: object) -> str:
|
|
243
|
+
def generate_tree_html(graph_json: object) -> str:
|
|
247
244
|
json_graph = json.loads(graph_json)
|
|
248
|
-
cpu_time = float(json_graph[
|
|
249
|
-
tree_prefix =
|
|
245
|
+
cpu_time = float(json_graph['cpu_time'])
|
|
246
|
+
tree_prefix = "<div class=\"tf-tree tf-gap-sm\"> \n <ul>"
|
|
250
247
|
tree_suffix = "</ul> </div>"
|
|
251
248
|
# first level of json is general overview
|
|
252
|
-
#
|
|
253
|
-
tree_body = generate_tree_recursive(json_graph[
|
|
249
|
+
# FIXME: make sure json output first level always has only 1 level
|
|
250
|
+
tree_body = generate_tree_recursive(json_graph['children'][0], cpu_time)
|
|
254
251
|
return tree_prefix + tree_body + tree_suffix
|
|
255
252
|
|
|
256
253
|
|
|
257
|
-
def generate_ipython(json_input: str) -> str:
|
|
254
|
+
def generate_ipython(json_input: str) -> str:
|
|
258
255
|
from IPython.core.display import HTML
|
|
259
256
|
|
|
260
|
-
html_output = generate_html(json_input, False)
|
|
257
|
+
html_output = generate_html(json_input, False)
|
|
261
258
|
|
|
262
|
-
return HTML(
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
259
|
+
return HTML(("\n"
|
|
260
|
+
" ${CSS}\n"
|
|
261
|
+
" ${LIBRARIES}\n"
|
|
262
|
+
" <div class=\"chart\" id=\"query-profile\"></div>\n"
|
|
263
|
+
" ${CHART_SCRIPT}\n"
|
|
264
|
+
" ").replace("${CSS}", html_output['css']).replace('${CHART_SCRIPT}',
|
|
265
|
+
html_output['chart_script']).replace(
|
|
266
|
+
'${LIBRARIES}', html_output['libraries']))
|
|
268
267
|
|
|
269
268
|
|
|
270
|
-
def generate_style_html(graph_json: str, include_meta_info: bool) -> None:
|
|
271
|
-
treeflex_css =
|
|
269
|
+
def generate_style_html(graph_json: str, include_meta_info: bool) -> None:
|
|
270
|
+
treeflex_css = "<link rel=\"stylesheet\" href=\"https://unpkg.com/treeflex/dist/css/treeflex.css\">\n"
|
|
272
271
|
css = "<style>\n"
|
|
273
272
|
css += qgraph_css + "\n"
|
|
274
273
|
css += "</style>\n"
|
|
275
|
-
return {
|
|
274
|
+
return {
|
|
275
|
+
'treeflex_css': treeflex_css,
|
|
276
|
+
'duckdb_css': css,
|
|
277
|
+
'libraries': '',
|
|
278
|
+
'chart_script': ''
|
|
279
|
+
}
|
|
276
280
|
|
|
277
281
|
|
|
278
|
-
def gather_timing_information(json: str, query_timings: object) -> None:
|
|
282
|
+
def gather_timing_information(json: str, query_timings: object) -> None:
|
|
279
283
|
# add up all of the times
|
|
280
284
|
# measure each time as a percentage of the total time.
|
|
281
285
|
# then you can return a list of [phase, time, percentage]
|
|
282
|
-
get_child_timings(json[
|
|
286
|
+
get_child_timings(json['children'][0], query_timings)
|
|
283
287
|
|
|
284
288
|
|
|
285
|
-
def translate_json_to_html(input_file: str, output_file: str) -> None:
|
|
289
|
+
def translate_json_to_html(input_file: str, output_file: str) -> None:
|
|
286
290
|
query_timings = AllTimings()
|
|
287
|
-
with open_utf8(input_file,
|
|
291
|
+
with open_utf8(input_file, 'r') as f:
|
|
288
292
|
text = f.read()
|
|
289
293
|
|
|
290
294
|
html_output = generate_style_html(text, True)
|
|
@@ -313,22 +317,23 @@ def translate_json_to_html(input_file: str, output_file: str) -> None: # noqa:
|
|
|
313
317
|
</body>
|
|
314
318
|
</html>
|
|
315
319
|
"""
|
|
316
|
-
html = html.replace("${TREEFLEX_CSS}", html_output[
|
|
317
|
-
html = html.replace("${DUCKDB_CSS}", html_output[
|
|
320
|
+
html = html.replace("${TREEFLEX_CSS}", html_output['treeflex_css'])
|
|
321
|
+
html = html.replace("${DUCKDB_CSS}", html_output['duckdb_css'])
|
|
318
322
|
html = html.replace("${TIMING_TABLE}", timing_table)
|
|
319
|
-
html = html.replace(
|
|
323
|
+
html = html.replace('${TREE}', tree_output)
|
|
320
324
|
f.write(html)
|
|
321
325
|
|
|
322
326
|
|
|
323
|
-
def main() -> None:
|
|
327
|
+
def main() -> None:
|
|
328
|
+
if sys.version_info[0] < 3:
|
|
329
|
+
print("Please use python3")
|
|
330
|
+
exit(1)
|
|
324
331
|
parser = argparse.ArgumentParser(
|
|
325
|
-
prog=
|
|
326
|
-
description=
|
|
327
|
-
|
|
328
|
-
)
|
|
329
|
-
parser.add_argument(
|
|
330
|
-
parser.add_argument("--out", required=False, default=False)
|
|
331
|
-
parser.add_argument("--open", required=False, action="store_true", default=True)
|
|
332
|
+
prog='Query Graph Generator',
|
|
333
|
+
description='Given a json profile output, generate a html file showing the query graph and timings of operators')
|
|
334
|
+
parser.add_argument('profile_input', help='profile input in json')
|
|
335
|
+
parser.add_argument('--out', required=False, default=False)
|
|
336
|
+
parser.add_argument('--open', required=False, action='store_true', default=True)
|
|
332
337
|
args = parser.parse_args()
|
|
333
338
|
|
|
334
339
|
input = args.profile_input
|
|
@@ -351,8 +356,8 @@ def main() -> None: # noqa: D103
|
|
|
351
356
|
translate_json_to_html(input, output)
|
|
352
357
|
|
|
353
358
|
if open_output:
|
|
354
|
-
webbrowser.open(
|
|
359
|
+
webbrowser.open('file://' + os.path.abspath(output), new=2)
|
|
355
360
|
|
|
356
361
|
|
|
357
|
-
if __name__ ==
|
|
362
|
+
if __name__ == '__main__':
|
|
358
363
|
main()
|
duckdb/typing/__init__.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from _duckdb.typing import (
|
|
1
|
+
from _duckdb.typing import (
|
|
2
|
+
DuckDBPyType,
|
|
2
3
|
BIGINT,
|
|
3
4
|
BIT,
|
|
4
5
|
BLOB,
|
|
@@ -7,29 +8,29 @@ from _duckdb.typing import ( # noqa: D104
|
|
|
7
8
|
DOUBLE,
|
|
8
9
|
FLOAT,
|
|
9
10
|
HUGEINT,
|
|
11
|
+
UHUGEINT,
|
|
10
12
|
INTEGER,
|
|
11
13
|
INTERVAL,
|
|
12
14
|
SMALLINT,
|
|
13
15
|
SQLNULL,
|
|
14
16
|
TIME,
|
|
15
|
-
TIME_TZ,
|
|
16
17
|
TIMESTAMP,
|
|
17
18
|
TIMESTAMP_MS,
|
|
18
19
|
TIMESTAMP_NS,
|
|
19
20
|
TIMESTAMP_S,
|
|
20
21
|
TIMESTAMP_TZ,
|
|
22
|
+
TIME_TZ,
|
|
21
23
|
TINYINT,
|
|
22
24
|
UBIGINT,
|
|
23
|
-
UHUGEINT,
|
|
24
25
|
UINTEGER,
|
|
25
26
|
USMALLINT,
|
|
26
27
|
UTINYINT,
|
|
27
28
|
UUID,
|
|
28
|
-
VARCHAR
|
|
29
|
-
DuckDBPyType,
|
|
29
|
+
VARCHAR
|
|
30
30
|
)
|
|
31
31
|
|
|
32
32
|
__all__ = [
|
|
33
|
+
"DuckDBPyType",
|
|
33
34
|
"BIGINT",
|
|
34
35
|
"BIT",
|
|
35
36
|
"BLOB",
|
|
@@ -38,6 +39,7 @@ __all__ = [
|
|
|
38
39
|
"DOUBLE",
|
|
39
40
|
"FLOAT",
|
|
40
41
|
"HUGEINT",
|
|
42
|
+
"UHUGEINT",
|
|
41
43
|
"INTEGER",
|
|
42
44
|
"INTERVAL",
|
|
43
45
|
"SMALLINT",
|
|
@@ -51,11 +53,9 @@ __all__ = [
|
|
|
51
53
|
"TIME_TZ",
|
|
52
54
|
"TINYINT",
|
|
53
55
|
"UBIGINT",
|
|
54
|
-
"UHUGEINT",
|
|
55
56
|
"UINTEGER",
|
|
56
57
|
"USMALLINT",
|
|
57
58
|
"UTINYINT",
|
|
58
59
|
"UUID",
|
|
59
|
-
"VARCHAR"
|
|
60
|
-
"DuckDBPyType",
|
|
60
|
+
"VARCHAR"
|
|
61
61
|
]
|
duckdb/typing/__init__.pyi
CHANGED
|
@@ -32,7 +32,5 @@ class DuckDBPyType:
|
|
|
32
32
|
def __init__(self, type_str: str, connection: DuckDBPyConnection = ...) -> None: ...
|
|
33
33
|
def __repr__(self) -> str: ...
|
|
34
34
|
def __eq__(self, other) -> bool: ...
|
|
35
|
-
def __getattr__(self, name: str):
|
|
36
|
-
|
|
37
|
-
def __getitem__(self, name: str):
|
|
38
|
-
DuckDBPyType
|
|
35
|
+
def __getattr__(self, name: str): DuckDBPyType
|
|
36
|
+
def __getitem__(self, name: str): DuckDBPyType
|
duckdb/udf.py
CHANGED
|
@@ -1,15 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def vectorized(func: Callable) -> Callable:
|
|
6
|
-
"""Decorate a function with annotated function parameters.
|
|
7
|
-
|
|
8
|
-
This allows DuckDB to infer that the function should be provided with pyarrow arrays and should expect
|
|
9
|
-
pyarrow array(s) as output.
|
|
1
|
+
def vectorized(func):
|
|
2
|
+
"""
|
|
3
|
+
Decorate a function with annotated function parameters, so DuckDB can infer that the function should be provided with pyarrow arrays and should expect pyarrow array(s) as output
|
|
10
4
|
"""
|
|
11
|
-
import types
|
|
12
5
|
from inspect import signature
|
|
6
|
+
import types
|
|
13
7
|
|
|
14
8
|
new_func = types.FunctionType(func.__code__, func.__globals__, func.__name__, func.__defaults__, func.__closure__)
|
|
15
9
|
# Construct the annotations:
|
|
@@ -17,6 +11,7 @@ def vectorized(func: Callable) -> Callable:
|
|
|
17
11
|
|
|
18
12
|
new_annotations = {}
|
|
19
13
|
sig = signature(func)
|
|
14
|
+
sig.parameters
|
|
20
15
|
for param in sig.parameters:
|
|
21
16
|
new_annotations[param] = pa.lib.ChunkedArray
|
|
22
17
|
|
duckdb/value/__init__.py
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
# noqa: D104
|