qutePandas 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qutePandas/apply/__init__.py +4 -0
- qutePandas/apply/apply.py +70 -0
- qutePandas/apply/apply_col.py +50 -0
- qutePandas/cleaning/__init__.py +6 -0
- qutePandas/cleaning/dropna.py +26 -0
- qutePandas/cleaning/dropna_col.py +28 -0
- qutePandas/cleaning/fillna.py +36 -0
- qutePandas/cleaning/remove_duplicates.py +28 -0
- qutePandas/core/__init__.py +9 -0
- qutePandas/core/connection.py +153 -0
- qutePandas/core/dataframe.py +121 -0
- qutePandas/core/display.py +183 -0
- qutePandas/grouping/__init__.py +4 -0
- qutePandas/grouping/groupby_avg.py +39 -0
- qutePandas/grouping/groupby_sum.py +40 -0
- qutePandas/indexing/__init__.py +3 -0
- qutePandas/indexing/iloc.py +73 -0
- qutePandas/indexing/loc.py +53 -0
- qutePandas/introspection/__init__.py +0 -0
- qutePandas/introspection/dtypes.py +25 -0
- qutePandas/io/__init__.py +4 -0
- qutePandas/io/from_csv.py +27 -0
- qutePandas/io/to_csv.py +39 -0
- qutePandas/joining/__init__.py +3 -0
- qutePandas/joining/merge.py +94 -0
- qutePandas/transformation/__init__.py +5 -0
- qutePandas/transformation/cast.py +71 -0
- qutePandas/transformation/drop_col.py +53 -0
- qutePandas/transformation/rename.py +35 -0
- {qutepandas-1.0.0.dist-info → qutepandas-1.1.0.dist-info}/METADATA +1 -1
- qutepandas-1.1.0.dist-info/RECORD +35 -0
- qutepandas-1.0.0.dist-info/RECORD +0 -6
- {qutepandas-1.0.0.dist-info → qutepandas-1.1.0.dist-info}/WHEEL +0 -0
- {qutepandas-1.0.0.dist-info → qutepandas-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
import pandas as _pd
|
|
3
|
+
import builtins
|
|
4
|
+
|
|
5
|
+
def py(obj):
    """
    Convert a PyKX object into its plain-Python equivalent.

    Parameters
    ----------
    obj : pykx.K
        PyKX value to convert (anything exposing ``.py()``).

    Returns
    -------
    any
        Native Python representation of ``obj``.

    Raises
    ------
    RuntimeError
        If the underlying conversion fails.
    """
    try:
        converted = obj.py()
    except Exception as e:
        raise RuntimeError(f"Failed to convert PyKX object to Python: {e}")
    return converted
|
|
23
|
+
|
|
24
|
+
def np(obj):
    """
    Convert a PyKX object into a NumPy array.

    Parameters
    ----------
    obj : pykx.K
        PyKX value to convert (anything exposing ``.np()``).

    Returns
    -------
    numpy.ndarray
        NumPy representation of ``obj``.

    Raises
    ------
    RuntimeError
        If the underlying conversion fails.
    """
    try:
        converted = obj.np()
    except Exception as e:
        raise RuntimeError(f"Failed to convert PyKX object to NumPy: {e}")
    return converted
|
|
42
|
+
|
|
43
|
+
def pd(obj):
    """
    Convert a PyKX object into a pandas DataFrame or Series.

    Parameters
    ----------
    obj : pykx.K
        PyKX value to convert (anything exposing ``.pd()``).

    Returns
    -------
    pandas.DataFrame or pandas.Series
        Pandas representation of ``obj``.

    Raises
    ------
    RuntimeError
        If the underlying conversion fails.
    """
    try:
        converted = obj.pd()
    except Exception as e:
        raise RuntimeError(f"Failed to convert PyKX object to Pandas: {e}")
    return converted
|
|
61
|
+
|
|
62
|
+
def pa(obj):
    """
    Convert a PyKX object into a PyArrow Table.

    Parameters
    ----------
    obj : pykx.K
        PyKX value to convert (anything exposing ``.pa()``).

    Returns
    -------
    pyarrow.Table
        PyArrow representation of ``obj``.

    Raises
    ------
    RuntimeError
        If the underlying conversion fails.
    """
    try:
        converted = obj.pa()
    except Exception as e:
        raise RuntimeError(f"Failed to convert PyKX object to PyArrow: {e}")
    return converted
|
|
80
|
+
|
|
81
|
+
def pt(obj):
    """
    Convert a PyKX object into a PyTorch Tensor.

    Parameters
    ----------
    obj : pykx.K
        PyKX value to convert (anything exposing ``.pt()``).

    Returns
    -------
    torch.Tensor
        PyTorch representation of ``obj``.

    Raises
    ------
    RuntimeError
        If the underlying conversion fails.
    """
    try:
        converted = obj.pt()
    except Exception as e:
        raise RuntimeError(f"Failed to convert PyKX object to PyTorch: {e}")
    return converted
|
|
99
|
+
|
|
100
|
+
def print(obj, head=None, tail=None):
    """
    Print a PyKX table as a box-drawn ASCII table.

    Empty pandas DataFrames and empty q dictionaries print a short
    "Empty DataFrame" marker; anything that is not a pykx Table or
    KeyedTable falls back to the builtin ``print``.

    NOTE: this intentionally shadows the builtin ``print`` within this
    module's public API; internal output goes through ``builtins.print``.

    Parameters
    ----------
    obj : pykx.Table, pykx.KeyedTable, pykx.Dictionary, or any
        The object to print.
    head : int, optional
        Number of rows to show from the beginning.
    tail : int, optional
        Number of rows to show from the end.  Ignored when ``head``
        is also given.

    Raises
    ------
    RuntimeError
        If rendering the table fails.
    """
    try:
        # Empty pandas DataFrames get a textual marker; non-empty ones
        # fall through to the generic builtin print below.
        if isinstance(obj, _pd.DataFrame):
            if len(obj.columns) == 0:
                builtins.print("Empty DataFrame")
                return

        if isinstance(obj, kx.Dictionary):
            try:
                # A dictionary with no keys and no values renders as empty.
                is_empty = kx.q('{(0=count key x) and (0=count value x)}', obj).py()
                if is_empty:
                    builtins.print("Empty DataFrame")
                    return
                else:
                    builtins.print(obj)
                    return
            except Exception:
                # Was a bare `except:` — narrowed to Exception so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                builtins.print(obj)
                return

        if isinstance(obj, (kx.Table, kx.KeyedTable)):
            # Apply the head/tail row limit via q's sublist (head wins).
            if head is not None:
                table = kx.q('{[t;n] n sublist t}', obj, head)
            elif tail is not None:
                table = kx.q('{[t;n] neg[n] sublist t}', obj, tail)
            else:
                table = obj

            cols = kx.q('cols', table).py()
            if len(cols) == 0:
                builtins.print("Empty Table")
                return

            # Pull every column into Python once; rows_data[i][j] is
            # column i, row j.
            rows_data = [kx.q('{[t;c] t[c]}', table, col).py() for col in cols]

            num_rows = len(rows_data[0]) if rows_data else 0

            # Each column is as wide as its widest cell or its header.
            col_widths = []
            for i, col in enumerate(cols):
                max_width = len(str(col))
                for j in range(num_rows):
                    max_width = max(max_width, len(str(rows_data[i][j])))
                col_widths.append(max_width)

            def format_row(values):
                # Left-justify each cell to its column's width.
                parts = [str(val).ljust(width) for val, width in zip(values, col_widths)]
                return "│ " + " │ ".join(parts) + " │"

            # (removed an unused `total_width` computation)
            top_border = "┌" + "┬".join("─" * (w + 2) for w in col_widths) + "┐"
            mid_border = "├" + "┼".join("─" * (w + 2) for w in col_widths) + "┤"
            bot_border = "└" + "┴".join("─" * (w + 2) for w in col_widths) + "┘"

            builtins.print(top_border)
            builtins.print(format_row(cols))
            builtins.print(mid_border)

            for row_idx in range(num_rows):
                row_values = [rows_data[col_idx][row_idx] for col_idx in range(len(cols))]
                builtins.print(format_row(row_values))

            builtins.print(bot_border)
        else:
            builtins.print(obj)
    except Exception as e:
        raise RuntimeError(f"Failed to print table: {e}")
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from ..utils import _ensure_q_table, _handle_return, _validate_columns
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def groupby_avg(df, by_cols, avg_col, return_type='q'):
    """
    Group a table by one or more columns and average a target column.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input table.
    by_cols : str or list of str
        Column name(s) to group by.
    avg_col : str
        Column whose mean is computed per group.
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pandas.DataFrame or pykx.Table
        One row per group with the averaged column.

    Raises
    ------
    RuntimeError
        If the grouping query fails.
    """
    try:
        q_table = _ensure_q_table(df)
        keys = [by_cols] if isinstance(by_cols, str) else by_cols

        _validate_columns(q_table, keys + [avg_col])
        by_clause = ",".join(keys)

        # Keyed aggregation, then 0! to unkey back to a plain table.
        keyed_result = kx.q(f"{{select avg {avg_col} by {by_clause} from x}}", q_table)
        unkeyed = kx.q("{0!x}", keyed_result)
        return _handle_return(unkeyed, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to group by avg: {e}")
|
|
39
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from ..utils import _ensure_q_table, _handle_return, _validate_columns
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def groupby_sum(df, by_cols, sum_col, return_type='q'):
    """
    Group a table by one or more columns and sum a target column.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input table.
    by_cols : str or list of str
        Column name(s) to group by.
    sum_col : str
        Column whose total is computed per group.
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pandas.DataFrame or pykx.Table
        One row per group with the summed column.

    Raises
    ------
    RuntimeError
        If the grouping query fails.
    """
    try:
        q_table = _ensure_q_table(df)
        keys = [by_cols] if isinstance(by_cols, str) else by_cols

        _validate_columns(q_table, keys + [sum_col])
        by_clause = ",".join(keys)

        # Keyed aggregation, then 0! to unkey back to a plain table.
        keyed_result = kx.q(f"{{select sum {sum_col} by {by_clause} from x}}", q_table)
        unkeyed = kx.q("{0!x}", keyed_result)

        return _handle_return(unkeyed, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to group by sum: {e}")
|
|
40
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
from ..utils import _ensure_q_table, _handle_return, _validate_columns
|
|
3
|
+
|
|
4
|
+
def iloc(df, rows=None, cols=None, return_type='q'):
    """
    Pure integer-location based indexing for selection by position.

    Negative row/column integers count from the end, as in pandas.

    Parameters
    ----------
    df : pykx.Table or pd.DataFrame
        Input data.
    rows : int, list, slice, or None
        Row indices to select.  None selects all rows.
    cols : int, list, slice, or None
        Column indices to select.  None selects all columns.
    return_type : str, default 'q'
        'q' for pykx.Table, 'p' for pandas DataFrame.

    Returns
    -------
    pykx.Table or pd.DataFrame
        Subset of the inputs.
    """
    table = _ensure_q_table(df)

    # Row count and column names are needed to resolve negative indices
    # and slices against the actual table size.
    count = kx.q("count", table).py()
    all_cols = kx.q("cols", table).py()

    # Normalise `rows` into an explicit list of non-negative indices,
    # or None meaning "all rows".
    if rows is None:
        row_indices = None
    elif isinstance(rows, int):
        q_rows = rows
        if q_rows < 0: q_rows += count
        row_indices = [q_rows]
    elif isinstance(rows, slice):
        row_indices = list(range(*rows.indices(count)))
    else:
        row_indices = list(rows)
        row_indices = [r + count if r < 0 else r for r in row_indices]

    # Normalise `cols` into a list of column *names*, or None for "all".
    if cols is None:
        target_cols = None
    else:
        if isinstance(cols, int):
            target_cols = [all_cols[cols]]
        elif isinstance(cols, slice):
            target_cols = [all_cols[i] for i in range(*cols.indices(len(all_cols)))]
        else:
            target_cols = [all_cols[i] for i in cols]

    # Fast path: nothing to select, return the table unchanged.
    if row_indices is None and target_cols is None:
        return _handle_return(table, return_type)

    if row_indices is not None and target_cols is not None:
        # Functional select over the row subset: ?[t idx;();0b;cols].
        q_query = '{?[x y;();0b;z]}'
        syms = kx.SymbolVector(target_cols)
        # name->name dict keeps the original column names in the result.
        cols_dict = kx.q('!', syms, syms)
        q_res = kx.q(q_query, table, kx.LongVector(row_indices), cols_dict)

    elif row_indices is not None:
        # Rows only: index the table directly with a long vector.
        q_query = '{x y}'
        q_res = kx.q(q_query, table, kx.LongVector(row_indices))

    elif target_cols is not None:
        # Columns only: functional select with no where clause.
        q_query = '{?[x;();0b;y]}'
        syms = kx.SymbolVector(target_cols)
        cols_dict = kx.q('!', syms, syms)
        q_res = kx.q(q_query, table, cols_dict)

    else:
        q_res = table

    return _handle_return(q_res, return_type)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
from ..utils import _ensure_q_table, _handle_return, _validate_columns
|
|
3
|
+
|
|
4
|
+
def loc(df, rows=None, cols=None, return_type='q'):
    """
    Pure label-location based indexing for selection by label (or boolean array).

    Parameters
    ----------
    df : pykx.Table or pd.DataFrame
        Input data.
    rows : list of bool, pykx.BooleanVector, or None
        Boolean mask for row selection.  None selects all rows.
    cols : str, list of str, or None
        Column names to select.  None selects all columns.
    return_type : str, default 'q'
        'q' for pykx.Table, 'p' for pandas DataFrame.

    Returns
    -------
    pykx.Table or pd.DataFrame
        Subset of the inputs.
    """
    table = _ensure_q_table(df)

    # Convert a Python list mask to a q vector; pykx vectors and other
    # q values pass through unchanged.
    q_rows = None
    if rows is not None:
        if isinstance(rows, list):
            q_rows = kx.toq(rows)
        else:
            q_rows = rows

    # Build a name->name dict for the functional select's column spec.
    q_cols = None
    if cols is not None:
        if isinstance(cols, str):
            cols = [cols]
        _validate_columns(table, cols)
        syms = kx.SymbolVector(cols)
        q_cols = kx.q('!', syms, syms)

    # Functional select ?[t;where;by;cols].  The boolean mask is
    # enlisted so it acts as a single where constraint.
    if q_rows is not None and q_cols is not None:
        q_res = kx.q("{?[x;enlist y;0b;z]}", table, q_rows, q_cols)

    elif q_rows is not None:
        # Rows only: empty () column spec keeps every column.
        q_res = kx.q("{?[x;enlist y;0b;()]}", table, q_rows)

    elif q_cols is not None:
        # Columns only: empty where clause keeps every row.
        q_res = kx.q("{?[x;();0b;y]}", table, q_cols)

    else:
        q_res = table

    return _handle_return(q_res, return_type)
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
from ..utils import _ensure_q_table, _handle_return
|
|
3
|
+
|
|
4
|
+
def dtypes(df, return_type='q'):
    """
    Return the kdb+ meta information (column names and types) of a table.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input table.
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pykx.MetaTable or pandas.DataFrame
        Table containing column names and their kdb+ types.

    Raises
    ------
    RuntimeError
        If the meta query fails.
    """
    try:
        q_table = _ensure_q_table(df)
        meta = kx.q("{meta x}", q_table)
        return _handle_return(meta, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to retrieve data types: {e}")
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from ..utils import _handle_return
|
|
4
|
+
import pyarrow.csv as pa_csv
|
|
5
|
+
|
|
6
|
+
def from_csv(path, return_type='q'):
    """
    Load a CSV file into a table.

    The file is parsed with PyArrow's CSV reader and then converted
    into a q table.

    Parameters
    ----------
    path : str
        File path to load CSV from.
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pandas.DataFrame or pykx.Table
        Loaded table.

    Raises
    ------
    RuntimeError
        If reading or converting the file fails.
    """
    try:
        arrow_table = pa_csv.read_csv(path)
        return _handle_return(kx.toq(arrow_table), return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to load CSV file {path}: {e}")
|
qutePandas/io/to_csv.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pyarrow.csv as pa_csv
|
|
4
|
+
from ..utils import _ensure_q_table
|
|
5
|
+
|
|
6
|
+
def to_csv(df, path):
    """
    Export a table to a CSV file.

    Objects exposing ``.pa()`` (PyKX values) are written through
    PyArrow; everything else is routed through pandas.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input table.
    path : str
        File path to save CSV.

    Returns
    -------
    str
        Success message.

    Raises
    ------
    RuntimeError
        If the export fails.
    """
    try:
        # Fast path: PyKX objects convert to Arrow and write natively.
        if hasattr(df, 'pa'):
            pa_csv.write_csv(df.pa(), path)
            return f"Table saved to: {path}"

        if isinstance(df, (kx.Table, kx.KeyedTable)):
            frame = df.pd()
        elif isinstance(df, pd.DataFrame):
            frame = df
        else:
            frame = pd.DataFrame(df)

        frame.to_csv(path, index=False)
        return f"Table saved to: {path}"

    except Exception as e:
        raise RuntimeError(f"Failed to save table to CSV: {e}")
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from ..utils import _ensure_q_table, _handle_return
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, return_type="q"):
    """
    Merge DataFrame or pykx.Table objects with a database-style join.

    Parameters
    ----------
    left : pandas.DataFrame or pykx.Table
        Left object.
    right : pandas.DataFrame or pykx.Table
        Right object.
    how : {'left', 'right', 'outer', 'inner'}, default 'inner'
        Type of merge to be performed.
    on : label or list
        Column or index level names to join on. These must be found in both DataFrames.
    left_on : label or list
        Column or index level names to join on in the left DataFrame.
    right_on : label or list
        Column or index level names to join on in the right DataFrame.
    left_index : bool, default False
        Use the index from the left DataFrame as the join key(s).
        (Currently not fully supported; this argument is not consulted.)
    right_index : bool, default False
        Use the index from the right DataFrame as the join key(s).
        (Currently not fully supported; this argument is not consulted.)
    sort : bool, default False
        Sort the join keys lexicographically in the result DataFrame.
        (Currently not fully supported; this argument is not consulted.)
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pandas.DataFrame or pykx.Table
        Merged object.

    Raises
    ------
    RuntimeError
        If the join fails (wraps the ValueError raised for bad keys/how).
    """
    try:
        q_left = _ensure_q_table(left)
        q_right = _ensure_q_table(right)

        # No keys given: default to all columns common to both tables,
        # mirroring pandas' behaviour.
        if on is None and left_on is None and right_on is None:
            left_cols = set(kx.q("cols", q_left).py())
            right_cols = set(kx.q("cols", q_right).py())
            common = list(left_cols.intersection(right_cols))
            if not common:
                raise ValueError("No common columns found and no join keys specified.")
            on = common

        l_keys = on if left_on is None else left_on
        r_keys = on if right_on is None else right_on

        if isinstance(l_keys, str):
            l_keys = [l_keys]
        if isinstance(r_keys, str):
            r_keys = [r_keys]

        # When key names differ, copy each right-hand key column under the
        # left-hand name (q `update lk:rk`) so both sides key on the same
        # column names.  NOTE(review): the original right-hand key column
        # is kept, not dropped — confirm this is intended.
        if l_keys != r_keys:
            update_clauses = []
            for lk, rk in zip(l_keys, r_keys):
                if lk != rk:
                    update_clauses.append(f"{lk}:{rk}")

            if update_clauses:
                update_str = ",".join(update_clauses)
                q_right = kx.q(f"{{update {update_str} from x}}", q_right)

            r_keys = l_keys

        # Backtick-joined key list used inside the q xkey expressions below.
        key_cols = "`" + "`".join(l_keys)

        if how == 'inner':
            # ij keeps only left rows whose keys exist in the keyed right table.
            keyed_right = kx.q(f'{{ {key_cols} xkey x }}', q_right)
            result = kx.q("{x ij y}", q_left, keyed_right)
        elif how == 'left':
            # lj keeps every left row, filling matches from the keyed right table.
            keyed_right = kx.q(f'{{ {key_cols} xkey x }}', q_right)
            result = kx.q("{x lj y}", q_left, keyed_right)
        elif how == 'right':
            # Right join is a left join with the operands swapped, after
            # which the columns are reordered to put the left table's first.
            keyed_left = kx.q(f'{{ {key_cols} xkey x }}', q_left)
            result = kx.q("{x lj y}", q_right, keyed_left)
            left_all_cols = kx.q("cols", q_left).py()
            right_all_cols = kx.q("cols", q_right).py()
            target_cols = "`" + "`".join(left_all_cols + [c for c in right_all_cols if c not in left_all_cols])
            result = kx.q(f"{{ {target_cols} xcols x }}", result)
        elif how == 'outer':
            # uj unions the two keyed tables; 0! unkeys the result.
            keyed_left = kx.q(f'{{ {key_cols} xkey x }}', q_left)
            keyed_right = kx.q(f'{{ {key_cols} xkey x }}', q_right)
            result = kx.q("{0! x uj y}", keyed_left, keyed_right)
        else:
            raise ValueError(f"Invalid how: {how}. Must be one of 'left', 'right', 'outer', 'inner'.")

        return _handle_return(result, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to perform {how} join: {e}")
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import pykx as kx
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from ..utils import _ensure_q_table, _handle_return
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def cast(df, col, dtype, return_type='q'):
    """
    Converts column to specified data type.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input DataFrame.
    col : str
        Column name to cast.
    dtype : str
        Target data type ('i' for int, 'f' for float, 's' for symbol, etc.).
        Pandas/NumPy-style names ('int64', 'float32', 'str', ...) are also
        accepted and mapped to the corresponding q type character.
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pandas.DataFrame or pykx.Table
        DataFrame with column cast to new type.

    Raises
    ------
    RuntimeError
        If the cast fails or the target type name is not recognised.
    """
    try:
        # Map common dtype names onto single-character q type codes;
        # single-character codes pass through unchanged.
        q_map = {
            'int64': 'j', 'int32': 'i', 'int': 'i', 'long': 'j',
            'float64': 'f', 'float32': 'e', 'float': 'f', 'real': 'e',
            'object': 's', 'string': 'C', 'str': 'C',
            'j': 'j', 'i': 'i', 'h': 'h', 'f': 'f', 'e': 'e', 's': 's', 'c': 'c'
        }

        q_type = q_map.get(dtype, dtype)
        q_table = _ensure_q_table(df)

        # q cast characters are exactly one character; anything else is
        # an unrecognised type name.
        if len(q_type) != 1:
            raise ValueError(f"Unsupported q cast type: {dtype}")

        # Numeric q type code of the column's current content; abs() below
        # normalises atom (negative) vs vector (positive) codes.
        curr_type = kx.q(f'{{type x`{col}}}', q_table).py()
        type_to_code = {
            'j': 7, 'i': 6, 'h': 5, 'f': 9, 'e': 8, 's': 11, 'c': 10, 'b': 1
        }
        target_code = type_to_code.get(q_type.lower())

        # No-op when the column already has the requested type.
        if target_code is not None and abs(curr_type) == target_code:
            return _handle_return(q_table, return_type)

        # Type 0 (mixed list) and 10 (char) need string *parsing*, which q
        # spells with an upper-case cast character; a plain numeric cast
        # uses the lower-case character.
        is_parsing = curr_type in (0, 10)
        q_char = q_type.upper() if is_parsing else q_type.lower()

        if q_char.lower() == 's':
            # Symbols are produced via `$string rather than a "$" cast.
            result = kx.q(
                f'{{update {col}:`$ string {col} from x}}',
                q_table
            )
        elif q_char in ('i', 'j'):
            # Integer casts truncate toward zero: floor for non-negative
            # values, ceiling for negative ones, before the "$" cast.
            result = kx.q(
                f'{{update {col}:"{q_char}"$(({col}>=0)*floor {col} + ({col}<0)*ceiling {col}) from x}}',
                q_table
            )
        else:
            result = kx.q(
                f'{{update {col}:"{q_char}"${col} from x}}',
                q_table
            )

        return _handle_return(result, return_type)

    except Exception as e:
        raise RuntimeError(f"Failed to cast column {col} to type {dtype}: {e}")
|