qutePandas 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
qutePandas/__init__.py CHANGED
@@ -1,11 +1,11 @@
1
- """
2
- qutePandas - A pandas-like library for q/kdb+
3
- """
4
-
5
1
  import os
2
+ import sys
6
3
 
7
- def _setup_environment():
8
- """Validates and sets up the environment for PyKX."""
4
+ def _setup_pykx_environment():
5
+ """
6
+ Sets up the environment for PyKX BEFORE any PyKX imports.
7
+ This must run before importing any module that uses PyKX.
8
+ """
9
9
  root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
10
10
  env_path = os.path.join(root, ".env")
11
11
 
@@ -20,17 +20,47 @@ def _setup_environment():
20
20
  if len(parts) == 2:
21
21
  key, value = parts
22
22
  value = value.strip().strip('"').strip("'")
23
+ # Only set if not already in environment
23
24
  if key.strip() and key.strip() not in os.environ:
24
25
  os.environ[key.strip()] = value
25
-
26
+
27
+ # Check for valid license files in priority order
26
28
  qutepandas_home = os.path.expanduser("~/.qutepandas")
27
29
  local_kdb = os.path.join(root, "kdb_lic")
30
+
31
+ license_found = False
32
+ valid_license_path = None
33
+
28
34
  if os.path.exists(os.path.join(local_kdb, "kc.lic")):
29
- os.environ['QLIC'] = local_kdb
35
+ valid_license_path = local_kdb
36
+ license_found = True
30
37
  elif os.path.exists(os.path.join(qutepandas_home, "kc.lic")):
31
- os.environ['QLIC'] = qutepandas_home
38
+ valid_license_path = qutepandas_home
39
+ license_found = True
40
+
41
+ if license_found and valid_license_path:
42
+ os.environ['QLIC'] = valid_license_path
43
+ os.environ.pop('PYKX_UNLICENSED', None)
44
+ elif 'QLIC' in os.environ:
45
+ qlic_path = os.environ['QLIC']
46
+ if not (os.path.exists(os.path.join(qlic_path, "kc.lic")) or
47
+ os.path.exists(os.path.join(qlic_path, "k4.lic"))):
48
+ del os.environ['QLIC']
49
+ if 'QHOME' in os.environ:
50
+ del os.environ['QHOME']
51
+ license_found = False
52
+
53
+ if not license_found and 'PYKX_UNLICENSED' not in os.environ:
54
+ os.environ['PYKX_UNLICENSED'] = 'true'
55
+
56
+ if 'PYKX_RELEASE_GIL' not in os.environ:
57
+ os.environ['PYKX_RELEASE_GIL'] = 'true'
58
+
59
+ if 'PYKX_ENFORCE_EMBEDDED_IMPORT' not in os.environ:
60
+ os.environ['PYKX_ENFORCE_EMBEDDED_IMPORT'] = '0'
61
+
62
+ _setup_pykx_environment()
32
63
 
33
- _setup_environment()
34
64
  from .core.dataframe import DataFrame
35
65
  from .core.connection import connect, get_license_info, install_license
36
66
  from .core.display import py, np, pd, pa, pt, print
@@ -59,4 +89,4 @@ from .introspection.dtypes import dtypes
59
89
 
60
90
  from .indexing import loc, iloc
61
91
 
62
- __version__ = "1.0.0"
92
+ __version__ = "1.1.1"
@@ -0,0 +1,4 @@
1
+ from .apply import apply
2
+ from .apply_col import apply_col
3
+
4
+ __all__ = ['apply', 'apply_col']
@@ -0,0 +1,76 @@
1
+ import pykx as kx
2
+ import pandas as pd
3
+ from ..utils import _ensure_q_table, _handle_return
4
+
5
# q lambdas used instead of the generic per-row `each` when a string function
# has a whole-table equivalent for axis=1.
_VECTORIZED_AXIS1 = {
    # Null-fill every column with 0, then sum the column vectors.
    'sum': '{sum (0^) each value flip x}',
}


def apply(df, func, axis=0, return_type='q'):
    """
    Applies function to DataFrame along specified axis.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input DataFrame.
    func : callable or str
        Function to apply. A string is spliced verbatim into a q lambda and
        evaluated, so it must be trusted q code. A callable is invoked from
        Python on pandas data (one row or one column at a time).
    axis : int, default 0
        Axis along which to apply function (0=columns, 1=rows).
    return_type : str, default 'q'
        Desired return type ('p' or 'q'), forwarded to ``_handle_return``.

    Returns
    -------
    pandas.DataFrame or pandas.Series or pykx.Table or pykx.K
        Result of applying function. For an empty table with a callable
        ``func`` the pandas ``DataFrame.apply`` result is returned directly,
        regardless of ``return_type``.

    Raises
    ------
    RuntimeError
        If any step fails; the original exception is chained as the cause.
    """
    try:
        q_table = _ensure_q_table(df)

        if len(q_table) == 0:
            if axis == 1:
                return _handle_return(kx.toq([]), return_type)
            if not isinstance(func, str):
                pdf = df if isinstance(df, pd.DataFrame) else q_table.pd()
                return pdf.apply(func, axis=axis)
            elif func == "sum":
                cols = kx.q("cols", q_table).py()
                result = kx.toq({c: 0 for c in cols})
                ret = _handle_return(result, return_type)
                return pd.Series(ret) if return_type == 'p' else ret
            # Any other string function on an empty table falls through to
            # the general path below.

        if isinstance(func, str):
            if axis == 1 and func in _VECTORIZED_AXIS1:
                result = kx.q(_VECTORIZED_AXIS1[func], q_table)
            elif axis == 1:
                result = kx.q(f"{{({func}) each x}}", q_table)
            else:
                result = kx.q(f"{{({func}) each flip x}}", q_table)
        else:
            if axis == 1:
                pdf = q_table.pd()
                res_list = [func(row) for _, row in pdf.iterrows()]
                result = kx.toq(res_list)
            else:
                cols = kx.q("cols", q_table).py()

                res_dict = {}
                for col in cols:
                    # Pass the column name as a symbol argument (as
                    # dropna_col does) instead of splicing it into q source,
                    # so names that are not plain q identifiers still work.
                    col_data = kx.q("{x y}", q_table, kx.SymbolAtom(col)).pd()
                    res_dict[col] = func(col_data)

                result = kx.toq(res_dict)

        ret = _handle_return(result, return_type)
        if return_type == 'p' and isinstance(ret, dict):
            return pd.Series(ret)
        return ret

    except Exception as e:
        raise RuntimeError(f"Failed to apply function: {e}") from e
@@ -0,0 +1,50 @@
1
+ import pykx as kx
2
+ import pandas as pd
3
+ import numpy as np
4
+ from ..utils import _ensure_q_table, _handle_return
5
+
6
+
7
def apply_col(df, col, func, return_type='q'):
    """
    Applies function to a single column of DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input DataFrame.
    col : str
        Column name to apply function to. It is spliced into the q
        ``update`` statement, so it must be a valid q column identifier.
    func : callable or str
        Function to apply to the column. If string, it is spliced verbatim
        into q source and applied with ``each``; it must be trusted q code.
    return_type : str, default 'q'
        Desired return type ('p' or 'q'), forwarded to ``_handle_return``.

    Returns
    -------
    pandas.DataFrame or pykx.Table
        DataFrame with function applied to specified column. An empty table
        is returned unchanged.

    Raises
    ------
    RuntimeError
        If any step fails; the original exception is chained as the cause.
    """
    try:
        q_table = _ensure_q_table(df)
        if len(q_table) == 0:
            return _handle_return(q_table, return_type)

        if isinstance(func, str):
            result = kx.q(f"{{update {col}:({func}) each {col} from x}}", q_table)
        else:
            # Read the column by passing its name as a symbol argument
            # (consistent with dropna_col) rather than building q source.
            col_data = kx.q("{x y}", q_table, kx.SymbolAtom(col)).py()

            if hasattr(col_data, 'apply'):
                new_data = col_data.apply(func)
            elif isinstance(col_data, list):
                new_data = [func(x) for x in col_data]
            else:
                new_data = np.vectorize(func)(col_data)

            q_new_data = kx.toq(new_data)
            result = kx.q(f"{{update {col}:y from x}}", q_table, q_new_data)

        return _handle_return(result, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to apply function to column {col}: {e}") from e
50
+
@@ -0,0 +1,6 @@
1
+ from .dropna import dropna
2
+ from .dropna_col import dropna_col
3
+ from .fillna import fillna
4
+ from .remove_duplicates import remove_duplicates
5
+
6
+ __all__ = ['dropna', 'dropna_col', 'fillna', 'remove_duplicates']
@@ -0,0 +1,26 @@
1
+ import pykx as kx
2
+ import pandas as pd
3
+ from ..utils import _ensure_q_table, _handle_return
4
+
5
def dropna(df, return_type='q'):
    """
    Drops any row containing null values.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input DataFrame.
    return_type : str, default 'q'
        Desired return type ('p' for pandas, 'q' for kdb+).

    Returns
    -------
    pandas.DataFrame or pykx.Table
        DataFrame with null rows removed.

    Raises
    ------
    RuntimeError
        If conversion or the q query fails; the original exception is
        chained as the cause.
    """
    try:
        q_table = _ensure_q_table(df)
        # Compute a null mask per column, combine the masks with `any`, and
        # keep the rows where the combined mask is false.
        result = kx.q("{select from x where not any null each value flip x}", q_table)
        return _handle_return(result, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to dropna: {e}") from e
@@ -0,0 +1,28 @@
1
+ import pykx as kx
2
+ import pandas as pd
3
+ from ..utils import _ensure_q_table, _handle_return
4
+
5
def dropna_col(df, col, return_type='q'):
    """
    Drops rows where a specific column is null.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input DataFrame.
    col : str
        Column name to check for nulls.
    return_type : str, default 'q'
        Desired return type ('p' for pandas, 'q' for kdb+).

    Returns
    -------
    pandas.DataFrame or pykx.Table
        DataFrame with filtered rows.

    Raises
    ------
    RuntimeError
        If conversion or the q query fails; the original exception is
        chained as the cause.
    """
    try:
        q_table = _ensure_q_table(df)
        # The column name travels as a symbol argument, never as q source.
        result = kx.q("{[t; c] select from t where not null t c}", q_table, kx.SymbolAtom(col))
        return _handle_return(result, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to dropna from column {col}: {e}") from e
@@ -0,0 +1,49 @@
1
+ import pykx as kx
2
+ import pandas as pd
3
+ from ..utils import _ensure_q_table, _handle_return
4
+
5
def fillna(df, col_or_values, fill_value=None, return_type='q'):
    """
    Fills null values in specified columns.

    Can be called in two ways:
    fillna(df, values_dict, return_type='q')
    fillna(df, col_name, fill_value, return_type='q')

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input DataFrame.
    col_or_values : str or dict
        If str, the column name to fill (requires fill_value).
        If dict, a mapping of column names to fill values.
        Column names are spliced into the q ``update`` statement, so they
        must be valid q column identifiers.
    fill_value : scalar, optional
        The value to fill nulls with when col_or_values is a column name.
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pandas.DataFrame or pykx.Table
        DataFrame with nulls filled.

    Raises
    ------
    RuntimeError
        On invalid arguments (the ValueError is wrapped) or if the q update
        fails; the original exception is chained as the cause.
    """
    try:
        if isinstance(col_or_values, str):
            if fill_value is None:
                raise ValueError("fill_value is required when col_or_values is a column name")
            values = {col_or_values: fill_value}
        elif isinstance(col_or_values, dict):
            values = col_or_values
        else:
            raise ValueError("col_or_values must be a column name (str) or a dictionary")

        q_table = _ensure_q_table(df)
        result = q_table

        for col, val in values.items():
            # Hand the fill value to q as an argument instead of formatting
            # it into source text. PyKX converts it (str -> symbol, int ->
            # long, float -> float), so strings with spaces or punctuation
            # and bool values no longer produce invalid or injected q code.
            # NOTE: qSQL resolves column names before lambda parameters, so
            # a table column literally named `fillval` would shadow it.
            result = kx.q(
                f"{{[t;fillval] update {col}:fillval^{col} from t}}",
                result,
                val,
            )

        return _handle_return(result, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to fillna: {e}") from e
@@ -0,0 +1,28 @@
1
+ import pykx as kx
2
+ import pandas as pd
3
+ from ..utils import _ensure_q_table, _handle_return
4
+
5
+
6
def remove_duplicates(df, return_type='q'):
    """
    Removes duplicate rows from the DataFrame, keeping the first occurrence.

    Parameters
    ----------
    df : pandas.DataFrame or pykx.Table
        Input DataFrame.
    return_type : str, default 'q'
        Desired return type ('p' or 'q').

    Returns
    -------
    pandas.DataFrame or pykx.Table
        DataFrame with duplicate rows removed.

    Raises
    ------
    RuntimeError
        If conversion or the q ``distinct`` call fails; the original
        exception is chained as the cause.
    """
    try:
        q_table = _ensure_q_table(df)
        result = kx.q("{distinct x}", q_table)
        return _handle_return(result, return_type)
    except Exception as e:
        raise RuntimeError(f"Failed to remove duplicates from table: {e}") from e
28
+
@@ -0,0 +1,9 @@
1
+ """
2
+ Core functionality for qutePandas - DataFrame creation and basic operations.
3
+ """
4
+
5
+ from .dataframe import DataFrame
6
+ from .display import py, np, pd, pa, pt, print
7
+ from .connection import connect, get_license_info
8
+
9
+ __all__ = ['DataFrame', 'py', 'np', 'pd', 'pa', 'pt', 'print', 'connect', 'get_license_info']
@@ -0,0 +1,153 @@
1
+ """
2
+ Connection and license management for qutePandas.
3
+ Enforces strict license validation with fail-fast behavior.
4
+ """
5
+
6
+ import os
7
+ import pykx as kx
8
+ import base64
9
+ import shutil
10
+
11
+
12
+ def _get_project_lic_dir():
13
+ """
14
+ Get the project-local license directory.
15
+ """
16
+ root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
17
+ return os.path.join(root, "kdb_lic")
18
+
19
+
20
def install_license(content, is_base64=True):
    """
    Installs a kdb+ license from base64 content or file path.

    The target directory is the project-local ``kdb_lic`` directory when it
    already exists, otherwise ``~/.qutepandas`` (created if needed). The
    license is stored there as ``kc.lic``.

    Parameters
    ----------
    content : str
        License content (base64) or file path. Surrounding whitespace and
        quotes are stripped.
    is_base64 : bool, default True
        Whether content is base64 encoded. URL-safe alphabets and missing
        padding are tolerated.

    Returns
    -------
    bool
        True if license installation was successful.

    Raises
    ------
    RuntimeError
        If the base64 content cannot be decoded, or the given license file
        does not exist.
    """
    target_dir = _get_project_lic_dir()
    if not os.path.exists(target_dir):
        target_dir = os.path.expanduser("~/.qutepandas")

    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, "kc.lic")

    content = content.strip().strip('"').strip("'")

    if is_base64:
        # Restore stripped padding and map the URL-safe alphabet back to the
        # standard one before decoding.
        padding = len(content) % 4
        if padding:
            content += "=" * (4 - padding)
        content = content.replace('-', '+').replace('_', '/')

        try:
            data = base64.b64decode(content)
        except Exception as e:
            raise RuntimeError(f"Invalid base64 license content: {e}") from e

        try:
            text_data = data.decode('utf-8')
            if text_data.startswith('0001'):
                # Decoded text starting with '0001' is exported as a token
                # through the environment rather than written as a license
                # file; any existing kc.lic in the target is removed.
                os.environ['KX_TOKEN'] = text_data
                os.environ['QTOK'] = text_data
                if os.path.exists(target_path):
                    os.remove(target_path)
                return True
        except (ValueError, OSError):
            # Best effort: payloads that are not UTF-8 (UnicodeDecodeError is
            # a ValueError), cannot be exported, or whose cleanup fails fall
            # through and are stored as a license file. Narrowed from a bare
            # `except:` so KeyboardInterrupt/SystemExit are not swallowed.
            pass

        with open(target_path, "wb") as f:
            f.write(data)
    else:
        if not os.path.exists(content):
            raise RuntimeError(f"License file not found: {content}")
        shutil.copy(content, target_path)

    os.environ['QLIC'] = target_dir
    return True
76
+
77
+
78
def _can_run_q():
    """Return True if a trivial q expression evaluates without raising."""
    try:
        kx.q('1+1')
    except Exception:
        # `Exception`, not a bare except: Ctrl-C and SystemExit must still
        # propagate while license locations are being probed.
        return False
    return True


def connect(license_path=None):
    """
    Establishes connection to kdb+.

    Tries, in order: the current environment (after applying
    ``license_path`` to ``QLIC`` if given), a token from the ``KDB_TOKEN``
    or ``KX_TOKEN`` environment variables, then a list of well-known
    directories containing ``kc.lic`` or ``k4.lic``.

    Parameters
    ----------
    license_path : str, optional
        Path to the license file or directory.

    Returns
    -------
    bool
        True once a q expression evaluates successfully.

    Raises
    ------
    RuntimeError
        If ``license_path`` does not exist, if the token cannot be
        installed, or if no candidate yields a working q session.
    """
    if license_path:
        if not os.path.exists(license_path):
            raise RuntimeError(f"License path does not exist: {license_path}")
        os.environ['QLIC'] = license_path

    if _can_run_q():
        return True

    token = (os.environ.get('KDB_TOKEN') or os.environ.get('KX_TOKEN', '')).strip()
    if token:
        install_license(token)
        if _can_run_q():
            return True

    possible_paths = [
        _get_project_lic_dir(),
        os.path.expanduser("~/.qutepandas"),
        '/Applications/kdb+:q',
        os.path.expanduser("~/kdb+"),
        os.path.expanduser("~/q"),
        os.path.expanduser("~/.pykx")
    ]

    for path in possible_paths:
        if not os.path.exists(path):
            continue

        if os.path.exists(os.path.join(path, "kc.lic")) or os.path.exists(os.path.join(path, "k4.lic")):
            # QLIC is left pointing at the last candidate tried, even when
            # that candidate fails.
            os.environ['QLIC'] = path
            if _can_run_q():
                return True

    raise RuntimeError(
        "No valid kdb+ license found. "
        "Set KDB_TOKEN in .env or place kc.lic in local kdb_lic/ or ~/.qutepandas/"
    )
132
+
133
+
134
def get_license_info():
    """
    Returns current license configuration status.

    Evaluates a trivial q expression and reports the ``QLIC`` / ``QHOME``
    environment values, whether ``KX_TOKEN`` is set, and a
    ``connection_status`` of 'Connected' or 'Failed'. On failure the dict
    also carries the exception text under ``error``.
    """
    failure = None
    try:
        kx.q('1+1')
    except Exception as exc:
        failure = exc

    info = {
        'qlic_path': os.environ.get('QLIC', 'Not set'),
        'qhome_path': os.environ.get('QHOME', 'Not set'),
        'kx_token_set': 'Yes' if os.environ.get('KX_TOKEN') else 'No',
    }
    if failure is None:
        info['connection_status'] = 'Connected'
    else:
        info['connection_status'] = 'Failed'
        info['error'] = str(failure)
    return info
@@ -0,0 +1,118 @@
1
+ import pykx as kx
2
+ import pandas as pd
3
+ import atexit
4
+ from ..utils import _handle_return
5
+
6
def DataFrame(data, columns=None):
    """
    Creates a qutePandas DataFrame (internal pykx Table).

    Dispatches on the input type: pandas DataFrames are converted with
    ``kx.toq``, pykx tables pass through, dicts and non-empty lists of
    lists go to their dedicated helpers, and everything else is handled by
    ``_data_to_table``.

    Parameters
    ----------
    data : array-like, dict, or pandas.DataFrame
        Data to be stored in the table.
    columns : list, optional
        Column names to use if data does not already have them. Ignored
        for pandas DataFrames, pykx tables and dicts.

    Returns
    -------
    pykx.Table
        The resulting kdb+ table.

    Raises
    ------
    RuntimeError
        If conversion fails; the original exception is chained as the cause.
    """
    try:
        if isinstance(data, pd.DataFrame):
            q_res = kx.toq(data)
        elif isinstance(data, (kx.Table, kx.KeyedTable)):
            q_res = data
        elif isinstance(data, dict):
            q_res = _dict_to_table(data)
        elif isinstance(data, list) and data and isinstance(data[0], list):
            q_res = _lists_to_table(data, columns)
        else:
            q_res = _data_to_table(data, columns)
        return _handle_return(q_res)
    except Exception as e:
        raise RuntimeError(f"Failed to create kdb+ table: {e}") from e
36
+
37
+
38
def _dict_to_table(data_dict):
    """
    Converts a dictionary to a kdb+ table.

    Parameters
    ----------
    data_dict : dict
        Dictionary to convert; it is converted with ``kx.toq`` and then
        flipped in q.

    Returns
    -------
    pykx.Table
        The resulting kdb+ table.

    Raises
    ------
    RuntimeError
        If conversion or the flip fails; the original exception is chained
        as the cause.
    """
    try:
        q_dict = kx.toq(data_dict)
        return kx.q("{flip x}", q_dict)
    except Exception as e:
        raise RuntimeError(f"Failed to create kdb+ table from dict: {e}") from e
57
+
58
+
59
def _lists_to_table(data_lists, columns=None):
    """
    Converts a list of lists to a kdb+ table.

    Parameters
    ----------
    data_lists : list of lists
        Row-oriented data to convert. Rows are transposed with ``zip``, so
        the table width is that of the shortest row.
    columns : list, optional
        Column names. Defaults to ``col_0``, ``col_1``, ... based on the
        length of the first row.

    Returns
    -------
    pykx.Table
        The resulting kdb+ table; an empty table when ``data_lists`` is
        empty.

    Raises
    ------
    RuntimeError
        If conversion fails (including ``columns`` being shorter than the
        row width); the original exception is chained as the cause.
    """
    try:
        if not data_lists:
            return kx.q("([] )")

        if columns is None:
            columns = [f'col_{i}' for i in range(len(data_lists[0]))]

        transposed = list(zip(*data_lists))
        # Index `columns` explicitly so that too few names raise instead of
        # silently dropping data columns.
        data_dict = {columns[i]: list(col_data) for i, col_data in enumerate(transposed)}
        q_dict = kx.toq(data_dict)
        return kx.q("{flip x}", q_dict)
    except Exception as e:
        raise RuntimeError(f"Failed to create kdb+ table from lists: {e}") from e
88
+
89
+
90
def _data_to_table(data, columns=None):
    """
    Converts generic data to a kdb+ table.

    Parameters
    ----------
    data : any
        Data to convert. Non-string, non-dict iterables are materialised
        into a list of rows; anything else becomes a single-row table.
    columns : list, optional
        Column names. When given, column ``i`` takes ``row[i]`` from each
        indexable row; scalar rows (including ``str`` and ``bytes``) are
        repeated in every column. When omitted, a single ``data`` column
        is produced.

    Returns
    -------
    pykx.Table
        The resulting kdb+ table.

    Raises
    ------
    RuntimeError
        If conversion fails; the original exception is chained as the cause.
    """
    def _cell(row, i):
        # Strings and bytes expose __getitem__ but are scalar values here;
        # indexing them would split the value into single characters.
        if isinstance(row, (str, bytes)) or not hasattr(row, '__getitem__'):
            return row
        return row[i]

    try:
        if hasattr(data, '__iter__') and not isinstance(data, (str, dict)):
            data_list = list(data)
            if columns:
                data_dict = {col: [_cell(row, i) for row in data_list]
                             for i, col in enumerate(columns)}
                return _dict_to_table(data_dict)
            else:
                return kx.toq(pd.DataFrame({'data': data_list}))
        else:
            return kx.toq(pd.DataFrame({'data': [data]}))
    except Exception as e:
        raise RuntimeError(f"Failed to create kdb+ table from data: {e}") from e