meerschaum 2.7.7__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/term/TermPageHandler.py +54 -4
- meerschaum/_internal/term/__init__.py +13 -5
- meerschaum/_internal/term/tools.py +41 -6
- meerschaum/actions/copy.py +1 -0
- meerschaum/actions/start.py +25 -10
- meerschaum/api/dash/callbacks/dashboard.py +43 -2
- meerschaum/api/dash/components.py +13 -6
- meerschaum/api/dash/keys.py +82 -108
- meerschaum/api/dash/pages/dashboard.py +17 -17
- meerschaum/api/dash/sessions.py +1 -0
- meerschaum/api/dash/webterm.py +17 -6
- meerschaum/api/resources/static/js/terminado.js +0 -2
- meerschaum/api/resources/templates/termpage.html +47 -4
- meerschaum/api/routes/_webterm.py +15 -11
- meerschaum/config/_default.py +6 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/static/__init__.py +2 -2
- meerschaum/connectors/sql/_SQLConnector.py +2 -9
- meerschaum/connectors/sql/_fetch.py +5 -30
- meerschaum/connectors/sql/_pipes.py +7 -4
- meerschaum/connectors/sql/_sql.py +56 -31
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
- meerschaum/core/Pipe/_fetch.py +4 -0
- meerschaum/core/Pipe/_sync.py +22 -15
- meerschaum/core/Pipe/_verify.py +1 -1
- meerschaum/utils/daemon/Daemon.py +24 -11
- meerschaum/utils/daemon/RotatingFile.py +3 -3
- meerschaum/utils/dataframe.py +42 -12
- meerschaum/utils/dtypes/__init__.py +153 -24
- meerschaum/utils/dtypes/sql.py +58 -9
- meerschaum/utils/formatting/__init__.py +2 -2
- meerschaum/utils/formatting/_pprint.py +13 -12
- meerschaum/utils/misc.py +32 -18
- meerschaum/utils/prompt.py +1 -1
- meerschaum/utils/sql.py +26 -8
- meerschaum/utils/venv/__init__.py +10 -14
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/METADATA +1 -1
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/RECORD +44 -44
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/zip-safe +0 -0
@@ -195,7 +195,8 @@ class SQLConnector(Connector):
|
|
195
195
|
self._debug = debug
|
196
196
|
### Store the PID and thread at initialization
|
197
197
|
### so we can dispose of the Pool in child processes or threads.
|
198
|
-
import os
|
198
|
+
import os
|
199
|
+
import threading
|
199
200
|
self._pid = os.getpid()
|
200
201
|
self._thread_ident = threading.current_thread().ident
|
201
202
|
self._sessions = {}
|
@@ -286,7 +287,6 @@ class SQLConnector(Connector):
|
|
286
287
|
return ':memory:' not in self.URI
|
287
288
|
return True
|
288
289
|
|
289
|
-
|
290
290
|
@property
|
291
291
|
def metadata(self):
|
292
292
|
"""
|
@@ -298,7 +298,6 @@ class SQLConnector(Connector):
|
|
298
298
|
self._metadata = sqlalchemy.MetaData(schema=self.schema)
|
299
299
|
return self._metadata
|
300
300
|
|
301
|
-
|
302
301
|
@property
|
303
302
|
def instance_schema(self):
|
304
303
|
"""
|
@@ -306,14 +305,12 @@ class SQLConnector(Connector):
|
|
306
305
|
"""
|
307
306
|
return self.schema
|
308
307
|
|
309
|
-
|
310
308
|
@property
|
311
309
|
def internal_schema(self):
|
312
310
|
"""
|
313
311
|
Return the schema name for internal tables.
|
314
312
|
"""
|
315
313
|
from meerschaum.config.static import STATIC_CONFIG
|
316
|
-
from meerschaum.utils.packages import attempt_import
|
317
314
|
from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
|
318
315
|
schema_name = self.__dict__.get('internal_schema', None) or (
|
319
316
|
STATIC_CONFIG['sql']['internal_schema']
|
@@ -325,7 +322,6 @@ class SQLConnector(Connector):
|
|
325
322
|
self._internal_schema = schema_name
|
326
323
|
return self._internal_schema
|
327
324
|
|
328
|
-
|
329
325
|
@property
|
330
326
|
def db(self) -> Optional[databases.Database]:
|
331
327
|
from meerschaum.utils.packages import attempt_import
|
@@ -342,7 +338,6 @@ class SQLConnector(Connector):
|
|
342
338
|
self._db = None
|
343
339
|
return self._db
|
344
340
|
|
345
|
-
|
346
341
|
@property
|
347
342
|
def db_version(self) -> Union[str, None]:
|
348
343
|
"""
|
@@ -356,7 +351,6 @@ class SQLConnector(Connector):
|
|
356
351
|
self._db_version = get_db_version(self)
|
357
352
|
return self._db_version
|
358
353
|
|
359
|
-
|
360
354
|
@property
|
361
355
|
def schema(self) -> Union[str, None]:
|
362
356
|
"""
|
@@ -376,7 +370,6 @@ class SQLConnector(Connector):
|
|
376
370
|
self.__dict__['schema'] = _schema
|
377
371
|
return _schema
|
378
372
|
|
379
|
-
|
380
373
|
def __getstate__(self):
|
381
374
|
return self.__dict__
|
382
375
|
|
@@ -11,7 +11,7 @@ from __future__ import annotations
|
|
11
11
|
from datetime import datetime, timedelta
|
12
12
|
|
13
13
|
import meerschaum as mrsm
|
14
|
-
from meerschaum.utils.typing import Optional, Union,
|
14
|
+
from meerschaum.utils.typing import Optional, Union, Any, List, Dict
|
15
15
|
|
16
16
|
|
17
17
|
def fetch(
|
@@ -20,7 +20,6 @@ def fetch(
|
|
20
20
|
begin: Union[datetime, int, str, None] = '',
|
21
21
|
end: Union[datetime, int, str, None] = None,
|
22
22
|
check_existing: bool = True,
|
23
|
-
chunk_hook: Optional[Callable[['pd.DataFrame'], Any]] = None,
|
24
23
|
chunksize: Optional[int] = -1,
|
25
24
|
workers: Optional[int] = None,
|
26
25
|
debug: bool = False,
|
@@ -53,15 +52,12 @@ def fetch(
|
|
53
52
|
check_existing: bool, defult True
|
54
53
|
If `False`, use a backtrack interval of 0 minutes.
|
55
54
|
|
56
|
-
chunk_hook: Callable[[pd.DataFrame], Any], default None
|
57
|
-
A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame.
|
58
|
-
|
59
55
|
chunksize: Optional[int], default -1
|
60
|
-
How many rows to load into memory at once
|
56
|
+
How many rows to load into memory at once.
|
61
57
|
Otherwise the entire result set is loaded into memory.
|
62
58
|
|
63
59
|
workers: Optional[int], default None
|
64
|
-
How many threads to use when consuming the generator
|
60
|
+
How many threads to use when consuming the generator.
|
65
61
|
Defaults to the number of cores.
|
66
62
|
|
67
63
|
debug: bool, default False
|
@@ -69,8 +65,7 @@ def fetch(
|
|
69
65
|
|
70
66
|
Returns
|
71
67
|
-------
|
72
|
-
A pandas DataFrame
|
73
|
-
If `chunk_hook` is not None, return a list of the hook function's results.
|
68
|
+
A pandas DataFrame generator.
|
74
69
|
"""
|
75
70
|
meta_def = self.get_pipe_metadef(
|
76
71
|
pipe,
|
@@ -80,33 +75,13 @@ def fetch(
|
|
80
75
|
debug=debug,
|
81
76
|
**kw
|
82
77
|
)
|
83
|
-
as_hook_results = chunk_hook is not None
|
84
78
|
chunks = self.read(
|
85
79
|
meta_def,
|
86
|
-
chunk_hook=chunk_hook,
|
87
|
-
as_hook_results=as_hook_results,
|
88
80
|
chunksize=chunksize,
|
89
81
|
workers=workers,
|
82
|
+
as_iterator=True,
|
90
83
|
debug=debug,
|
91
84
|
)
|
92
|
-
### if sqlite, parse for datetimes
|
93
|
-
if not as_hook_results and self.flavor == 'sqlite':
|
94
|
-
from meerschaum.utils.dataframe import parse_df_datetimes
|
95
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
96
|
-
ignore_cols = [
|
97
|
-
col
|
98
|
-
for col, dtype in pipe.dtypes.items()
|
99
|
-
if not are_dtypes_equal(str(dtype), 'datetime')
|
100
|
-
]
|
101
|
-
return (
|
102
|
-
parse_df_datetimes(
|
103
|
-
chunk,
|
104
|
-
ignore_cols=ignore_cols,
|
105
|
-
strip_timezone=(pipe.tzinfo is None),
|
106
|
-
debug=debug,
|
107
|
-
)
|
108
|
-
for chunk in chunks
|
109
|
-
)
|
110
85
|
return chunks
|
111
86
|
|
112
87
|
|
@@ -1125,7 +1125,7 @@ def get_pipe_data(
|
|
1125
1125
|
numeric_columns = [
|
1126
1126
|
col
|
1127
1127
|
for col, typ in pipe.dtypes.items()
|
1128
|
-
if typ
|
1128
|
+
if typ.startswith('numeric') and col in dtypes
|
1129
1129
|
]
|
1130
1130
|
uuid_columns = [
|
1131
1131
|
col
|
@@ -1887,7 +1887,10 @@ def sync_pipe(
|
|
1887
1887
|
warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
|
1888
1888
|
|
1889
1889
|
if update_df is not None and len(update_df) > 0:
|
1890
|
-
temp_target = self.get_temporary_target(
|
1890
|
+
temp_target = self.get_temporary_target(
|
1891
|
+
pipe.target,
|
1892
|
+
label=('update' if not upsert else 'upsert'),
|
1893
|
+
)
|
1891
1894
|
self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
|
1892
1895
|
temp_pipe = Pipe(
|
1893
1896
|
pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
|
@@ -3274,7 +3277,7 @@ def get_alter_columns_queries(
|
|
3274
3277
|
else [
|
3275
3278
|
col
|
3276
3279
|
for col, typ in df.items()
|
3277
|
-
if typ
|
3280
|
+
if typ.startswith('numeric')
|
3278
3281
|
]
|
3279
3282
|
)
|
3280
3283
|
df_cols_types = (
|
@@ -3354,7 +3357,7 @@ def get_alter_columns_queries(
|
|
3354
3357
|
+ f"{edit_msg}"
|
3355
3358
|
)
|
3356
3359
|
else:
|
3357
|
-
numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ
|
3360
|
+
numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ.startswith('numeric')])
|
3358
3361
|
|
3359
3362
|
numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
|
3360
3363
|
text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
|
@@ -126,7 +126,7 @@ def read(
|
|
126
126
|
return []
|
127
127
|
from meerschaum.utils.sql import sql_item_name, truncate_item_name
|
128
128
|
from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
|
129
|
-
from meerschaum.utils.dtypes.sql import
|
129
|
+
from meerschaum.utils.dtypes.sql import TIMEZONE_NAIVE_FLAVORS
|
130
130
|
from meerschaum.utils.packages import attempt_import, import_pandas
|
131
131
|
from meerschaum.utils.pool import get_pool
|
132
132
|
from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
|
@@ -802,16 +802,17 @@ def to_sql(
|
|
802
802
|
)
|
803
803
|
from meerschaum.utils.dtypes import (
|
804
804
|
are_dtypes_equal,
|
805
|
-
quantize_decimal,
|
806
805
|
coerce_timezone,
|
807
806
|
encode_bytes_for_bytea,
|
808
807
|
serialize_bytes,
|
808
|
+
serialize_decimal,
|
809
|
+
json_serialize_value,
|
809
810
|
)
|
810
811
|
from meerschaum.utils.dtypes.sql import (
|
811
|
-
NUMERIC_PRECISION_FLAVORS,
|
812
|
-
NUMERIC_AS_TEXT_FLAVORS,
|
813
812
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
|
814
813
|
get_db_type_from_pd_type,
|
814
|
+
get_pd_type_from_db_type,
|
815
|
+
get_numeric_precision_scale,
|
815
816
|
)
|
816
817
|
from meerschaum.utils.misc import interval_str
|
817
818
|
from meerschaum.connectors.sql._create_engine import flavor_configs
|
@@ -822,6 +823,16 @@ def to_sql(
|
|
822
823
|
|
823
824
|
bytes_cols = get_bytes_cols(df)
|
824
825
|
numeric_cols = get_numeric_cols(df)
|
826
|
+
numeric_cols_dtypes = {
|
827
|
+
col: typ
|
828
|
+
for col, typ in kw.get('dtype', {}).items()
|
829
|
+
if (
|
830
|
+
col in df.columns
|
831
|
+
and 'numeric' in str(typ).lower()
|
832
|
+
)
|
833
|
+
|
834
|
+
}
|
835
|
+
numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
|
825
836
|
|
826
837
|
enable_bulk_insert = mrsm.get_config(
|
827
838
|
'system', 'connectors', 'sql', 'bulk_insert'
|
@@ -854,12 +865,24 @@ def to_sql(
|
|
854
865
|
for col in bytes_cols:
|
855
866
|
df[col] = df[col].apply(bytes_serializer)
|
856
867
|
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
868
|
+
### Check for numeric columns.
|
869
|
+
for col in numeric_cols:
|
870
|
+
typ = numeric_cols_dtypes.get(col, None)
|
871
|
+
|
872
|
+
precision, scale = (
|
873
|
+
(typ.precision, typ.scale)
|
874
|
+
if hasattr(typ, 'precision')
|
875
|
+
else get_numeric_precision_scale(self.flavor)
|
876
|
+
)
|
877
|
+
|
878
|
+
df[col] = df[col].apply(
|
879
|
+
functools.partial(
|
880
|
+
serialize_decimal,
|
881
|
+
quantize=True,
|
882
|
+
precision=precision,
|
883
|
+
scale=scale,
|
884
|
+
)
|
885
|
+
)
|
863
886
|
|
864
887
|
stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
|
865
888
|
|
@@ -889,7 +912,7 @@ def to_sql(
|
|
889
912
|
if name != truncated_name:
|
890
913
|
warn(
|
891
914
|
f"Table '{name}' is too long for '{self.flavor}',"
|
892
|
-
|
915
|
+
f" will instead create the table '{truncated_name}'."
|
893
916
|
)
|
894
917
|
|
895
918
|
### filter out non-pandas args
|
@@ -957,24 +980,11 @@ def to_sql(
|
|
957
980
|
### Check for JSON columns.
|
958
981
|
if self.flavor not in json_flavors:
|
959
982
|
json_cols = get_json_cols(df)
|
960
|
-
|
961
|
-
for col in json_cols:
|
962
|
-
df[col] = df[col].apply(
|
963
|
-
(
|
964
|
-
lambda x: json.dumps(x, default=str, sort_keys=True)
|
965
|
-
if not isinstance(x, Hashable)
|
966
|
-
else x
|
967
|
-
)
|
968
|
-
)
|
969
|
-
|
970
|
-
### Check for numeric columns.
|
971
|
-
numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
|
972
|
-
if numeric_precision is not None and numeric_scale is not None:
|
973
|
-
for col in numeric_cols:
|
983
|
+
for col in json_cols:
|
974
984
|
df[col] = df[col].apply(
|
975
|
-
|
976
|
-
|
977
|
-
if isinstance(x,
|
985
|
+
(
|
986
|
+
lambda x: json.dumps(x, default=json_serialize_value, sort_keys=True)
|
987
|
+
if not isinstance(x, Hashable)
|
978
988
|
else x
|
979
989
|
)
|
980
990
|
)
|
@@ -1051,16 +1061,20 @@ def psql_insert_copy(
|
|
1051
1061
|
|
1052
1062
|
from meerschaum.utils.sql import sql_item_name
|
1053
1063
|
from meerschaum.utils.warnings import dprint
|
1064
|
+
from meerschaum.utils.dtypes import json_serialize_value
|
1054
1065
|
|
1055
1066
|
### NOTE: PostgreSQL doesn't support NUL chars in text, so they're removed from strings.
|
1056
1067
|
data_iter = (
|
1057
1068
|
(
|
1058
1069
|
(
|
1059
1070
|
(
|
1060
|
-
json.dumps(
|
1071
|
+
json.dumps(
|
1072
|
+
item,
|
1073
|
+
default=json_serialize_value,
|
1074
|
+
).replace('\0', '').replace('\\u0000', '')
|
1061
1075
|
if isinstance(item, (dict, list))
|
1062
1076
|
else (
|
1063
|
-
item
|
1077
|
+
json_serialize_value(item, default_to_str=False)
|
1064
1078
|
if not isinstance(item, str)
|
1065
1079
|
else item.replace('\0', '').replace('\\u0000', '')
|
1066
1080
|
)
|
@@ -1119,6 +1133,7 @@ def mssql_insert_json(
|
|
1119
1133
|
"""
|
1120
1134
|
import json
|
1121
1135
|
from meerschaum.utils.sql import sql_item_name
|
1136
|
+
from meerschaum.utils.dtypes import json_serialize_value
|
1122
1137
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
|
1123
1138
|
from meerschaum.utils.warnings import dprint
|
1124
1139
|
table_name = sql_item_name(table.name, 'mssql', table.schema)
|
@@ -1127,6 +1142,15 @@ def mssql_insert_json(
|
|
1127
1142
|
str(column.name): get_pd_type_from_db_type(str(column.type))
|
1128
1143
|
for column in table.table.columns
|
1129
1144
|
}
|
1145
|
+
numeric_cols_types = {
|
1146
|
+
col: table.table.columns[col].type
|
1147
|
+
for col, typ in pd_types.items()
|
1148
|
+
if typ.startswith('numeric') and col in keys
|
1149
|
+
}
|
1150
|
+
pd_types.update({
|
1151
|
+
col: f'numeric[{typ.precision},{typ.scale}]'
|
1152
|
+
for col, typ in numeric_cols_types.items()
|
1153
|
+
})
|
1130
1154
|
cols_types = {
|
1131
1155
|
col: get_db_type_from_pd_type(typ, 'mssql')
|
1132
1156
|
for col, typ in pd_types.items()
|
@@ -1151,7 +1175,8 @@ def mssql_insert_json(
|
|
1151
1175
|
if debug:
|
1152
1176
|
dprint(sql)
|
1153
1177
|
|
1154
|
-
|
1178
|
+
serialized_data = json.dumps(json_data, default=json_serialize_value)
|
1179
|
+
conn.exec_driver_sql(sql, (serialized_data,))
|
1155
1180
|
|
1156
1181
|
|
1157
1182
|
def format_sql_query_for_dask(query: str) -> 'sqlalchemy.sql.selectable.Select':
|
@@ -239,7 +239,7 @@ class ValkeyConnector(Connector):
|
|
239
239
|
-------
|
240
240
|
The current index counter value (how many docs have been pushed).
|
241
241
|
"""
|
242
|
-
from meerschaum.utils.
|
242
|
+
from meerschaum.utils.dtypes import json_serialize_value
|
243
243
|
table_name = self.quote_table(table)
|
244
244
|
datetime_column_key = self.get_datetime_column_key(table)
|
245
245
|
remote_datetime_column = self.get(datetime_column_key)
|
@@ -269,7 +269,7 @@ class ValkeyConnector(Connector):
|
|
269
269
|
) if datetime_column else None
|
270
270
|
doc_str = json.dumps(
|
271
271
|
doc,
|
272
|
-
default=
|
272
|
+
default=json_serialize_value,
|
273
273
|
separators=(',', ':'),
|
274
274
|
sort_keys=True,
|
275
275
|
)
|
meerschaum/core/Pipe/_fetch.py
CHANGED
@@ -84,6 +84,7 @@ def fetch(
|
|
84
84
|
begin=_determine_begin(
|
85
85
|
self,
|
86
86
|
begin,
|
87
|
+
end,
|
87
88
|
check_existing=check_existing,
|
88
89
|
debug=debug,
|
89
90
|
),
|
@@ -136,6 +137,7 @@ def get_backtrack_interval(
|
|
136
137
|
def _determine_begin(
|
137
138
|
pipe: mrsm.Pipe,
|
138
139
|
begin: Union[datetime, int, str, None] = '',
|
140
|
+
end: Union[datetime, int, None] = None,
|
139
141
|
check_existing: bool = True,
|
140
142
|
debug: bool = False,
|
141
143
|
) -> Union[datetime, int, None]:
|
@@ -157,6 +159,8 @@ def _determine_begin(
|
|
157
159
|
"""
|
158
160
|
if begin != '':
|
159
161
|
return begin
|
162
|
+
if end is not None:
|
163
|
+
return None
|
160
164
|
sync_time = pipe.get_sync_time(debug=debug)
|
161
165
|
if sync_time is None:
|
162
166
|
return sync_time
|
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -292,7 +292,6 @@ def sync(
|
|
292
292
|
message = '\n'.join([_message for _, _message in df])
|
293
293
|
return success, message
|
294
294
|
|
295
|
-
### TODO: Depreciate async?
|
296
295
|
if df is True:
|
297
296
|
p._exists = None
|
298
297
|
return True, f"{p} is being synced in parallel."
|
@@ -328,30 +327,37 @@ def sync(
|
|
328
327
|
_chunk_success, _chunk_msg = False, str(e)
|
329
328
|
if not _chunk_success:
|
330
329
|
failed_chunks.append(_chunk)
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
+ self._get_chunk_label(_chunk, dt_col)
|
336
|
-
+ '\n'
|
337
|
-
+ _chunk_msg
|
338
|
-
)
|
330
|
+
_chunk_msg = (
|
331
|
+
self._get_chunk_label(_chunk, dt_col)
|
332
|
+
+ '\n'
|
333
|
+
+ _chunk_msg
|
339
334
|
)
|
340
335
|
|
336
|
+
mrsm.pprint((_chunk_success, _chunk_msg), calm=True)
|
337
|
+
return _chunk_success, _chunk_msg
|
338
|
+
|
341
339
|
results = sorted(
|
342
340
|
[(chunk_success, chunk_msg)] + (
|
343
341
|
list(pool.imap(_process_chunk, df))
|
344
|
-
if
|
345
|
-
|
342
|
+
if (
|
343
|
+
not df_is_chunk_generator(chunk) # Handle nested generators.
|
344
|
+
and kw.get('workers', 1) != 1
|
345
|
+
)
|
346
|
+
else list(
|
346
347
|
_process_chunk(_child_chunks)
|
347
348
|
for _child_chunks in df
|
348
|
-
|
349
|
+
)
|
349
350
|
)
|
350
351
|
)
|
351
352
|
chunk_messages = [chunk_msg for _, chunk_msg in results]
|
352
353
|
success_bools = [chunk_success for chunk_success, _ in results]
|
353
354
|
success = all(success_bools)
|
354
|
-
msg =
|
355
|
+
msg = (
|
356
|
+
f'Synced {len(chunk_messages)} chunk'
|
357
|
+
+ ('s' if len(chunk_messages) != 1 else '')
|
358
|
+
+ f' to {p}:\n\n'
|
359
|
+
+ '\n\n'.join(chunk_messages).lstrip().rstrip()
|
360
|
+
).lstrip().rstrip()
|
355
361
|
|
356
362
|
### If some chunks succeeded, retry the failures.
|
357
363
|
retry_success = True
|
@@ -432,7 +438,7 @@ def sync(
|
|
432
438
|
|
433
439
|
if blocking:
|
434
440
|
self._exists = None
|
435
|
-
return _sync(self, df
|
441
|
+
return _sync(self, df=df)
|
436
442
|
|
437
443
|
from meerschaum.utils.threading import Thread
|
438
444
|
def default_callback(result_tuple: SuccessTuple):
|
@@ -821,6 +827,7 @@ def filter_existing(
|
|
821
827
|
for col, typ in self_dtypes.items()
|
822
828
|
},
|
823
829
|
safe_copy=safe_copy,
|
830
|
+
coerce_mixed_numerics=(not self.static),
|
824
831
|
debug=debug
|
825
832
|
),
|
826
833
|
on_cols_dtypes,
|
@@ -962,7 +969,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
962
969
|
"""
|
963
970
|
from meerschaum.utils.dataframe import get_numeric_cols
|
964
971
|
numeric_cols = get_numeric_cols(df)
|
965
|
-
existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ
|
972
|
+
existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
|
966
973
|
new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
|
967
974
|
if not new_numeric_cols:
|
968
975
|
return True, "Success"
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -330,12 +330,12 @@ class Daemon:
|
|
330
330
|
result = self.target(*self.target_args, **self.target_kw)
|
331
331
|
self.properties['result'] = result
|
332
332
|
except (BrokenPipeError, KeyboardInterrupt, SystemExit):
|
333
|
-
|
333
|
+
result = False, traceback.format_exc()
|
334
334
|
except Exception as e:
|
335
335
|
warn(
|
336
336
|
f"Exception in daemon target function: {traceback.format_exc()}",
|
337
337
|
)
|
338
|
-
result = e
|
338
|
+
result = False, str(e)
|
339
339
|
finally:
|
340
340
|
_results[self.daemon_id] = result
|
341
341
|
|
@@ -345,8 +345,11 @@ class Daemon:
|
|
345
345
|
self.cleanup()
|
346
346
|
|
347
347
|
self._log_refresh_timer.cancel()
|
348
|
-
|
349
|
-
self.pid_path.
|
348
|
+
try:
|
349
|
+
if self.pid is None and self.pid_path.exists():
|
350
|
+
self.pid_path.unlink()
|
351
|
+
except Exception:
|
352
|
+
pass
|
350
353
|
|
351
354
|
if is_success_tuple(result):
|
352
355
|
try:
|
@@ -774,9 +777,16 @@ class Daemon:
|
|
774
777
|
if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
|
775
778
|
try:
|
776
779
|
self._process = psutil.Process(int(pid))
|
780
|
+
process_exists = True
|
777
781
|
except Exception:
|
778
|
-
|
779
|
-
|
782
|
+
process_exists = False
|
783
|
+
if not process_exists:
|
784
|
+
_ = self.__dict__.pop('_process', None)
|
785
|
+
try:
|
786
|
+
if self.pid_path.exists():
|
787
|
+
self.pid_path.unlink()
|
788
|
+
except Exception:
|
789
|
+
pass
|
780
790
|
return None
|
781
791
|
return self._process
|
782
792
|
|
@@ -897,8 +907,8 @@ class Daemon:
|
|
897
907
|
"""
|
898
908
|
Return the file handler for the stdin file.
|
899
909
|
"""
|
900
|
-
if
|
901
|
-
return
|
910
|
+
if (stdin_file := self.__dict__.get('_stdin_file', None)):
|
911
|
+
return stdin_file
|
902
912
|
|
903
913
|
self._stdin_file = StdinFile(
|
904
914
|
self.stdin_file_path,
|
@@ -1013,7 +1023,7 @@ class Daemon:
|
|
1013
1023
|
except Exception:
|
1014
1024
|
properties = {}
|
1015
1025
|
|
1016
|
-
return properties
|
1026
|
+
return properties or {}
|
1017
1027
|
|
1018
1028
|
def read_pickle(self) -> Daemon:
|
1019
1029
|
"""Read a Daemon's pickle file and return the `Daemon`."""
|
@@ -1043,7 +1053,7 @@ class Daemon:
|
|
1043
1053
|
Return the contents of the properties JSON file.
|
1044
1054
|
"""
|
1045
1055
|
try:
|
1046
|
-
_file_properties = self.read_properties()
|
1056
|
+
_file_properties = self.read_properties() or {}
|
1047
1057
|
except Exception:
|
1048
1058
|
traceback.print_exc()
|
1049
1059
|
_file_properties = {}
|
@@ -1054,7 +1064,10 @@ class Daemon:
|
|
1054
1064
|
if self._properties is None:
|
1055
1065
|
self._properties = {}
|
1056
1066
|
|
1057
|
-
if
|
1067
|
+
if (
|
1068
|
+
self._properties.get('result', None) is None
|
1069
|
+
and _file_properties.get('result', None) is not None
|
1070
|
+
):
|
1058
1071
|
_ = self._properties.pop('result', None)
|
1059
1072
|
|
1060
1073
|
if _file_properties is not None:
|
@@ -13,11 +13,10 @@ import pathlib
|
|
13
13
|
import traceback
|
14
14
|
import sys
|
15
15
|
import atexit
|
16
|
-
from datetime import datetime, timezone
|
17
|
-
from typing import List,
|
16
|
+
from datetime import datetime, timezone
|
17
|
+
from typing import List, Optional, Tuple
|
18
18
|
from meerschaum.config import get_config
|
19
19
|
from meerschaum.utils.warnings import warn
|
20
|
-
from meerschaum.utils.misc import round_time
|
21
20
|
from meerschaum.utils.daemon.FileDescriptorInterceptor import FileDescriptorInterceptor
|
22
21
|
from meerschaum.utils.threading import Thread
|
23
22
|
import meerschaum as mrsm
|
@@ -517,6 +516,7 @@ class RotatingFile(io.IOBase):
|
|
517
516
|
else 0
|
518
517
|
)
|
519
518
|
|
519
|
+
subfile_lines = []
|
520
520
|
if (
|
521
521
|
subfile_index in self.subfile_objects
|
522
522
|
and
|