meerschaum 2.7.7__py3-none-any.whl → 2.7.9__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/_internal/term/TermPageHandler.py +54 -4
- meerschaum/_internal/term/__init__.py +13 -5
- meerschaum/_internal/term/tools.py +41 -6
- meerschaum/actions/copy.py +1 -0
- meerschaum/actions/start.py +25 -10
- meerschaum/api/dash/callbacks/dashboard.py +43 -2
- meerschaum/api/dash/components.py +13 -6
- meerschaum/api/dash/keys.py +82 -108
- meerschaum/api/dash/pages/dashboard.py +17 -17
- meerschaum/api/dash/sessions.py +1 -0
- meerschaum/api/dash/webterm.py +17 -6
- meerschaum/api/resources/static/js/terminado.js +0 -2
- meerschaum/api/resources/templates/termpage.html +47 -4
- meerschaum/api/routes/_webterm.py +15 -11
- meerschaum/config/_default.py +6 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/static/__init__.py +2 -2
- meerschaum/connectors/sql/_SQLConnector.py +2 -9
- meerschaum/connectors/sql/_fetch.py +5 -30
- meerschaum/connectors/sql/_pipes.py +7 -4
- meerschaum/connectors/sql/_sql.py +56 -31
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
- meerschaum/core/Pipe/_fetch.py +4 -0
- meerschaum/core/Pipe/_sync.py +22 -15
- meerschaum/core/Pipe/_verify.py +1 -1
- meerschaum/utils/daemon/Daemon.py +24 -11
- meerschaum/utils/daemon/RotatingFile.py +3 -3
- meerschaum/utils/dataframe.py +42 -12
- meerschaum/utils/dtypes/__init__.py +153 -24
- meerschaum/utils/dtypes/sql.py +58 -9
- meerschaum/utils/formatting/__init__.py +2 -2
- meerschaum/utils/formatting/_pprint.py +13 -12
- meerschaum/utils/misc.py +32 -18
- meerschaum/utils/prompt.py +1 -1
- meerschaum/utils/sql.py +26 -8
- meerschaum/utils/venv/__init__.py +10 -14
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/METADATA +1 -1
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/RECORD +44 -44
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/zip-safe +0 -0
@@ -195,7 +195,8 @@ class SQLConnector(Connector):
|
|
195
195
|
self._debug = debug
|
196
196
|
### Store the PID and thread at initialization
|
197
197
|
### so we can dispose of the Pool in child processes or threads.
|
198
|
-
import os
|
198
|
+
import os
|
199
|
+
import threading
|
199
200
|
self._pid = os.getpid()
|
200
201
|
self._thread_ident = threading.current_thread().ident
|
201
202
|
self._sessions = {}
|
@@ -286,7 +287,6 @@ class SQLConnector(Connector):
|
|
286
287
|
return ':memory:' not in self.URI
|
287
288
|
return True
|
288
289
|
|
289
|
-
|
290
290
|
@property
|
291
291
|
def metadata(self):
|
292
292
|
"""
|
@@ -298,7 +298,6 @@ class SQLConnector(Connector):
|
|
298
298
|
self._metadata = sqlalchemy.MetaData(schema=self.schema)
|
299
299
|
return self._metadata
|
300
300
|
|
301
|
-
|
302
301
|
@property
|
303
302
|
def instance_schema(self):
|
304
303
|
"""
|
@@ -306,14 +305,12 @@ class SQLConnector(Connector):
|
|
306
305
|
"""
|
307
306
|
return self.schema
|
308
307
|
|
309
|
-
|
310
308
|
@property
|
311
309
|
def internal_schema(self):
|
312
310
|
"""
|
313
311
|
Return the schema name for internal tables.
|
314
312
|
"""
|
315
313
|
from meerschaum.config.static import STATIC_CONFIG
|
316
|
-
from meerschaum.utils.packages import attempt_import
|
317
314
|
from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
|
318
315
|
schema_name = self.__dict__.get('internal_schema', None) or (
|
319
316
|
STATIC_CONFIG['sql']['internal_schema']
|
@@ -325,7 +322,6 @@ class SQLConnector(Connector):
|
|
325
322
|
self._internal_schema = schema_name
|
326
323
|
return self._internal_schema
|
327
324
|
|
328
|
-
|
329
325
|
@property
|
330
326
|
def db(self) -> Optional[databases.Database]:
|
331
327
|
from meerschaum.utils.packages import attempt_import
|
@@ -342,7 +338,6 @@ class SQLConnector(Connector):
|
|
342
338
|
self._db = None
|
343
339
|
return self._db
|
344
340
|
|
345
|
-
|
346
341
|
@property
|
347
342
|
def db_version(self) -> Union[str, None]:
|
348
343
|
"""
|
@@ -356,7 +351,6 @@ class SQLConnector(Connector):
|
|
356
351
|
self._db_version = get_db_version(self)
|
357
352
|
return self._db_version
|
358
353
|
|
359
|
-
|
360
354
|
@property
|
361
355
|
def schema(self) -> Union[str, None]:
|
362
356
|
"""
|
@@ -376,7 +370,6 @@ class SQLConnector(Connector):
|
|
376
370
|
self.__dict__['schema'] = _schema
|
377
371
|
return _schema
|
378
372
|
|
379
|
-
|
380
373
|
def __getstate__(self):
|
381
374
|
return self.__dict__
|
382
375
|
|
@@ -11,7 +11,7 @@ from __future__ import annotations
|
|
11
11
|
from datetime import datetime, timedelta
|
12
12
|
|
13
13
|
import meerschaum as mrsm
|
14
|
-
from meerschaum.utils.typing import Optional, Union,
|
14
|
+
from meerschaum.utils.typing import Optional, Union, Any, List, Dict
|
15
15
|
|
16
16
|
|
17
17
|
def fetch(
|
@@ -20,7 +20,6 @@ def fetch(
|
|
20
20
|
begin: Union[datetime, int, str, None] = '',
|
21
21
|
end: Union[datetime, int, str, None] = None,
|
22
22
|
check_existing: bool = True,
|
23
|
-
chunk_hook: Optional[Callable[['pd.DataFrame'], Any]] = None,
|
24
23
|
chunksize: Optional[int] = -1,
|
25
24
|
workers: Optional[int] = None,
|
26
25
|
debug: bool = False,
|
@@ -53,15 +52,12 @@ def fetch(
|
|
53
52
|
check_existing: bool, default True
|
54
53
|
If `False`, use a backtrack interval of 0 minutes.
|
55
54
|
|
56
|
-
chunk_hook: Callable[[pd.DataFrame], Any], default None
|
57
|
-
A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame.
|
58
|
-
|
59
55
|
chunksize: Optional[int], default -1
|
60
|
-
How many rows to load into memory at once
|
56
|
+
How many rows to load into memory at once.
|
61
57
|
Otherwise the entire result set is loaded into memory.
|
62
58
|
|
63
59
|
workers: Optional[int], default None
|
64
|
-
How many threads to use when consuming the generator
|
60
|
+
How many threads to use when consuming the generator.
|
65
61
|
Defaults to the number of cores.
|
66
62
|
|
67
63
|
debug: bool, default False
|
@@ -69,8 +65,7 @@ def fetch(
|
|
69
65
|
|
70
66
|
Returns
|
71
67
|
-------
|
72
|
-
A pandas DataFrame
|
73
|
-
If `chunk_hook` is not None, return a list of the hook function's results.
|
68
|
+
A pandas DataFrame generator.
|
74
69
|
"""
|
75
70
|
meta_def = self.get_pipe_metadef(
|
76
71
|
pipe,
|
@@ -80,33 +75,13 @@ def fetch(
|
|
80
75
|
debug=debug,
|
81
76
|
**kw
|
82
77
|
)
|
83
|
-
as_hook_results = chunk_hook is not None
|
84
78
|
chunks = self.read(
|
85
79
|
meta_def,
|
86
|
-
chunk_hook=chunk_hook,
|
87
|
-
as_hook_results=as_hook_results,
|
88
80
|
chunksize=chunksize,
|
89
81
|
workers=workers,
|
82
|
+
as_iterator=True,
|
90
83
|
debug=debug,
|
91
84
|
)
|
92
|
-
### if sqlite, parse for datetimes
|
93
|
-
if not as_hook_results and self.flavor == 'sqlite':
|
94
|
-
from meerschaum.utils.dataframe import parse_df_datetimes
|
95
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
96
|
-
ignore_cols = [
|
97
|
-
col
|
98
|
-
for col, dtype in pipe.dtypes.items()
|
99
|
-
if not are_dtypes_equal(str(dtype), 'datetime')
|
100
|
-
]
|
101
|
-
return (
|
102
|
-
parse_df_datetimes(
|
103
|
-
chunk,
|
104
|
-
ignore_cols=ignore_cols,
|
105
|
-
strip_timezone=(pipe.tzinfo is None),
|
106
|
-
debug=debug,
|
107
|
-
)
|
108
|
-
for chunk in chunks
|
109
|
-
)
|
110
85
|
return chunks
|
111
86
|
|
112
87
|
|
@@ -1125,7 +1125,7 @@ def get_pipe_data(
|
|
1125
1125
|
numeric_columns = [
|
1126
1126
|
col
|
1127
1127
|
for col, typ in pipe.dtypes.items()
|
1128
|
-
if typ
|
1128
|
+
if typ.startswith('numeric') and col in dtypes
|
1129
1129
|
]
|
1130
1130
|
uuid_columns = [
|
1131
1131
|
col
|
@@ -1887,7 +1887,10 @@ def sync_pipe(
|
|
1887
1887
|
warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
|
1888
1888
|
|
1889
1889
|
if update_df is not None and len(update_df) > 0:
|
1890
|
-
temp_target = self.get_temporary_target(
|
1890
|
+
temp_target = self.get_temporary_target(
|
1891
|
+
pipe.target,
|
1892
|
+
label=('update' if not upsert else 'upsert'),
|
1893
|
+
)
|
1891
1894
|
self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
|
1892
1895
|
temp_pipe = Pipe(
|
1893
1896
|
pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
|
@@ -3274,7 +3277,7 @@ def get_alter_columns_queries(
|
|
3274
3277
|
else [
|
3275
3278
|
col
|
3276
3279
|
for col, typ in df.items()
|
3277
|
-
if typ
|
3280
|
+
if typ.startswith('numeric')
|
3278
3281
|
]
|
3279
3282
|
)
|
3280
3283
|
df_cols_types = (
|
@@ -3354,7 +3357,7 @@ def get_alter_columns_queries(
|
|
3354
3357
|
+ f"{edit_msg}"
|
3355
3358
|
)
|
3356
3359
|
else:
|
3357
|
-
numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ
|
3360
|
+
numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ.startswith('numeric')])
|
3358
3361
|
|
3359
3362
|
numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
|
3360
3363
|
text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
|
@@ -126,7 +126,7 @@ def read(
|
|
126
126
|
return []
|
127
127
|
from meerschaum.utils.sql import sql_item_name, truncate_item_name
|
128
128
|
from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
|
129
|
-
from meerschaum.utils.dtypes.sql import
|
129
|
+
from meerschaum.utils.dtypes.sql import TIMEZONE_NAIVE_FLAVORS
|
130
130
|
from meerschaum.utils.packages import attempt_import, import_pandas
|
131
131
|
from meerschaum.utils.pool import get_pool
|
132
132
|
from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
|
@@ -802,16 +802,17 @@ def to_sql(
|
|
802
802
|
)
|
803
803
|
from meerschaum.utils.dtypes import (
|
804
804
|
are_dtypes_equal,
|
805
|
-
quantize_decimal,
|
806
805
|
coerce_timezone,
|
807
806
|
encode_bytes_for_bytea,
|
808
807
|
serialize_bytes,
|
808
|
+
serialize_decimal,
|
809
|
+
json_serialize_value,
|
809
810
|
)
|
810
811
|
from meerschaum.utils.dtypes.sql import (
|
811
|
-
NUMERIC_PRECISION_FLAVORS,
|
812
|
-
NUMERIC_AS_TEXT_FLAVORS,
|
813
812
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
|
814
813
|
get_db_type_from_pd_type,
|
814
|
+
get_pd_type_from_db_type,
|
815
|
+
get_numeric_precision_scale,
|
815
816
|
)
|
816
817
|
from meerschaum.utils.misc import interval_str
|
817
818
|
from meerschaum.connectors.sql._create_engine import flavor_configs
|
@@ -822,6 +823,16 @@ def to_sql(
|
|
822
823
|
|
823
824
|
bytes_cols = get_bytes_cols(df)
|
824
825
|
numeric_cols = get_numeric_cols(df)
|
826
|
+
numeric_cols_dtypes = {
|
827
|
+
col: typ
|
828
|
+
for col, typ in kw.get('dtype', {}).items()
|
829
|
+
if (
|
830
|
+
col in df.columns
|
831
|
+
and 'numeric' in str(typ).lower()
|
832
|
+
)
|
833
|
+
|
834
|
+
}
|
835
|
+
numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
|
825
836
|
|
826
837
|
enable_bulk_insert = mrsm.get_config(
|
827
838
|
'system', 'connectors', 'sql', 'bulk_insert'
|
@@ -854,12 +865,24 @@ def to_sql(
|
|
854
865
|
for col in bytes_cols:
|
855
866
|
df[col] = df[col].apply(bytes_serializer)
|
856
867
|
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
868
|
+
### Check for numeric columns.
|
869
|
+
for col in numeric_cols:
|
870
|
+
typ = numeric_cols_dtypes.get(col, None)
|
871
|
+
|
872
|
+
precision, scale = (
|
873
|
+
(typ.precision, typ.scale)
|
874
|
+
if hasattr(typ, 'precision')
|
875
|
+
else get_numeric_precision_scale(self.flavor)
|
876
|
+
)
|
877
|
+
|
878
|
+
df[col] = df[col].apply(
|
879
|
+
functools.partial(
|
880
|
+
serialize_decimal,
|
881
|
+
quantize=True,
|
882
|
+
precision=precision,
|
883
|
+
scale=scale,
|
884
|
+
)
|
885
|
+
)
|
863
886
|
|
864
887
|
stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
|
865
888
|
|
@@ -889,7 +912,7 @@ def to_sql(
|
|
889
912
|
if name != truncated_name:
|
890
913
|
warn(
|
891
914
|
f"Table '{name}' is too long for '{self.flavor}',"
|
892
|
-
|
915
|
+
f" will instead create the table '{truncated_name}'."
|
893
916
|
)
|
894
917
|
|
895
918
|
### filter out non-pandas args
|
@@ -957,24 +980,11 @@ def to_sql(
|
|
957
980
|
### Check for JSON columns.
|
958
981
|
if self.flavor not in json_flavors:
|
959
982
|
json_cols = get_json_cols(df)
|
960
|
-
|
961
|
-
for col in json_cols:
|
962
|
-
df[col] = df[col].apply(
|
963
|
-
(
|
964
|
-
lambda x: json.dumps(x, default=str, sort_keys=True)
|
965
|
-
if not isinstance(x, Hashable)
|
966
|
-
else x
|
967
|
-
)
|
968
|
-
)
|
969
|
-
|
970
|
-
### Check for numeric columns.
|
971
|
-
numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
|
972
|
-
if numeric_precision is not None and numeric_scale is not None:
|
973
|
-
for col in numeric_cols:
|
983
|
+
for col in json_cols:
|
974
984
|
df[col] = df[col].apply(
|
975
|
-
|
976
|
-
|
977
|
-
if isinstance(x,
|
985
|
+
(
|
986
|
+
lambda x: json.dumps(x, default=json_serialize_value, sort_keys=True)
|
987
|
+
if not isinstance(x, Hashable)
|
978
988
|
else x
|
979
989
|
)
|
980
990
|
)
|
@@ -1051,16 +1061,20 @@ def psql_insert_copy(
|
|
1051
1061
|
|
1052
1062
|
from meerschaum.utils.sql import sql_item_name
|
1053
1063
|
from meerschaum.utils.warnings import dprint
|
1064
|
+
from meerschaum.utils.dtypes import json_serialize_value
|
1054
1065
|
|
1055
1066
|
### NOTE: PostgreSQL doesn't support NUL chars in text, so they're removed from strings.
|
1056
1067
|
data_iter = (
|
1057
1068
|
(
|
1058
1069
|
(
|
1059
1070
|
(
|
1060
|
-
json.dumps(
|
1071
|
+
json.dumps(
|
1072
|
+
item,
|
1073
|
+
default=json_serialize_value,
|
1074
|
+
).replace('\0', '').replace('\\u0000', '')
|
1061
1075
|
if isinstance(item, (dict, list))
|
1062
1076
|
else (
|
1063
|
-
item
|
1077
|
+
json_serialize_value(item, default_to_str=False)
|
1064
1078
|
if not isinstance(item, str)
|
1065
1079
|
else item.replace('\0', '').replace('\\u0000', '')
|
1066
1080
|
)
|
@@ -1119,6 +1133,7 @@ def mssql_insert_json(
|
|
1119
1133
|
"""
|
1120
1134
|
import json
|
1121
1135
|
from meerschaum.utils.sql import sql_item_name
|
1136
|
+
from meerschaum.utils.dtypes import json_serialize_value
|
1122
1137
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
|
1123
1138
|
from meerschaum.utils.warnings import dprint
|
1124
1139
|
table_name = sql_item_name(table.name, 'mssql', table.schema)
|
@@ -1127,6 +1142,15 @@ def mssql_insert_json(
|
|
1127
1142
|
str(column.name): get_pd_type_from_db_type(str(column.type))
|
1128
1143
|
for column in table.table.columns
|
1129
1144
|
}
|
1145
|
+
numeric_cols_types = {
|
1146
|
+
col: table.table.columns[col].type
|
1147
|
+
for col, typ in pd_types.items()
|
1148
|
+
if typ.startswith('numeric') and col in keys
|
1149
|
+
}
|
1150
|
+
pd_types.update({
|
1151
|
+
col: f'numeric[{typ.precision},{typ.scale}]'
|
1152
|
+
for col, typ in numeric_cols_types.items()
|
1153
|
+
})
|
1130
1154
|
cols_types = {
|
1131
1155
|
col: get_db_type_from_pd_type(typ, 'mssql')
|
1132
1156
|
for col, typ in pd_types.items()
|
@@ -1151,7 +1175,8 @@ def mssql_insert_json(
|
|
1151
1175
|
if debug:
|
1152
1176
|
dprint(sql)
|
1153
1177
|
|
1154
|
-
|
1178
|
+
serialized_data = json.dumps(json_data, default=json_serialize_value)
|
1179
|
+
conn.exec_driver_sql(sql, (serialized_data,))
|
1155
1180
|
|
1156
1181
|
|
1157
1182
|
def format_sql_query_for_dask(query: str) -> 'sqlalchemy.sql.selectable.Select':
|
@@ -239,7 +239,7 @@ class ValkeyConnector(Connector):
|
|
239
239
|
-------
|
240
240
|
The current index counter value (how many docs have been pushed).
|
241
241
|
"""
|
242
|
-
from meerschaum.utils.
|
242
|
+
from meerschaum.utils.dtypes import json_serialize_value
|
243
243
|
table_name = self.quote_table(table)
|
244
244
|
datetime_column_key = self.get_datetime_column_key(table)
|
245
245
|
remote_datetime_column = self.get(datetime_column_key)
|
@@ -269,7 +269,7 @@ class ValkeyConnector(Connector):
|
|
269
269
|
) if datetime_column else None
|
270
270
|
doc_str = json.dumps(
|
271
271
|
doc,
|
272
|
-
default=
|
272
|
+
default=json_serialize_value,
|
273
273
|
separators=(',', ':'),
|
274
274
|
sort_keys=True,
|
275
275
|
)
|
meerschaum/core/Pipe/_fetch.py
CHANGED
@@ -84,6 +84,7 @@ def fetch(
|
|
84
84
|
begin=_determine_begin(
|
85
85
|
self,
|
86
86
|
begin,
|
87
|
+
end,
|
87
88
|
check_existing=check_existing,
|
88
89
|
debug=debug,
|
89
90
|
),
|
@@ -136,6 +137,7 @@ def get_backtrack_interval(
|
|
136
137
|
def _determine_begin(
|
137
138
|
pipe: mrsm.Pipe,
|
138
139
|
begin: Union[datetime, int, str, None] = '',
|
140
|
+
end: Union[datetime, int, None] = None,
|
139
141
|
check_existing: bool = True,
|
140
142
|
debug: bool = False,
|
141
143
|
) -> Union[datetime, int, None]:
|
@@ -157,6 +159,8 @@ def _determine_begin(
|
|
157
159
|
"""
|
158
160
|
if begin != '':
|
159
161
|
return begin
|
162
|
+
if end is not None:
|
163
|
+
return None
|
160
164
|
sync_time = pipe.get_sync_time(debug=debug)
|
161
165
|
if sync_time is None:
|
162
166
|
return sync_time
|
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -292,7 +292,6 @@ def sync(
|
|
292
292
|
message = '\n'.join([_message for _, _message in df])
|
293
293
|
return success, message
|
294
294
|
|
295
|
-
### TODO: Depreciate async?
|
296
295
|
if df is True:
|
297
296
|
p._exists = None
|
298
297
|
return True, f"{p} is being synced in parallel."
|
@@ -328,30 +327,37 @@ def sync(
|
|
328
327
|
_chunk_success, _chunk_msg = False, str(e)
|
329
328
|
if not _chunk_success:
|
330
329
|
failed_chunks.append(_chunk)
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
+ self._get_chunk_label(_chunk, dt_col)
|
336
|
-
+ '\n'
|
337
|
-
+ _chunk_msg
|
338
|
-
)
|
330
|
+
_chunk_msg = (
|
331
|
+
self._get_chunk_label(_chunk, dt_col)
|
332
|
+
+ '\n'
|
333
|
+
+ _chunk_msg
|
339
334
|
)
|
340
335
|
|
336
|
+
mrsm.pprint((_chunk_success, _chunk_msg), calm=True)
|
337
|
+
return _chunk_success, _chunk_msg
|
338
|
+
|
341
339
|
results = sorted(
|
342
340
|
[(chunk_success, chunk_msg)] + (
|
343
341
|
list(pool.imap(_process_chunk, df))
|
344
|
-
if
|
345
|
-
|
342
|
+
if (
|
343
|
+
not df_is_chunk_generator(chunk) # Handle nested generators.
|
344
|
+
and kw.get('workers', 1) != 1
|
345
|
+
)
|
346
|
+
else list(
|
346
347
|
_process_chunk(_child_chunks)
|
347
348
|
for _child_chunks in df
|
348
|
-
|
349
|
+
)
|
349
350
|
)
|
350
351
|
)
|
351
352
|
chunk_messages = [chunk_msg for _, chunk_msg in results]
|
352
353
|
success_bools = [chunk_success for chunk_success, _ in results]
|
353
354
|
success = all(success_bools)
|
354
|
-
msg =
|
355
|
+
msg = (
|
356
|
+
f'Synced {len(chunk_messages)} chunk'
|
357
|
+
+ ('s' if len(chunk_messages) != 1 else '')
|
358
|
+
+ f' to {p}:\n\n'
|
359
|
+
+ '\n\n'.join(chunk_messages).lstrip().rstrip()
|
360
|
+
).lstrip().rstrip()
|
355
361
|
|
356
362
|
### If some chunks succeeded, retry the failures.
|
357
363
|
retry_success = True
|
@@ -432,7 +438,7 @@ def sync(
|
|
432
438
|
|
433
439
|
if blocking:
|
434
440
|
self._exists = None
|
435
|
-
return _sync(self, df
|
441
|
+
return _sync(self, df=df)
|
436
442
|
|
437
443
|
from meerschaum.utils.threading import Thread
|
438
444
|
def default_callback(result_tuple: SuccessTuple):
|
@@ -821,6 +827,7 @@ def filter_existing(
|
|
821
827
|
for col, typ in self_dtypes.items()
|
822
828
|
},
|
823
829
|
safe_copy=safe_copy,
|
830
|
+
coerce_mixed_numerics=(not self.static),
|
824
831
|
debug=debug
|
825
832
|
),
|
826
833
|
on_cols_dtypes,
|
@@ -962,7 +969,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
962
969
|
"""
|
963
970
|
from meerschaum.utils.dataframe import get_numeric_cols
|
964
971
|
numeric_cols = get_numeric_cols(df)
|
965
|
-
existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ
|
972
|
+
existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
|
966
973
|
new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
|
967
974
|
if not new_numeric_cols:
|
968
975
|
return True, "Success"
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -330,12 +330,12 @@ class Daemon:
|
|
330
330
|
result = self.target(*self.target_args, **self.target_kw)
|
331
331
|
self.properties['result'] = result
|
332
332
|
except (BrokenPipeError, KeyboardInterrupt, SystemExit):
|
333
|
-
|
333
|
+
result = False, traceback.format_exc()
|
334
334
|
except Exception as e:
|
335
335
|
warn(
|
336
336
|
f"Exception in daemon target function: {traceback.format_exc()}",
|
337
337
|
)
|
338
|
-
result = e
|
338
|
+
result = False, str(e)
|
339
339
|
finally:
|
340
340
|
_results[self.daemon_id] = result
|
341
341
|
|
@@ -345,8 +345,11 @@ class Daemon:
|
|
345
345
|
self.cleanup()
|
346
346
|
|
347
347
|
self._log_refresh_timer.cancel()
|
348
|
-
|
349
|
-
self.pid_path.
|
348
|
+
try:
|
349
|
+
if self.pid is None and self.pid_path.exists():
|
350
|
+
self.pid_path.unlink()
|
351
|
+
except Exception:
|
352
|
+
pass
|
350
353
|
|
351
354
|
if is_success_tuple(result):
|
352
355
|
try:
|
@@ -774,9 +777,16 @@ class Daemon:
|
|
774
777
|
if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
|
775
778
|
try:
|
776
779
|
self._process = psutil.Process(int(pid))
|
780
|
+
process_exists = True
|
777
781
|
except Exception:
|
778
|
-
|
779
|
-
|
782
|
+
process_exists = False
|
783
|
+
if not process_exists:
|
784
|
+
_ = self.__dict__.pop('_process', None)
|
785
|
+
try:
|
786
|
+
if self.pid_path.exists():
|
787
|
+
self.pid_path.unlink()
|
788
|
+
except Exception:
|
789
|
+
pass
|
780
790
|
return None
|
781
791
|
return self._process
|
782
792
|
|
@@ -897,8 +907,8 @@ class Daemon:
|
|
897
907
|
"""
|
898
908
|
Return the file handler for the stdin file.
|
899
909
|
"""
|
900
|
-
if
|
901
|
-
return
|
910
|
+
if (stdin_file := self.__dict__.get('_stdin_file', None)):
|
911
|
+
return stdin_file
|
902
912
|
|
903
913
|
self._stdin_file = StdinFile(
|
904
914
|
self.stdin_file_path,
|
@@ -1013,7 +1023,7 @@ class Daemon:
|
|
1013
1023
|
except Exception:
|
1014
1024
|
properties = {}
|
1015
1025
|
|
1016
|
-
return properties
|
1026
|
+
return properties or {}
|
1017
1027
|
|
1018
1028
|
def read_pickle(self) -> Daemon:
|
1019
1029
|
"""Read a Daemon's pickle file and return the `Daemon`."""
|
@@ -1043,7 +1053,7 @@ class Daemon:
|
|
1043
1053
|
Return the contents of the properties JSON file.
|
1044
1054
|
"""
|
1045
1055
|
try:
|
1046
|
-
_file_properties = self.read_properties()
|
1056
|
+
_file_properties = self.read_properties() or {}
|
1047
1057
|
except Exception:
|
1048
1058
|
traceback.print_exc()
|
1049
1059
|
_file_properties = {}
|
@@ -1054,7 +1064,10 @@ class Daemon:
|
|
1054
1064
|
if self._properties is None:
|
1055
1065
|
self._properties = {}
|
1056
1066
|
|
1057
|
-
if
|
1067
|
+
if (
|
1068
|
+
self._properties.get('result', None) is None
|
1069
|
+
and _file_properties.get('result', None) is not None
|
1070
|
+
):
|
1058
1071
|
_ = self._properties.pop('result', None)
|
1059
1072
|
|
1060
1073
|
if _file_properties is not None:
|
@@ -13,11 +13,10 @@ import pathlib
|
|
13
13
|
import traceback
|
14
14
|
import sys
|
15
15
|
import atexit
|
16
|
-
from datetime import datetime, timezone
|
17
|
-
from typing import List,
|
16
|
+
from datetime import datetime, timezone
|
17
|
+
from typing import List, Optional, Tuple
|
18
18
|
from meerschaum.config import get_config
|
19
19
|
from meerschaum.utils.warnings import warn
|
20
|
-
from meerschaum.utils.misc import round_time
|
21
20
|
from meerschaum.utils.daemon.FileDescriptorInterceptor import FileDescriptorInterceptor
|
22
21
|
from meerschaum.utils.threading import Thread
|
23
22
|
import meerschaum as mrsm
|
@@ -517,6 +516,7 @@ class RotatingFile(io.IOBase):
|
|
517
516
|
else 0
|
518
517
|
)
|
519
518
|
|
519
|
+
subfile_lines = []
|
520
520
|
if (
|
521
521
|
subfile_index in self.subfile_objects
|
522
522
|
and
|