meerschaum 2.7.7__py3-none-any.whl → 2.7.8__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,6 +9,7 @@ Functions for copying elements.
9
9
  from __future__ import annotations
10
10
  from meerschaum.utils.typing import Any, SuccessTuple, Optional, List
11
11
 
12
+
12
13
  def copy(
13
14
  action: Optional[List[str]] = None,
14
15
  **kw : Any
@@ -2,4 +2,4 @@
2
2
  Specify the Meerschaum release version.
3
3
  """
4
4
 
5
- __version__ = "2.7.7"
5
+ __version__ = "2.7.8"
@@ -195,7 +195,8 @@ class SQLConnector(Connector):
195
195
  self._debug = debug
196
196
  ### Store the PID and thread at initialization
197
197
  ### so we can dispose of the Pool in child processes or threads.
198
- import os, threading
198
+ import os
199
+ import threading
199
200
  self._pid = os.getpid()
200
201
  self._thread_ident = threading.current_thread().ident
201
202
  self._sessions = {}
@@ -286,7 +287,6 @@ class SQLConnector(Connector):
286
287
  return ':memory:' not in self.URI
287
288
  return True
288
289
 
289
-
290
290
  @property
291
291
  def metadata(self):
292
292
  """
@@ -298,7 +298,6 @@ class SQLConnector(Connector):
298
298
  self._metadata = sqlalchemy.MetaData(schema=self.schema)
299
299
  return self._metadata
300
300
 
301
-
302
301
  @property
303
302
  def instance_schema(self):
304
303
  """
@@ -306,14 +305,12 @@ class SQLConnector(Connector):
306
305
  """
307
306
  return self.schema
308
307
 
309
-
310
308
  @property
311
309
  def internal_schema(self):
312
310
  """
313
311
  Return the schema name for internal tables.
314
312
  """
315
313
  from meerschaum.config.static import STATIC_CONFIG
316
- from meerschaum.utils.packages import attempt_import
317
314
  from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
318
315
  schema_name = self.__dict__.get('internal_schema', None) or (
319
316
  STATIC_CONFIG['sql']['internal_schema']
@@ -325,7 +322,6 @@ class SQLConnector(Connector):
325
322
  self._internal_schema = schema_name
326
323
  return self._internal_schema
327
324
 
328
-
329
325
  @property
330
326
  def db(self) -> Optional[databases.Database]:
331
327
  from meerschaum.utils.packages import attempt_import
@@ -342,7 +338,6 @@ class SQLConnector(Connector):
342
338
  self._db = None
343
339
  return self._db
344
340
 
345
-
346
341
  @property
347
342
  def db_version(self) -> Union[str, None]:
348
343
  """
@@ -356,7 +351,6 @@ class SQLConnector(Connector):
356
351
  self._db_version = get_db_version(self)
357
352
  return self._db_version
358
353
 
359
-
360
354
  @property
361
355
  def schema(self) -> Union[str, None]:
362
356
  """
@@ -376,7 +370,6 @@ class SQLConnector(Connector):
376
370
  self.__dict__['schema'] = _schema
377
371
  return _schema
378
372
 
379
-
380
373
  def __getstate__(self):
381
374
  return self.__dict__
382
375
 
@@ -11,7 +11,7 @@ from __future__ import annotations
11
11
  from datetime import datetime, timedelta
12
12
 
13
13
  import meerschaum as mrsm
14
- from meerschaum.utils.typing import Optional, Union, Callable, Any, List, Dict
14
+ from meerschaum.utils.typing import Optional, Union, Any, List, Dict
15
15
 
16
16
 
17
17
  def fetch(
@@ -20,7 +20,6 @@ def fetch(
20
20
  begin: Union[datetime, int, str, None] = '',
21
21
  end: Union[datetime, int, str, None] = None,
22
22
  check_existing: bool = True,
23
- chunk_hook: Optional[Callable[['pd.DataFrame'], Any]] = None,
24
23
  chunksize: Optional[int] = -1,
25
24
  workers: Optional[int] = None,
26
25
  debug: bool = False,
@@ -53,15 +52,12 @@ def fetch(
53
52
  check_existing: bool, default True
54
53
  If `False`, use a backtrack interval of 0 minutes.
55
54
 
56
- chunk_hook: Callable[[pd.DataFrame], Any], default None
57
- A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame.
58
-
59
55
  chunksize: Optional[int], default -1
60
- How many rows to load into memory at once (when `chunk_hook` is provided).
56
+ How many rows to load into memory at once.
61
57
  Otherwise the entire result set is loaded into memory.
62
58
 
63
59
  workers: Optional[int], default None
64
- How many threads to use when consuming the generator (when `chunk_hook is provided).
60
+ How many threads to use when consuming the generator.
65
61
  Defaults to the number of cores.
66
62
 
67
63
  debug: bool, default False
@@ -69,8 +65,7 @@ def fetch(
69
65
 
70
66
  Returns
71
67
  -------
72
- A pandas DataFrame or `None`.
73
- If `chunk_hook` is not None, return a list of the hook function's results.
68
+ A pandas DataFrame generator.
74
69
  """
75
70
  meta_def = self.get_pipe_metadef(
76
71
  pipe,
@@ -80,33 +75,13 @@ def fetch(
80
75
  debug=debug,
81
76
  **kw
82
77
  )
83
- as_hook_results = chunk_hook is not None
84
78
  chunks = self.read(
85
79
  meta_def,
86
- chunk_hook=chunk_hook,
87
- as_hook_results=as_hook_results,
88
80
  chunksize=chunksize,
89
81
  workers=workers,
82
+ as_iterator=True,
90
83
  debug=debug,
91
84
  )
92
- ### if sqlite, parse for datetimes
93
- if not as_hook_results and self.flavor == 'sqlite':
94
- from meerschaum.utils.dataframe import parse_df_datetimes
95
- from meerschaum.utils.dtypes import are_dtypes_equal
96
- ignore_cols = [
97
- col
98
- for col, dtype in pipe.dtypes.items()
99
- if not are_dtypes_equal(str(dtype), 'datetime')
100
- ]
101
- return (
102
- parse_df_datetimes(
103
- chunk,
104
- ignore_cols=ignore_cols,
105
- strip_timezone=(pipe.tzinfo is None),
106
- debug=debug,
107
- )
108
- for chunk in chunks
109
- )
110
85
  return chunks
111
86
 
112
87
 
@@ -1125,7 +1125,7 @@ def get_pipe_data(
1125
1125
  numeric_columns = [
1126
1126
  col
1127
1127
  for col, typ in pipe.dtypes.items()
1128
- if typ == 'numeric' and col in dtypes
1128
+ if typ.startswith('numeric') and col in dtypes
1129
1129
  ]
1130
1130
  uuid_columns = [
1131
1131
  col
@@ -1887,7 +1887,10 @@ def sync_pipe(
1887
1887
  warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
1888
1888
 
1889
1889
  if update_df is not None and len(update_df) > 0:
1890
- temp_target = self.get_temporary_target(pipe.target, label='update')
1890
+ temp_target = self.get_temporary_target(
1891
+ pipe.target,
1892
+ label=('update' if not upsert else 'upsert'),
1893
+ )
1891
1894
  self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
1892
1895
  temp_pipe = Pipe(
1893
1896
  pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
@@ -3274,7 +3277,7 @@ def get_alter_columns_queries(
3274
3277
  else [
3275
3278
  col
3276
3279
  for col, typ in df.items()
3277
- if typ == 'numeric'
3280
+ if typ.startswith('numeric')
3278
3281
  ]
3279
3282
  )
3280
3283
  df_cols_types = (
@@ -3354,7 +3357,7 @@ def get_alter_columns_queries(
3354
3357
  + f"{edit_msg}"
3355
3358
  )
3356
3359
  else:
3357
- numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ == 'numeric'])
3360
+ numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ.startswith('numeric')])
3358
3361
 
3359
3362
  numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
3360
3363
  text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
@@ -126,7 +126,7 @@ def read(
126
126
  return []
127
127
  from meerschaum.utils.sql import sql_item_name, truncate_item_name
128
128
  from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
129
- from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
129
+ from meerschaum.utils.dtypes.sql import TIMEZONE_NAIVE_FLAVORS
130
130
  from meerschaum.utils.packages import attempt_import, import_pandas
131
131
  from meerschaum.utils.pool import get_pool
132
132
  from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
@@ -802,16 +802,17 @@ def to_sql(
802
802
  )
803
803
  from meerschaum.utils.dtypes import (
804
804
  are_dtypes_equal,
805
- quantize_decimal,
806
805
  coerce_timezone,
807
806
  encode_bytes_for_bytea,
808
807
  serialize_bytes,
808
+ serialize_decimal,
809
+ json_serialize_value,
809
810
  )
810
811
  from meerschaum.utils.dtypes.sql import (
811
- NUMERIC_PRECISION_FLAVORS,
812
- NUMERIC_AS_TEXT_FLAVORS,
813
812
  PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
814
813
  get_db_type_from_pd_type,
814
+ get_pd_type_from_db_type,
815
+ get_numeric_precision_scale,
815
816
  )
816
817
  from meerschaum.utils.misc import interval_str
817
818
  from meerschaum.connectors.sql._create_engine import flavor_configs
@@ -822,6 +823,16 @@ def to_sql(
822
823
 
823
824
  bytes_cols = get_bytes_cols(df)
824
825
  numeric_cols = get_numeric_cols(df)
826
+ numeric_cols_dtypes = {
827
+ col: typ
828
+ for col, typ in kw.get('dtype', {}).items()
829
+ if (
830
+ col in df.columns
831
+ and 'numeric' in str(typ).lower()
832
+ )
833
+
834
+ }
835
+ numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
825
836
 
826
837
  enable_bulk_insert = mrsm.get_config(
827
838
  'system', 'connectors', 'sql', 'bulk_insert'
@@ -854,12 +865,24 @@ def to_sql(
854
865
  for col in bytes_cols:
855
866
  df[col] = df[col].apply(bytes_serializer)
856
867
 
857
- if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
858
- if safe_copy and not copied:
859
- df = df.copy()
860
- copied = True
861
- for col in numeric_cols:
862
- df[col] = df[col].astype(str)
868
+ ### Check for numeric columns.
869
+ for col in numeric_cols:
870
+ typ = numeric_cols_dtypes.get(col, None)
871
+
872
+ precision, scale = (
873
+ (typ.precision, typ.scale)
874
+ if hasattr(typ, 'precision')
875
+ else get_numeric_precision_scale(self.flavor)
876
+ )
877
+
878
+ df[col] = df[col].apply(
879
+ functools.partial(
880
+ serialize_decimal,
881
+ quantize=True,
882
+ precision=precision,
883
+ scale=scale,
884
+ )
885
+ )
863
886
 
864
887
  stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
865
888
 
@@ -889,7 +912,7 @@ def to_sql(
889
912
  if name != truncated_name:
890
913
  warn(
891
914
  f"Table '{name}' is too long for '{self.flavor}',"
892
- + f" will instead create the table '{truncated_name}'."
915
+ f" will instead create the table '{truncated_name}'."
893
916
  )
894
917
 
895
918
  ### filter out non-pandas args
@@ -957,24 +980,11 @@ def to_sql(
957
980
  ### Check for JSON columns.
958
981
  if self.flavor not in json_flavors:
959
982
  json_cols = get_json_cols(df)
960
- if json_cols:
961
- for col in json_cols:
962
- df[col] = df[col].apply(
963
- (
964
- lambda x: json.dumps(x, default=str, sort_keys=True)
965
- if not isinstance(x, Hashable)
966
- else x
967
- )
968
- )
969
-
970
- ### Check for numeric columns.
971
- numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
972
- if numeric_precision is not None and numeric_scale is not None:
973
- for col in numeric_cols:
983
+ for col in json_cols:
974
984
  df[col] = df[col].apply(
975
- lambda x: (
976
- quantize_decimal(x, numeric_scale, numeric_precision)
977
- if isinstance(x, Decimal)
985
+ (
986
+ lambda x: json.dumps(x, default=json_serialize_value, sort_keys=True)
987
+ if not isinstance(x, Hashable)
978
988
  else x
979
989
  )
980
990
  )
@@ -1051,16 +1061,20 @@ def psql_insert_copy(
1051
1061
 
1052
1062
  from meerschaum.utils.sql import sql_item_name
1053
1063
  from meerschaum.utils.warnings import dprint
1064
+ from meerschaum.utils.dtypes import json_serialize_value
1054
1065
 
1055
1066
  ### NOTE: PostgreSQL doesn't support NUL chars in text, so they're removed from strings.
1056
1067
  data_iter = (
1057
1068
  (
1058
1069
  (
1059
1070
  (
1060
- json.dumps(item).replace('\0', '').replace('\\u0000', '')
1071
+ json.dumps(
1072
+ item,
1073
+ default=json_serialize_value,
1074
+ ).replace('\0', '').replace('\\u0000', '')
1061
1075
  if isinstance(item, (dict, list))
1062
1076
  else (
1063
- item
1077
+ json_serialize_value(item, default_to_str=False)
1064
1078
  if not isinstance(item, str)
1065
1079
  else item.replace('\0', '').replace('\\u0000', '')
1066
1080
  )
@@ -1119,6 +1133,7 @@ def mssql_insert_json(
1119
1133
  """
1120
1134
  import json
1121
1135
  from meerschaum.utils.sql import sql_item_name
1136
+ from meerschaum.utils.dtypes import json_serialize_value
1122
1137
  from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
1123
1138
  from meerschaum.utils.warnings import dprint
1124
1139
  table_name = sql_item_name(table.name, 'mssql', table.schema)
@@ -1127,6 +1142,15 @@ def mssql_insert_json(
1127
1142
  str(column.name): get_pd_type_from_db_type(str(column.type))
1128
1143
  for column in table.table.columns
1129
1144
  }
1145
+ numeric_cols_types = {
1146
+ col: table.table.columns[col].type
1147
+ for col, typ in pd_types.items()
1148
+ if typ.startswith('numeric') and col in keys
1149
+ }
1150
+ pd_types.update({
1151
+ col: f'numeric[{typ.precision},{typ.scale}]'
1152
+ for col, typ in numeric_cols_types.items()
1153
+ })
1130
1154
  cols_types = {
1131
1155
  col: get_db_type_from_pd_type(typ, 'mssql')
1132
1156
  for col, typ in pd_types.items()
@@ -1151,7 +1175,8 @@ def mssql_insert_json(
1151
1175
  if debug:
1152
1176
  dprint(sql)
1153
1177
 
1154
- conn.exec_driver_sql(sql, (json.dumps(json_data, default=str),))
1178
+ serialized_data = json.dumps(json_data, default=json_serialize_value)
1179
+ conn.exec_driver_sql(sql, (serialized_data,))
1155
1180
 
1156
1181
 
1157
1182
  def format_sql_query_for_dask(query: str) -> 'sqlalchemy.sql.selectable.Select':
@@ -239,7 +239,7 @@ class ValkeyConnector(Connector):
239
239
  -------
240
240
  The current index counter value (how many docs have been pushed).
241
241
  """
242
- from meerschaum.utils.misc import json_serialize_datetime
242
+ from meerschaum.utils.dtypes import json_serialize_value
243
243
  table_name = self.quote_table(table)
244
244
  datetime_column_key = self.get_datetime_column_key(table)
245
245
  remote_datetime_column = self.get(datetime_column_key)
@@ -269,7 +269,7 @@ class ValkeyConnector(Connector):
269
269
  ) if datetime_column else None
270
270
  doc_str = json.dumps(
271
271
  doc,
272
- default=(lambda x: json_serialize_datetime(x) if hasattr(x, 'tzinfo') else str(x)),
272
+ default=json_serialize_value,
273
273
  separators=(',', ':'),
274
274
  sort_keys=True,
275
275
  )
@@ -84,6 +84,7 @@ def fetch(
84
84
  begin=_determine_begin(
85
85
  self,
86
86
  begin,
87
+ end,
87
88
  check_existing=check_existing,
88
89
  debug=debug,
89
90
  ),
@@ -136,6 +137,7 @@ def get_backtrack_interval(
136
137
  def _determine_begin(
137
138
  pipe: mrsm.Pipe,
138
139
  begin: Union[datetime, int, str, None] = '',
140
+ end: Union[datetime, int, None] = None,
139
141
  check_existing: bool = True,
140
142
  debug: bool = False,
141
143
  ) -> Union[datetime, int, None]:
@@ -157,6 +159,8 @@ def _determine_begin(
157
159
  """
158
160
  if begin != '':
159
161
  return begin
162
+ if end is not None:
163
+ return None
160
164
  sync_time = pipe.get_sync_time(debug=debug)
161
165
  if sync_time is None:
162
166
  return sync_time
@@ -292,7 +292,6 @@ def sync(
292
292
  message = '\n'.join([_message for _, _message in df])
293
293
  return success, message
294
294
 
295
- ### TODO: Depreciate async?
296
295
  if df is True:
297
296
  p._exists = None
298
297
  return True, f"{p} is being synced in parallel."
@@ -331,8 +330,7 @@ def sync(
331
330
  return (
332
331
  _chunk_success,
333
332
  (
334
- '\n'
335
- + self._get_chunk_label(_chunk, dt_col)
333
+ self._get_chunk_label(_chunk, dt_col)
336
334
  + '\n'
337
335
  + _chunk_msg
338
336
  )
@@ -341,17 +339,25 @@ def sync(
341
339
  results = sorted(
342
340
  [(chunk_success, chunk_msg)] + (
343
341
  list(pool.imap(_process_chunk, df))
344
- if not df_is_chunk_generator(chunk)
345
- else [
342
+ if (
343
+ not df_is_chunk_generator(chunk) # Handle nested generators.
344
+ and kw.get('workers', 1) != 1
345
+ )
346
+ else list(
346
347
  _process_chunk(_child_chunks)
347
348
  for _child_chunks in df
348
- ]
349
+ )
349
350
  )
350
351
  )
351
352
  chunk_messages = [chunk_msg for _, chunk_msg in results]
352
353
  success_bools = [chunk_success for chunk_success, _ in results]
353
354
  success = all(success_bools)
354
- msg = '\n'.join(chunk_messages)
355
+ msg = (
356
+ f'Synced {len(chunk_messages)} chunk'
357
+ + ('s' if len(chunk_messages) != 1 else '')
358
+ + f' to {p}:\n\n'
359
+ + '\n\n'.join(chunk_messages).lstrip().rstrip()
360
+ ).lstrip().rstrip()
355
361
 
356
362
  ### If some chunks succeeded, retry the failures.
357
363
  retry_success = True
@@ -432,7 +438,7 @@ def sync(
432
438
 
433
439
  if blocking:
434
440
  self._exists = None
435
- return _sync(self, df = df)
441
+ return _sync(self, df=df)
436
442
 
437
443
  from meerschaum.utils.threading import Thread
438
444
  def default_callback(result_tuple: SuccessTuple):
@@ -821,6 +827,7 @@ def filter_existing(
821
827
  for col, typ in self_dtypes.items()
822
828
  },
823
829
  safe_copy=safe_copy,
830
+ coerce_mixed_numerics=(not self.static),
824
831
  debug=debug
825
832
  ),
826
833
  on_cols_dtypes,
@@ -962,7 +969,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
962
969
  """
963
970
  from meerschaum.utils.dataframe import get_numeric_cols
964
971
  numeric_cols = get_numeric_cols(df)
965
- existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ == 'numeric']
972
+ existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
966
973
  new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
967
974
  if not new_numeric_cols:
968
975
  return True, "Success"
@@ -774,9 +774,16 @@ class Daemon:
774
774
  if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
775
775
  try:
776
776
  self._process = psutil.Process(int(pid))
777
+ process_exists = True
777
778
  except Exception:
778
- if self.pid_path.exists():
779
- self.pid_path.unlink()
779
+ process_exists = False
780
+ if not process_exists:
781
+ _ = self.__dict__.pop('_process', None)
782
+ try:
783
+ if self.pid_path.exists():
784
+ self.pid_path.unlink()
785
+ except Exception:
786
+ pass
780
787
  return None
781
788
  return self._process
782
789
 
@@ -13,11 +13,10 @@ import pathlib
13
13
  import traceback
14
14
  import sys
15
15
  import atexit
16
- from datetime import datetime, timezone, timedelta
17
- from typing import List, Union, Optional, Tuple
16
+ from datetime import datetime, timezone
17
+ from typing import List, Optional, Tuple
18
18
  from meerschaum.config import get_config
19
19
  from meerschaum.utils.warnings import warn
20
- from meerschaum.utils.misc import round_time
21
20
  from meerschaum.utils.daemon.FileDescriptorInterceptor import FileDescriptorInterceptor
22
21
  from meerschaum.utils.threading import Thread
23
22
  import meerschaum as mrsm
@@ -517,6 +516,7 @@ class RotatingFile(io.IOBase):
517
516
  else 0
518
517
  )
519
518
 
519
+ subfile_lines = []
520
520
  if (
521
521
  subfile_index in self.subfile_objects
522
522
  and