meerschaum 2.4.12__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. meerschaum/_internal/entry.py +1 -0
  2. meerschaum/actions/bootstrap.py +9 -11
  3. meerschaum/actions/delete.py +1 -1
  4. meerschaum/actions/edit.py +40 -14
  5. meerschaum/actions/sh.py +11 -10
  6. meerschaum/actions/start.py +58 -2
  7. meerschaum/actions/sync.py +14 -16
  8. meerschaum/actions/upgrade.py +8 -6
  9. meerschaum/config/_version.py +1 -1
  10. meerschaum/connectors/sql/_create_engine.py +8 -3
  11. meerschaum/connectors/sql/_fetch.py +4 -2
  12. meerschaum/connectors/sql/_instance.py +3 -3
  13. meerschaum/connectors/sql/_pipes.py +18 -13
  14. meerschaum/connectors/sql/_sql.py +59 -50
  15. meerschaum/core/Pipe/__init__.py +23 -1
  16. meerschaum/core/Pipe/_attributes.py +96 -14
  17. meerschaum/plugins/__init__.py +6 -2
  18. meerschaum/plugins/bootstrap.py +15 -15
  19. meerschaum/utils/dataframe.py +17 -5
  20. meerschaum/utils/packages/__init__.py +40 -22
  21. meerschaum/utils/packages/_packages.py +24 -8
  22. meerschaum/utils/process.py +18 -8
  23. meerschaum/utils/schedule.py +9 -5
  24. meerschaum/utils/venv/__init__.py +35 -24
  25. meerschaum/utils/warnings.py +7 -7
  26. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/METADATA +9 -13
  27. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/RECORD +33 -33
  28. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/LICENSE +0 -0
  29. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/NOTICE +0 -0
  30. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/WHEEL +0 -0
  31. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/entry_points.txt +0 -0
  32. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/top_level.txt +0 -0
  33. {meerschaum-2.4.12.dist-info → meerschaum-2.5.0.dist-info}/zip-safe +0 -0
@@ -17,8 +17,9 @@ from meerschaum.utils.warnings import warn
17
17
  ### database flavors that can use bulk insert
18
18
  _bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
19
19
  ### flavors that do not support chunks
20
- _disallow_chunks_flavors = {'duckdb', 'mssql'}
20
+ _disallow_chunks_flavors = ['duckdb']
21
21
  _max_chunks_flavors = {'sqlite': 1000,}
22
+ SKIP_READ_TRANSACTION_FLAVORS: list[str] = ['mssql']
22
23
 
23
24
 
24
25
  def read(
@@ -97,7 +98,7 @@ def read(
97
98
  Defaults to `SQLConnector.schema`.
98
99
 
99
100
  as_chunks: bool, default False
100
- If `True`, return a list of DataFrames.
101
+ If `True`, return a list of DataFrames.
101
102
  Otherwise return a single DataFrame.
102
103
 
103
104
  as_iterator: bool, default False
@@ -127,7 +128,6 @@ def read(
127
128
  from meerschaum.utils.pool import get_pool
128
129
  from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
129
130
  import warnings
130
- import inspect
131
131
  import traceback
132
132
  from decimal import Decimal
133
133
  pd = import_pandas()
@@ -140,6 +140,7 @@ def read(
140
140
  chunksize = None
141
141
  schema = schema or self.schema
142
142
 
143
+ pool = get_pool(workers=workers)
143
144
  sqlalchemy = attempt_import("sqlalchemy")
144
145
  default_chunksize = self._sys_config.get('chunksize', None)
145
146
  chunksize = chunksize if chunksize != -1 else default_chunksize
@@ -157,7 +158,7 @@ def read(
157
158
  f"The specified chunksize of {chunksize} exceeds the maximum of "
158
159
  + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
159
160
  + f" Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
160
- stacklevel = 3,
161
+ stacklevel=3,
161
162
  )
162
163
  chunksize = _max_chunks_flavors[self.flavor]
163
164
 
@@ -184,8 +185,8 @@ def read(
184
185
  truncated_table_name = truncate_item_name(str(query_or_table), self.flavor)
185
186
  if truncated_table_name != str(query_or_table) and not silent:
186
187
  warn(
187
- f"Table '{name}' is too long for '{self.flavor}',"
188
- + f" will instead create the table '{truncated_name}'."
188
+ f"Table '{query_or_table}' is too long for '{self.flavor}',"
189
+ + f" will instead read the table '{truncated_table_name}'."
189
190
  )
190
191
 
191
192
  query_or_table = sql_item_name(str(query_or_table), self.flavor, schema)
@@ -204,6 +205,34 @@ def read(
204
205
 
205
206
  chunk_list = []
206
207
  chunk_hook_results = []
208
+ def _process_chunk(_chunk, _retry_on_failure: bool = True):
209
+ if not as_hook_results:
210
+ chunk_list.append(_chunk)
211
+ if chunk_hook is None:
212
+ return None
213
+
214
+ result = None
215
+ try:
216
+ result = chunk_hook(
217
+ _chunk,
218
+ workers=workers,
219
+ chunksize=chunksize,
220
+ debug=debug,
221
+ **kw
222
+ )
223
+ except Exception:
224
+ result = False, traceback.format_exc()
225
+ from meerschaum.utils.formatting import get_console
226
+ if not silent:
227
+ get_console().print_exception()
228
+
229
+ ### If the chunk fails to process, try it again one more time.
230
+ if isinstance(result, tuple) and result[0] is False:
231
+ if _retry_on_failure:
232
+ return _process_chunk(_chunk, _retry_on_failure=False)
233
+
234
+ return result
235
+
207
236
  try:
208
237
  stream_results = not as_iterator and chunk_hook is not None and chunksize is not None
209
238
  with warnings.catch_warnings():
@@ -235,52 +264,32 @@ def read(
235
264
  )
236
265
  else:
237
266
 
238
- with self.engine.begin() as transaction:
239
- with transaction.execution_options(stream_results=stream_results) as connection:
240
- chunk_generator = pd.read_sql_query(
241
- formatted_query,
242
- connection,
243
- **read_sql_query_kwargs
267
+ def get_chunk_generator(connectable):
268
+ chunk_generator = pd.read_sql_query(
269
+ formatted_query,
270
+ self.engine,
271
+ **read_sql_query_kwargs
272
+ )
273
+ to_return = (
274
+ chunk_generator
275
+ if as_iterator or chunksize is None
276
+ else (
277
+ list(pool.imap(_process_chunk, chunk_generator))
278
+ if as_hook_results
279
+ else None
244
280
  )
281
+ )
282
+ return chunk_generator, to_return
283
+
284
+ if self.flavor in SKIP_READ_TRANSACTION_FLAVORS:
285
+ chunk_generator, to_return = get_chunk_generator(self.engine)
286
+ else:
287
+ with self.engine.begin() as transaction:
288
+ with transaction.execution_options(stream_results=stream_results) as connection:
289
+ chunk_generator, to_return = get_chunk_generator(connection)
245
290
 
246
- ### `stream_results` must be False (will load everything into memory).
247
- if as_iterator or chunksize is None:
248
- return chunk_generator
249
-
250
- ### We must consume the generator in this context if using server-side cursors.
251
- if stream_results:
252
-
253
- pool = get_pool(workers=workers)
254
-
255
- def _process_chunk(_chunk, _retry_on_failure: bool = True):
256
- if not as_hook_results:
257
- chunk_list.append(_chunk)
258
- result = None
259
- if chunk_hook is not None:
260
- try:
261
- result = chunk_hook(
262
- _chunk,
263
- workers = workers,
264
- chunksize = chunksize,
265
- debug = debug,
266
- **kw
267
- )
268
- except Exception as e:
269
- result = False, traceback.format_exc()
270
- from meerschaum.utils.formatting import get_console
271
- if not silent:
272
- get_console().print_exception()
273
-
274
- ### If the chunk fails to process, try it again one more time.
275
- if isinstance(result, tuple) and result[0] is False:
276
- if _retry_on_failure:
277
- return _process_chunk(_chunk, _retry_on_failure=False)
278
-
279
- return result
280
-
281
- chunk_hook_results = list(pool.imap(_process_chunk, chunk_generator))
282
- if as_hook_results:
283
- return chunk_hook_results
291
+ if to_return is not None:
292
+ return to_return
284
293
 
285
294
  except Exception as e:
286
295
  if debug:
@@ -98,6 +98,8 @@ class Pipe:
98
98
  attributes,
99
99
  parameters,
100
100
  columns,
101
+ indices,
102
+ indexes,
101
103
  dtypes,
102
104
  get_columns,
103
105
  get_columns_types,
@@ -145,6 +147,7 @@ class Pipe:
145
147
  location: Optional[str] = None,
146
148
  parameters: Optional[Dict[str, Any]] = None,
147
149
  columns: Union[Dict[str, str], List[str], None] = None,
150
+ indices: Optional[Dict[str, Union[str, List[str]]]] = None,
148
151
  tags: Optional[List[str]] = None,
149
152
  target: Optional[str] = None,
150
153
  dtypes: Optional[Dict[str, str]] = None,
@@ -156,6 +159,7 @@ class Pipe:
156
159
  connector_keys: Optional[str] = None,
157
160
  metric_key: Optional[str] = None,
158
161
  location_key: Optional[str] = None,
162
+ indexes: Union[Dict[str, str], List[str], None] = None,
159
163
  ):
160
164
  """
161
165
  Parameters
@@ -174,10 +178,14 @@ class Pipe:
174
178
  e.g. columns and other attributes.
175
179
  You can edit these parameters with `edit pipes`.
176
180
 
177
- columns: Optional[Dict[str, str]], default None
181
+ columns: Union[Dict[str, str], List[str], None], default None
178
182
  Set the `columns` dictionary of `parameters`.
179
183
  If `parameters` is also provided, this dictionary is added under the `'columns'` key.
180
184
 
185
+ indices: Optional[Dict[str, Union[str, List[str]]]], default None
186
+ Set the `indices` dictionary of `parameters`.
187
+ If `parameters` is also provided, this dictionary is added under the `'indices'` key.
188
+
181
189
  tags: Optional[List[str]], default None
182
190
  A list of strings to be added under the `'tags'` key of `parameters`.
183
191
  You can select pipes with certain tags using `--tags`.
@@ -255,6 +263,20 @@ class Pipe:
255
263
  elif columns is not None:
256
264
  warn(f"The provided columns are of invalid type '{type(columns)}'.")
257
265
 
266
+ indices = (
267
+ indices
268
+ or indexes
269
+ or self._attributes.get('parameters', {}).get('indices', None)
270
+ or self._attributes.get('parameters', {}).get('indexes', None)
271
+ ) or columns
272
+ if isinstance(indices, dict):
273
+ indices_key = (
274
+ 'indexes'
275
+ if 'indexes' in self._attributes['parameters']
276
+ else 'indices'
277
+ )
278
+ self._attributes['parameters'][indices_key] = indices
279
+
258
280
  if isinstance(tags, (list, tuple)):
259
281
  self._attributes['parameters']['tags'] = tags
260
282
  elif tags is not None:
@@ -77,15 +77,69 @@ def columns(self) -> Union[Dict[str, str], None]:
77
77
 
78
78
 
79
79
  @columns.setter
80
- def columns(self, columns: Dict[str, str]) -> None:
80
+ def columns(self, _columns: Union[Dict[str, str], List[str]]) -> None:
81
81
  """
82
82
  Override the columns dictionary of the in-memory pipe.
83
83
  Call `meerschaum.Pipe.edit()` to persist changes.
84
84
  """
85
+ if isinstance(_columns, (list, tuple)):
86
+ _columns = {col: col for col in _columns}
85
87
  if not isinstance(columns, dict):
86
- warn(f"{self}.columns must be a dictionary, received {type(columns)}")
88
+ warn(f"{self}.columns must be a dictionary, received {type(_columns)}.")
87
89
  return
88
- self.parameters['columns'] = columns
90
+ self.parameters['columns'] = _columns
91
+
92
+
93
+ @property
94
+ def indices(self) -> Union[Dict[str, Union[str, List[str]]], None]:
95
+ """
96
+ Return the `indices` dictionary defined in `meerschaum.Pipe.parameters`.
97
+ """
98
+ indices_key = (
99
+ 'indexes'
100
+ if 'indexes' in self.parameters
101
+ else 'indices'
102
+ )
103
+ if indices_key not in self.parameters:
104
+ self.parameters[indices_key] = {}
105
+ _indices = self.parameters[indices_key]
106
+ if not isinstance(_indices, dict):
107
+ _indices = {}
108
+ self.parameters[indices_key] = _indices
109
+ return {**self.columns, **_indices}
110
+
111
+
112
+ @property
113
+ def indexes(self) -> Union[Dict[str, Union[str, List[str]]], None]:
114
+ """
115
+ Alias for `meerschaum.Pipe.indices`.
116
+ """
117
+ return self.indices
118
+
119
+
120
+ @indices.setter
121
+ def indices(self, _indices: Union[Dict[str, Union[str, List[str]]], List[str]]) -> None:
122
+ """
123
+ Override the indices dictionary of the in-memory pipe.
124
+ Call `meerschaum.Pipe.edit()` to persist changes.
125
+ """
126
+ if not isinstance(_indices, dict):
127
+ warn(f"{self}.indices must be a dictionary, received {type(_indices)}.")
128
+ return
129
+ indices_key = (
130
+ 'indexes'
131
+ if 'indexes' in self.parameters
132
+ else 'indices'
133
+ )
134
+ self.parameters[indices_key] = _indices
135
+
136
+
137
+ @indexes.setter
138
+ def indexes(self, _indexes: Union[Dict[str, Union[str, List[str]]], List[str]]) -> None:
139
+ """
140
+ Alias for `meerschaum.Pipe.indices`.
141
+ """
142
+ self.indices = _indexes
89
143
 
90
144
 
91
145
  @property
@@ -415,27 +469,55 @@ def guess_datetime(self) -> Union[str, None]:
415
469
  """
416
470
  Try to determine a pipe's datetime column.
417
471
  """
418
- dtypes = self.dtypes
472
+ _dtypes = self.dtypes
419
473
 
420
474
  ### Abort if the user explicitly disallows a datetime index.
421
- if 'datetime' in dtypes:
422
- if dtypes['datetime'] is None:
475
+ if 'datetime' in _dtypes:
476
+ if _dtypes['datetime'] is None:
423
477
  return None
424
478
 
479
+ from meerschaum.utils.dtypes import are_dtypes_equal
425
480
  dt_cols = [
426
- col for col, typ in self.dtypes.items()
427
- if str(typ).startswith('datetime')
481
+ col
482
+ for col, typ in _dtypes.items()
483
+ if are_dtypes_equal(typ, 'datetime')
428
484
  ]
429
485
  if not dt_cols:
430
486
  return None
431
- return dt_cols[0]
487
+ return dt_cols[0]
432
488
 
433
489
 
434
490
  def get_indices(self) -> Dict[str, str]:
435
491
  """
436
- Return a dictionary in the form of `pipe.columns` but map to index names.
437
- """
438
- return {
439
- ix: (self.target + '_' + col + '_index')
440
- for ix, col in self.columns.items() if col
492
+ Return a dictionary mapping index keys to their names on the database.
493
+
494
+ Returns
495
+ -------
496
+ A dictionary of index keys to index names.
497
+ """
498
+ _parameters = self.parameters
499
+ _index_template = _parameters.get('index_template', "IX_{target}_{column_names}")
500
+ _indices = self.indices
501
+ _target = self.target
502
+ _column_names = {
503
+ ix: (
504
+ '_'.join(cols)
505
+ if isinstance(cols, (list, tuple))
506
+ else str(cols)
507
+ )
508
+ for ix, cols in _indices.items()
509
+ if cols
510
+ }
511
+ _index_names = {
512
+ ix: (
513
+ _index_template.format(
514
+ target=_target,
515
+ column_names=column_names,
516
+ connector_keys=self.connector_keys,
517
+ metric_key=self.connector_key,
518
+ location_key=self.location_key,
519
+ )
520
+ )
521
+ for ix, column_names in _column_names.items()
441
522
  }
523
+ return _index_names
@@ -316,7 +316,11 @@ def sync_plugins_symlinks(debug: bool = False, warn: bool = True) -> None:
316
316
 
317
317
  ### NOTE: Allow plugins to be installed via `pip`.
318
318
  packaged_plugin_paths = []
319
- discovered_packaged_plugins_eps = entry_points(group='meerschaum.plugins')
319
+ try:
320
+ discovered_packaged_plugins_eps = entry_points(group='meerschaum.plugins')
321
+ except TypeError:
322
+ discovered_packaged_plugins_eps = []
323
+
320
324
  for ep in discovered_packaged_plugins_eps:
321
325
  module_name = ep.name
322
326
  for package_file_path in ep.dist.files:
@@ -330,7 +334,7 @@ def sync_plugins_symlinks(debug: bool = False, warn: bool = True) -> None:
330
334
  if is_symlink(PLUGINS_RESOURCES_PATH) or not PLUGINS_RESOURCES_PATH.exists():
331
335
  try:
332
336
  PLUGINS_RESOURCES_PATH.unlink()
333
- except Exception as e:
337
+ except Exception:
334
338
  pass
335
339
 
336
340
  PLUGINS_RESOURCES_PATH.mkdir(exist_ok=True)
@@ -47,7 +47,6 @@ IMPORTS_LINES: Dict[str, str] = {
47
47
  ),
48
48
  }
49
49
 
50
- ### TODO: Add feature for custom connectors.
51
50
  FEATURE_LINES: Dict[str, str] = {
52
51
  'header': (
53
52
  "#! /usr/bin/env python3\n"
@@ -94,7 +93,7 @@ FEATURE_LINES: Dict[str, str] = {
94
93
  "class {plugin_name_capitalized}Connector(Connector):\n"
95
94
  " \"\"\"Implement '{plugin_name_lower}' connectors.\"\"\"\n\n"
96
95
  " REQUIRED_ATTRIBUTES: list[str] = []\n"
97
- " \n"
96
+ "\n"
98
97
  " def fetch(\n"
99
98
  " self,\n"
100
99
  " pipe: mrsm.Pipe,\n"
@@ -149,11 +148,12 @@ FEATURE_LINES: Dict[str, str] = {
149
148
  ),
150
149
  }
151
150
 
151
+
152
152
  def bootstrap_plugin(
153
- plugin_name: str,
154
- debug: bool = False,
155
- **kwargs: Any
156
- ) -> SuccessTuple:
153
+ plugin_name: str,
154
+ debug: bool = False,
155
+ **kwargs: Any
156
+ ) -> SuccessTuple:
157
157
  """
158
158
  Prompt the user for features and create a plugin file.
159
159
  """
@@ -177,9 +177,9 @@ def bootstrap_plugin(
177
177
  features: List[str] = choose(
178
178
  "Which of the following features would you like to add to your plugin?",
179
179
  list(FEATURE_CHOICES.items()),
180
- default = 'fetch',
181
- multiple = True,
182
- as_indices = True,
180
+ default='fetch',
181
+ multiple=True,
182
+ as_indices=True,
183
183
  **kwargs
184
184
  )
185
185
 
@@ -256,7 +256,7 @@ def bootstrap_plugin(
256
256
  _ = prompt(
257
257
  f"Press [Enter] to edit plugin '{plugin_name}',"
258
258
  + " [CTRL+C] to skip.",
259
- icon = False,
259
+ icon=False,
260
260
  )
261
261
  except (KeyboardInterrupt, Exception):
262
262
  return True, "Success"
@@ -267,7 +267,7 @@ def bootstrap_plugin(
267
267
 
268
268
  def _get_plugins_dir_path() -> pathlib.Path:
269
269
  from meerschaum.config.paths import PLUGINS_DIR_PATHS
270
-
270
+
271
271
  if not PLUGINS_DIR_PATHS:
272
272
  raise EnvironmentError("No plugin dir path could be found.")
273
273
 
@@ -278,9 +278,9 @@ def _get_plugins_dir_path() -> pathlib.Path:
278
278
  choose(
279
279
  "In which directory do you want to write your plugin?",
280
280
  [path.as_posix() for path in PLUGINS_DIR_PATHS],
281
- numeric = True,
282
- multiple = False,
283
- default = PLUGINS_DIR_PATHS[0].as_posix(),
281
+ numeric=True,
282
+ multiple=False,
283
+ default=PLUGINS_DIR_PATHS[0].as_posix(),
284
284
  )
285
285
  )
286
286
 
@@ -290,7 +290,7 @@ def _ask_to_uninstall(plugin: mrsm.Plugin, **kwargs: Any) -> SuccessTuple:
290
290
  warn(f"Plugin '{plugin}' is already installed!", stack=False)
291
291
  uninstall_plugin = yes_no(
292
292
  f"Do you want to first uninstall '{plugin}'?",
293
- default = 'n',
293
+ default='n',
294
294
  **kwargs
295
295
  )
296
296
  if not uninstall_plugin:
@@ -413,6 +413,7 @@ def parse_df_datetimes(
413
413
  from meerschaum.utils.packages import import_pandas, attempt_import
414
414
  from meerschaum.utils.debug import dprint
415
415
  from meerschaum.utils.warnings import warn
416
+ from meerschaum.utils.misc import items_str
416
417
  import traceback
417
418
  pd = import_pandas()
418
419
  pandas = attempt_import('pandas')
@@ -503,14 +504,14 @@ def parse_df_datetimes(
503
504
  else:
504
505
  df[datetime_cols] = df[datetime_cols].apply(
505
506
  pd.to_datetime,
506
- utc = True,
507
- axis = 1,
508
- meta = {
507
+ utc=True,
508
+ axis=1,
509
+ meta={
509
510
  col: 'datetime64[ns]'
510
511
  for col in datetime_cols
511
512
  }
512
513
  )
513
- except Exception as e:
514
+ except Exception:
514
515
  warn(
515
516
  f"Unable to apply `pd.to_datetime` to {items_str(datetime_cols)}:\n"
516
517
  + f"{traceback.format_exc()}"
@@ -519,7 +520,7 @@ def parse_df_datetimes(
519
520
  for dt in datetime_cols:
520
521
  try:
521
522
  df[dt] = df[dt].dt.tz_localize(None)
522
- except Exception as e:
523
+ except Exception:
523
524
  warn(f"Unable to convert column '{dt}' to naive datetime:\n{traceback.format_exc()}")
524
525
 
525
526
  return df
@@ -567,6 +568,9 @@ def get_json_cols(df: 'pd.DataFrame') -> List[str]:
567
568
  -------
568
569
  A list of columns to be encoded as JSON.
569
570
  """
571
+ if df is None:
572
+ return []
573
+
570
574
  is_dask = 'dask' in df.__module__ if hasattr(df, '__module__') else False
571
575
  if is_dask:
572
576
  df = get_first_valid_dask_partition(df)
@@ -602,6 +606,8 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
602
606
  -------
603
607
  A list of columns to treat as numerics.
604
608
  """
609
+ if df is None:
610
+ return []
605
611
  from decimal import Decimal
606
612
  is_dask = 'dask' in df.__module__
607
613
  if is_dask:
@@ -638,6 +644,8 @@ def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
638
644
  -------
639
645
  A list of columns to treat as UUIDs.
640
646
  """
647
+ if df is None:
648
+ return []
641
649
  from uuid import UUID
642
650
  is_dask = 'dask' in df.__module__
643
651
  if is_dask:
@@ -883,6 +891,8 @@ def get_datetime_bound_from_df(
883
891
  -------
884
892
  The minimum or maximum datetime value in the dataframe, or `None`.
885
893
  """
894
+ if df is None:
895
+ return None
886
896
  if not datetime_column:
887
897
  return None
888
898
 
@@ -955,6 +965,8 @@ def get_unique_index_values(
955
965
  -------
956
966
  A dictionary mapping indices to unique values.
957
967
  """
968
+ if df is None:
969
+ return {}
958
970
  if 'dataframe' in str(type(df)).lower():
959
971
  pandas = mrsm.attempt_import('pandas')
960
972
  return {
@@ -11,7 +11,12 @@ from __future__ import annotations
11
11
  import importlib.util, os, pathlib, re
12
12
  from meerschaum.utils.typing import Any, List, SuccessTuple, Optional, Union, Tuple, Dict, Iterable
13
13
  from meerschaum.utils.threading import Lock, RLock
14
- from meerschaum.utils.packages._packages import packages, all_packages, get_install_names
14
+ from meerschaum.utils.packages._packages import (
15
+ packages,
16
+ all_packages,
17
+ get_install_names,
18
+ _MRSM_PACKAGE_ARCHIVES_PREFIX,
19
+ )
15
20
  from meerschaum.utils.venv import (
16
21
  activate_venv,
17
22
  deactivate_venv,
@@ -35,14 +40,14 @@ _locks = {
35
40
  }
36
41
  _checked_for_updates = set()
37
42
  _is_installed_first_check: Dict[str, bool] = {}
38
- _MRSM_PACKAGE_ARCHIVES_PREFIX: str = "https://meerschaum.io/files/archives/"
43
+
39
44
 
40
45
  def get_module_path(
41
- import_name: str,
42
- venv: Optional[str] = 'mrsm',
43
- debug: bool = False,
44
- _try_install_name_on_fail: bool = True,
45
- ) -> Union[pathlib.Path, None]:
46
+ import_name: str,
47
+ venv: Optional[str] = 'mrsm',
48
+ debug: bool = False,
49
+ _try_install_name_on_fail: bool = True,
50
+ ) -> Union[pathlib.Path, None]:
46
51
  """
47
52
  Get a module's path without importing.
48
53
  """
@@ -232,10 +237,10 @@ def manually_import_module(
232
237
  if check_update:
233
238
  if need_update(
234
239
  None,
235
- import_name = root_name,
236
- version = _version,
237
- check_pypi = check_pypi,
238
- debug = debug,
240
+ import_name=root_name,
241
+ version=_version,
242
+ check_pypi=check_pypi,
243
+ debug=debug,
239
244
  ):
240
245
  if install:
241
246
  if not pip_install(
@@ -491,6 +496,8 @@ def _get_package_metadata(import_name: str, venv: Optional[str]) -> Dict[str, st
491
496
  import re
492
497
  from meerschaum.config._paths import VIRTENV_RESOURCES_PATH
493
498
  install_name = _import_to_install_name(import_name)
499
+ if install_name.startswith(_MRSM_PACKAGE_ARCHIVES_PREFIX):
500
+ return {}
494
501
  _args = ['pip', 'show', install_name]
495
502
  if venv is not None:
496
503
  cache_dir_path = VIRTENV_RESOURCES_PATH / venv / 'cache'
@@ -586,7 +593,10 @@ def need_update(
586
593
  _checked_for_updates.add(install_name)
587
594
 
588
595
  _install_no_version = get_install_no_version(install_name)
589
- required_version = install_name.replace(_install_no_version, '')
596
+ required_version = (
597
+ install_name
598
+ .replace(_install_no_version, '')
599
+ )
590
600
  if ']' in required_version:
591
601
  required_version = required_version.split(']')[1]
592
602
 
@@ -681,8 +691,8 @@ def need_update(
681
691
  )
682
692
 
683
693
  if 'a' in required_version:
684
- required_version = required_version.replace('a', '-dev').replace('+mrsm', '')
685
- version = version.replace('a', '-dev').replace('+mrsm', '')
694
+ required_version = required_version.replace('a', '-pre.').replace('+mrsm', '')
695
+ version = version.replace('a', '-pre.').replace('+mrsm', '')
686
696
  try:
687
697
  return (
688
698
  (not semver.Version.parse(version).match(required_version))
@@ -780,7 +790,7 @@ def pip_install(
780
790
  This includes version restrictions.
781
791
  Use `_import_to_install_name()` to get the predefined `install_name` for a package
782
792
  from its import name.
783
-
793
+
784
794
  args: Optional[List[str]], default None
785
795
  A list of command line arguments to pass to `pip`.
786
796
  If not provided, default to `['--upgrade']` if `_uninstall` is `False`, else `[]`.
@@ -975,7 +985,11 @@ def pip_install(
975
985
  pass
976
986
 
977
987
  _packages = [
978
- (install_name if not _uninstall else get_install_no_version(install_name))
988
+ (
989
+ get_install_no_version(install_name)
990
+ if _uninstall or install_name.startswith(_MRSM_PACKAGE_ARCHIVES_PREFIX)
991
+ else install_name
992
+ )
979
993
  for install_name in install_names
980
994
  ]
981
995
  msg = "Installing packages:" if not _uninstall else "Uninstalling packages:"
@@ -1774,13 +1788,17 @@ def is_installed(
1774
1788
 
1775
1789
  found = (
1776
1790
  not need_update(
1777
- None, import_name = root_name,
1778
- _run_determine_version = False,
1779
- check_pypi = False,
1780
- version = determine_version(
1781
- spec_path, venv=venv, debug=debug, import_name=root_name
1791
+ None,
1792
+ import_name=root_name,
1793
+ _run_determine_version=False,
1794
+ check_pypi=False,
1795
+ version=determine_version(
1796
+ spec_path,
1797
+ venv=venv,
1798
+ debug=debug,
1799
+ import_name=root_name,
1782
1800
  ),
1783
- debug = debug,
1801
+ debug=debug,
1784
1802
  )
1785
1803
  ) if spec_path is not None else False
1786
1804