meerschaum 2.0.0rc6__py3-none-any.whl → 2.0.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. meerschaum/_internal/arguments/_parse_arguments.py +12 -1
  2. meerschaum/_internal/arguments/_parser.py +23 -1
  3. meerschaum/actions/__init__.py +97 -48
  4. meerschaum/actions/bootstrap.py +1 -1
  5. meerschaum/actions/clear.py +1 -1
  6. meerschaum/actions/deduplicate.py +1 -1
  7. meerschaum/actions/delete.py +8 -7
  8. meerschaum/actions/drop.py +1 -10
  9. meerschaum/actions/edit.py +1 -1
  10. meerschaum/actions/install.py +1 -1
  11. meerschaum/actions/pause.py +1 -1
  12. meerschaum/actions/register.py +1 -1
  13. meerschaum/actions/setup.py +1 -1
  14. meerschaum/actions/show.py +1 -1
  15. meerschaum/actions/start.py +18 -7
  16. meerschaum/actions/stop.py +5 -4
  17. meerschaum/actions/sync.py +17 -2
  18. meerschaum/actions/uninstall.py +1 -1
  19. meerschaum/actions/upgrade.py +1 -1
  20. meerschaum/actions/verify.py +54 -3
  21. meerschaum/config/_default.py +71 -65
  22. meerschaum/config/_formatting.py +26 -0
  23. meerschaum/config/_jobs.py +28 -5
  24. meerschaum/config/_paths.py +21 -5
  25. meerschaum/config/_version.py +1 -1
  26. meerschaum/connectors/api/_fetch.py +1 -1
  27. meerschaum/connectors/api/_pipes.py +6 -11
  28. meerschaum/connectors/sql/_fetch.py +29 -11
  29. meerschaum/connectors/sql/_pipes.py +11 -4
  30. meerschaum/connectors/sql/_sql.py +1 -6
  31. meerschaum/core/Pipe/__init__.py +5 -1
  32. meerschaum/core/Pipe/_data.py +58 -9
  33. meerschaum/core/Pipe/_deduplicate.py +61 -11
  34. meerschaum/core/Pipe/_dtypes.py +2 -1
  35. meerschaum/core/Pipe/_verify.py +174 -34
  36. meerschaum/plugins/__init__.py +3 -0
  37. meerschaum/utils/daemon/Daemon.py +108 -27
  38. meerschaum/utils/daemon/__init__.py +35 -1
  39. meerschaum/utils/dataframe.py +10 -5
  40. meerschaum/utils/formatting/__init__.py +144 -1
  41. meerschaum/utils/formatting/_pipes.py +28 -5
  42. meerschaum/utils/misc.py +183 -187
  43. meerschaum/utils/packages/__init__.py +1 -1
  44. meerschaum/utils/packages/_packages.py +1 -0
  45. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/METADATA +4 -1
  46. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/RECORD +52 -52
  47. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/LICENSE +0 -0
  48. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/NOTICE +0 -0
  49. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/WHEEL +0 -0
  50. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/entry_points.txt +0 -0
  51. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/top_level.txt +0 -0
  52. {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/zip-safe +0 -0
@@ -12,61 +12,61 @@ from meerschaum.connectors import attributes as connector_attributes
12
12
  from meerschaum.config._paths import SQLITE_DB_PATH
13
13
 
14
14
  default_meerschaum_config = {
15
- 'instance' : 'sql:main',
16
- 'api_instance' : 'MRSM{meerschaum:instance}',
17
- 'web_instance' : 'MRSM{meerschaum:instance}',
18
- 'default_repository' : 'api:mrsm',
19
- 'connectors' : {
20
- 'sql' : {
21
- 'default' : {},
22
- 'main' : {
23
- 'username' : 'mrsm',
24
- 'password' : 'mrsm',
25
- 'flavor' : 'timescaledb',
26
- 'host' : 'localhost',
27
- 'database' : 'meerschaum',
28
- 'port' : 5432,
15
+ 'instance': 'sql:main',
16
+ 'api_instance': 'MRSM{meerschaum:instance}',
17
+ 'web_instance': 'MRSM{meerschaum:instance}',
18
+ 'default_repository': 'api:mrsm',
19
+ 'connectors': {
20
+ 'sql': {
21
+ 'default': {},
22
+ 'main': {
23
+ 'username': 'mrsm',
24
+ 'password': 'mrsm',
25
+ 'flavor': 'timescaledb',
26
+ 'host': 'localhost',
27
+ 'database': 'meerschaum',
28
+ 'port': 5432,
29
29
  },
30
- 'local' : {
31
- 'flavor' : 'sqlite',
32
- 'database' : str(SQLITE_DB_PATH),
30
+ 'local': {
31
+ 'flavor': 'sqlite',
32
+ 'database': str(SQLITE_DB_PATH),
33
33
  },
34
34
  'memory': {
35
- 'flavor' : 'sqlite',
36
- 'database' : ':memory:',
35
+ 'flavor': 'sqlite',
36
+ 'database': ':memory:',
37
37
  },
38
38
  },
39
- 'api' : {
40
- 'default' : connector_attributes['api']['default'],
41
- 'main' : {
42
- 'host' : 'localhost',
43
- 'port' : 8000,
39
+ 'api': {
40
+ 'default': connector_attributes['api']['default'],
41
+ 'main': {
42
+ 'host': 'localhost',
43
+ 'port': 8000,
44
44
  },
45
- 'local' : {
46
- 'host' : 'localhost',
45
+ 'local': {
46
+ 'host': 'localhost',
47
47
  },
48
- 'mrsm' : {
49
- 'host' : 'api.mrsm.io',
50
- 'port' : 443,
51
- 'protocol' : 'https',
48
+ 'mrsm': {
49
+ 'host': 'api.mrsm.io',
50
+ 'port': 443,
51
+ 'protocol': 'https',
52
52
  },
53
53
  },
54
54
  },
55
55
  }
56
56
  default_system_config = {
57
- 'connectors' : {
58
- 'all' : {
59
- 'pandas' : 'pandas',
57
+ 'connectors': {
58
+ 'all': {
59
+ 'pandas': 'pandas',
60
60
  },
61
- 'sql' : {
62
- 'chunksize' : 100000,
63
- 'poolclass' : 'sqlalchemy.pool.QueuePool',
64
- 'create_engine' : {
65
- 'method' : 'multi',
66
- 'pool_size' : 5,
67
- 'max_overflow' : 10,
68
- 'pool_recycle' : 3600,
69
- 'connect_args' : {},
61
+ 'sql': {
62
+ 'chunksize': 100_000,
63
+ 'poolclass': 'sqlalchemy.pool.QueuePool',
64
+ 'create_engine': {
65
+ 'method': 'multi',
66
+ 'pool_size': 5,
67
+ 'max_overflow': 10,
68
+ 'pool_recycle': 3600,
69
+ 'connect_args': {},
70
70
  },
71
71
  },
72
72
 
@@ -75,28 +75,28 @@ default_system_config = {
75
75
  },
76
76
  ### not to be confused with system_config['connectors']['api'], this is the configuration
77
77
  ### for the API server itself.
78
- 'api' : {
79
- 'uvicorn' : {
80
- 'app' : 'meerschaum.api:app',
81
- 'port' : default_meerschaum_config['connectors']['api']['default']['port'],
82
- 'host' : '0.0.0.0',
83
- 'workers' : max(int(multiprocessing.cpu_count() / 2), 1),
78
+ 'api': {
79
+ 'uvicorn': {
80
+ 'app': 'meerschaum.api:app',
81
+ 'port': default_meerschaum_config['connectors']['api']['default']['port'],
82
+ 'host': '0.0.0.0',
83
+ 'workers': max(int(multiprocessing.cpu_count() / 2), 1),
84
84
  },
85
85
  'permissions': {
86
- 'registration' : {
87
- 'users' : True,
88
- 'pipes' : True,
89
- 'plugins' : True,
86
+ 'registration': {
87
+ 'users': True,
88
+ 'pipes': True,
89
+ 'plugins': True,
90
90
  },
91
- 'actions' : {
91
+ 'actions': {
92
92
  'non_admin': True,
93
93
  },
94
94
  'chaining' : {
95
- 'insecure_parent_instance' : False,
96
- 'child_apis' : False,
95
+ 'insecure_parent_instance': False,
96
+ 'child_apis': False,
97
97
  },
98
98
  },
99
- 'protocol' : default_meerschaum_config['connectors']['api']['default']['protocol'],
99
+ 'protocol': default_meerschaum_config['connectors']['api']['default']['protocol'],
100
100
  },
101
101
  'experimental': {
102
102
  'fetch': False,
@@ -106,21 +106,27 @@ default_system_config = {
106
106
  'inplace_sync': True,
107
107
  },
108
108
  }
109
- default_pipes_config = {
110
- 'parameters' : {
111
- 'columns' : {
112
- 'datetime' : None,
113
- 'id' : None,
109
+ default_pipes_config = {
110
+ 'parameters': {
111
+ 'columns': {
112
+ 'datetime': None,
113
+ 'id': None,
114
+ },
115
+ 'chunk_minutes': 1440,
116
+ 'fetch': {
117
+ 'backtrack_minutes': 1440,
118
+ },
119
+ 'verify': {
120
+ 'bound_days': 366,
114
121
  },
115
- 'chunk_minutes' : 1440,
116
122
  },
117
- 'attributes' : {
123
+ 'attributes': {
118
124
  'local_cache_timeout_seconds': 60,
119
125
  },
120
126
  }
121
- default_plugins_config = {}
127
+ default_plugins_config = {}
122
128
  default_experimental_config = {
123
- 'venv' : True,
129
+ 'venv': True,
124
130
  }
125
131
 
126
132
 
@@ -115,6 +115,32 @@ default_formatting_config = {
115
115
  },
116
116
  },
117
117
  },
118
+ 'success_calm' : {
119
+ 'unicode' : {
120
+ 'icon' : 'MRSM{formatting:emoji:success_calm}',
121
+ },
122
+ 'ascii' : {
123
+ 'icon' : '+',
124
+ },
125
+ 'ansi' : {
126
+ 'rich' : {
127
+ 'style' : 'pale_green3',
128
+ },
129
+ },
130
+ },
131
+ 'failure_calm' : {
132
+ 'unicode' : {
133
+ 'icon' : 'MRSM{formatting:emoji:failure_calm}',
134
+ },
135
+ 'ascii' : {
136
+ 'icon' : 'x',
137
+ },
138
+ 'ansi' : {
139
+ 'rich' : {
140
+ 'style' : 'indian red',
141
+ },
142
+ },
143
+ },
118
144
  'errors' : {
119
145
  'unicode' : {
120
146
  'icon' : 'MRSM{formatting:emoji:error}',
@@ -7,6 +7,8 @@ Default configuration for jobs.
7
7
  """
8
8
 
9
9
  default_jobs_config = {
10
+ 'timeout_seconds': 8,
11
+ 'check_timeout_interval_seconds': 0.1,
10
12
  'logs' : {
11
13
  'num_files_to_keep': 5,
12
14
  'max_file_size': 100_000,
@@ -14,11 +16,32 @@ default_jobs_config = {
14
16
  'refresh_files_seconds': 5.0,
15
17
  'min_buffer_len': 15,
16
18
  'colors' : [
17
- 'cyan', 'magenta', 'orange3', 'green', 'blue', 'red', 'spring_green3',
18
- 'medium_purple3', 'medium_violet_red', 'slate_blue1', 'bright_red', 'steel_blue3',
19
- 'aquamarine1', 'dark_khaki', 'pink3', 'gold3', 'pale_green1', 'light coral',
20
- 'light_goldenrod2', 'cornsilk1', 'orange_red1', 'deep_pink1', 'aquamarine3',
21
- 'sky_blue2', 'tan', 'honeydew2',
19
+ 'cyan',
20
+ 'magenta',
21
+ 'orange3',
22
+ 'green',
23
+ 'blue',
24
+ 'red',
25
+ 'spring_green3',
26
+ 'medium_purple3',
27
+ 'medium_violet_red',
28
+ 'slate_blue1',
29
+ 'bright_red',
30
+ 'steel_blue3',
31
+ 'aquamarine1',
32
+ 'dark_khaki',
33
+ 'pink3',
34
+ 'gold3',
35
+ 'pale_green1',
36
+ 'light coral',
37
+ 'light_goldenrod2',
38
+ 'cornsilk1',
39
+ 'orange_red1',
40
+ 'deep_pink1',
41
+ 'aquamarine3',
42
+ 'sky_blue2',
43
+ 'tan',
44
+ 'honeydew2',
22
45
  ],
23
46
  },
24
47
  }
@@ -48,22 +48,38 @@ if ENVIRONMENT_PLUGINS_DIR in os.environ:
48
48
  Path(path).resolve()
49
49
  for path in json.loads(os.environ[ENVIRONMENT_PLUGINS_DIR])
50
50
  ] if os.environ[ENVIRONMENT_PLUGINS_DIR].lstrip().startswith('[')
51
- else [Path(os.environ[ENVIRONMENT_PLUGINS_DIR]).resolve()]
51
+ else [
52
+ Path(path_str).resolve()
53
+ for path_str in os.environ[ENVIRONMENT_PLUGINS_DIR].split(':')
54
+ if path_str
55
+ ]
52
56
  )
53
57
  except Exception as e:
54
58
  PLUGINS_DIR_PATHS = []
55
59
 
56
60
  if not PLUGINS_DIR_PATHS:
57
61
  print(
58
- "Invalid plugins directories set for " +
59
- f"environment variable '{ENVIRONMENT_PLUGINS_DIR}'.\n" +
60
- f"Please enter a valid path or JSON-encoded paths for {ENVIRONMENT_PLUGINS_DIR}.",
61
- file = sys.stderr,
62
+ "Invalid plugins directories set for "
63
+ f"environment variable '{ENVIRONMENT_PLUGINS_DIR}'.\n\n"
64
+ f"Set this to a colon-separated path string:\n\n"
65
+ f"`export {ENVIRONMENT_PLUGINS_DIR}=./plugins:/another/path/to/plugins`\n\n"
66
+ "or a JSON-encoded path list:\n\n"
67
+ f"`export {ENVIRONMENT_PLUGINS_DIR}=" + "'[\"./plugins\", \"/another/path/to/plugins\"]'`"
68
+ f"",
62
69
  )
63
70
  sys.exit(1)
64
71
  else:
65
72
  PLUGINS_DIR_PATHS = [_ROOT_DIR_PATH / 'plugins']
66
73
 
74
+ ### Remove duplicate plugins paths.
75
+ _seen_plugins_paths, _plugins_paths_to_remove = set(), set()
76
+ for _plugin_path in PLUGINS_DIR_PATHS:
77
+ if _plugin_path in _seen_plugins_paths:
78
+ _plugins_paths_to_remove.add(_plugin_path)
79
+ _seen_plugins_paths.add(_plugin_path)
80
+ for _plugin_path in _plugins_paths_to_remove:
81
+ PLUGINS_DIR_PATHS.remove(_plugin_path)
82
+
67
83
 
68
84
  paths = {
69
85
  'PACKAGE_ROOT_PATH' : str(Path(__file__).parent.parent.resolve()),
@@ -2,4 +2,4 @@
2
2
  Specify the Meerschaum release version.
3
3
  """
4
4
 
5
- __version__ = "2.0.0rc6"
5
+ __version__ = "2.0.0rc8"
@@ -42,7 +42,7 @@ def fetch(
42
42
  remote_metric_key = instructions.get('metric_key', None)
43
43
  remote_location_key = instructions.get('location_key', None)
44
44
  if begin is None:
45
- begin = pipe.sync_time
45
+ begin = pipe.get_sync_time(debug=debug)
46
46
 
47
47
  _params = copy.deepcopy(params) if params is not None else {}
48
48
  _params = apply_patch_to_config(_params, instructions.get('params', {}))
@@ -7,6 +7,9 @@ Register or fetch Pipes from the API
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
+ import time
11
+ import json
12
+ from io import StringIO
10
13
  from datetime import datetime
11
14
  from meerschaum.utils.debug import dprint
12
15
  from meerschaum.utils.warnings import warn, error
@@ -123,7 +126,6 @@ def fetch_pipes_keys(
123
126
  A list of tuples containing pipes' keys.
124
127
  """
125
128
  from meerschaum.config.static import STATIC_CONFIG
126
- import json
127
129
  if connector_keys is None:
128
130
  connector_keys = []
129
131
  if metric_keys is None:
@@ -169,7 +171,6 @@ def sync_pipe(
169
171
  from meerschaum.utils.misc import json_serialize_datetime
170
172
  from meerschaum.config import get_config
171
173
  from meerschaum.utils.packages import attempt_import
172
- import json, time
173
174
  begin = time.time()
174
175
  more_itertools = attempt_import('more_itertools')
175
176
  if df is None:
@@ -310,7 +311,6 @@ def get_pipe_data(
310
311
  **kw: Any
311
312
  ) -> Union[pandas.DataFrame, None]:
312
313
  """Fetch data from the API."""
313
- import json
314
314
  r_url = pipe_r_url(pipe)
315
315
  chunks_list = []
316
316
  while True:
@@ -340,7 +340,7 @@ def get_pipe_data(
340
340
  from meerschaum.utils.dataframe import parse_df_datetimes
341
341
  pd = import_pandas()
342
342
  try:
343
- df = pd.read_json(response.text)
343
+ df = pd.read_json(StringIO(response.text))
344
344
  except Exception as e:
345
345
  warn(f"Failed to parse response for {pipe}:\n{e}")
346
346
  return None
@@ -367,7 +367,6 @@ def get_backtrack_data(
367
367
  **kw: Any,
368
368
  ) -> pandas.DataFrame:
369
369
  """Get a Pipe's backtrack data from the API."""
370
- import json
371
370
  r_url = pipe_r_url(pipe)
372
371
  try:
373
372
  response = self.get(
@@ -389,12 +388,12 @@ def get_backtrack_data(
389
388
  dprint(response.text)
390
389
  pd = import_pandas()
391
390
  try:
392
- df = pd.read_json(response.text)
391
+ df = pd.read_json(StringIO(response.text))
393
392
  except Exception as e:
394
393
  warn(f"Failed to read response into a dataframe:\n{e}")
395
394
  return None
396
395
 
397
- df = parse_df_datetimes(pd.read_json(response.text), debug=debug)
396
+ df = parse_df_datetimes(pd.read_json(StringIO(response.text)), debug=debug)
398
397
  return df
399
398
 
400
399
  def get_pipe_id(
@@ -438,7 +437,6 @@ def get_pipe_attributes(
438
437
  """
439
438
  r_url = pipe_r_url(pipe)
440
439
  response = self.get(r_url + '/attributes', debug=debug)
441
- import json
442
440
  try:
443
441
  return json.loads(response.text)
444
442
  except Exception as e:
@@ -474,7 +472,6 @@ def get_sync_time(
474
472
  """
475
473
  from meerschaum.utils.misc import is_int
476
474
  from meerschaum.utils.warnings import warn
477
- import datetime, json
478
475
  r_url = pipe_r_url(pipe)
479
476
  response = self.get(
480
477
  r_url + '/sync_time',
@@ -545,7 +542,6 @@ def create_metadata(
545
542
  """
546
543
  from meerschaum.utils.debug import dprint
547
544
  from meerschaum.config.static import STATIC_CONFIG
548
- import json
549
545
  r_url = STATIC_CONFIG['api']['endpoints']['metadata']
550
546
  response = self.post(r_url, debug=debug)
551
547
  if debug:
@@ -590,7 +586,6 @@ def get_pipe_rowcount(
590
586
  The number of rows in the pipe's table, bound the given parameters.
591
587
  If the table does not exist, return 0.
592
588
  """
593
- import json
594
589
  r_url = pipe_r_url(pipe)
595
590
  response = self.get(
596
591
  r_url + "/rowcount",
@@ -148,7 +148,7 @@ def get_pipe_metadef(
148
148
  dt_name = sql_item_name(_dt, self.flavor)
149
149
  is_guess = False
150
150
 
151
- if begin is not None or end is not None:
151
+ if begin not in (None, '') or end is not None:
152
152
  if is_guess:
153
153
  if _dt is None:
154
154
  warn(
@@ -168,20 +168,38 @@ def get_pipe_metadef(
168
168
  if 'order by' in definition.lower() and 'over' not in definition.lower():
169
169
  error("Cannot fetch with an ORDER clause in the definition")
170
170
 
171
+ apply_backtrack = begin == ''
171
172
  begin = (
172
- begin if not (isinstance(begin, str) and begin == '')
173
- else pipe.get_sync_time(debug=debug)
173
+ pipe.get_sync_time(debug=debug)
174
+ if begin == ''
175
+ else begin
174
176
  )
175
-
177
+
178
+ if begin and end and begin >= end:
179
+ begin = None
180
+
176
181
  da = None
177
182
  if dt_name:
178
- ### default: do not backtrack
179
- begin_da = dateadd_str(
180
- flavor=self.flavor, datepart='minute', number=(-1 * btm), begin=begin,
181
- ) if begin else None
182
- end_da = dateadd_str(
183
- flavor=self.flavor, datepart='minute', number=1, begin=end,
184
- ) if end else None
183
+ begin_da = (
184
+ dateadd_str(
185
+ flavor = self.flavor,
186
+ datepart = 'minute',
187
+ number = ((-1 * btm) if apply_backtrack else 0),
188
+ begin = begin,
189
+ )
190
+ if begin
191
+ else None
192
+ )
193
+ end_da = (
194
+ dateadd_str(
195
+ flavor = self.flavor,
196
+ datepart = 'minute',
197
+ number = 0,
198
+ begin = end,
199
+ )
200
+ if end
201
+ else None
202
+ )
185
203
 
186
204
  meta_def = (
187
205
  _simple_fetch_query(pipe) if (
@@ -6,7 +6,7 @@
6
6
  Interact with Pipes metadata via SQLConnector.
7
7
  """
8
8
  from __future__ import annotations
9
- from datetime import datetime, date
9
+ from datetime import datetime, date, timedelta
10
10
  import meerschaum as mrsm
11
11
  from meerschaum.utils.typing import (
12
12
  Union, Any, SuccessTuple, Tuple, Dict, Optional, List
@@ -427,10 +427,17 @@ def get_create_index_queries(
427
427
  get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
428
428
  if (_id is not None and _create_space_partition) else None
429
429
  )
430
+
431
+ chunk_interval = pipe.get_chunk_interval(debug=debug)
432
+ chunk_interval_minutes = (
433
+ chunk_interval
434
+ if isinstance(chunk_interval, int)
435
+ else int(chunk_interval.total_seconds() / 60)
436
+ )
430
437
  chunk_time_interval = (
431
- pipe.parameters.get('chunk_time_interval', None)
432
- or
433
- ("INTERVAL '1 DAY'" if not 'int' in _datetime_type.lower() else '100000')
438
+ f"INTERVAL '{chunk_interval_minutes} MINUTES'"
439
+ if isinstance(chunk_interval, timedelta)
440
+ else f'{chunk_interval_minutes}'
434
441
  )
435
442
 
436
443
  dt_query = (
@@ -102,10 +102,6 @@ def read(
102
102
  `chunksize` must not be `None` (falls back to 1000 if so),
103
103
  and hooks are not called in this case.
104
104
 
105
- as_dask: bool, default False
106
- If `True`, return a `dask.DataFrame`
107
- (which may be loaded into a Pandas DataFrame with `df.compute()`).
108
-
109
105
  index_col: Optional[str], default None
110
106
  If using Dask, use this column as the index column.
111
107
  If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
@@ -134,7 +130,6 @@ def read(
134
130
  is_dask = 'dask' in pd.__name__
135
131
  pd = attempt_import('pandas')
136
132
  # pd = import_pandas()
137
- dd = attempt_import('dask.dataframe') if as_dask else None
138
133
  is_dask = dd is not None
139
134
  npartitions = chunksize_to_npartitions(chunksize)
140
135
  if is_dask:
@@ -687,7 +682,7 @@ def to_sql(
687
682
  from meerschaum.utils.packages import attempt_import, import_pandas
688
683
  sqlalchemy = attempt_import('sqlalchemy', debug=debug)
689
684
  pd = import_pandas()
690
- is_dask = 'dask' in pd.__name__
685
+ is_dask = 'dask' in df.__module__
691
686
 
692
687
  stats = {'target': name, }
693
688
  ### resort to defaults if None
@@ -119,7 +119,11 @@ class Pipe:
119
119
  _get_chunk_label,
120
120
  get_num_workers,
121
121
  )
122
- from ._verify import verify
122
+ from ._verify import (
123
+ verify,
124
+ get_bound_interval,
125
+ get_bound_time,
126
+ )
123
127
  from ._delete import delete
124
128
  from ._drop import drop
125
129
  from ._clear import clear
@@ -20,6 +20,7 @@ def get_data(
20
20
  params: Optional[Dict[str, Any]] = None,
21
21
  as_iterator: bool = False,
22
22
  as_chunks: bool = False,
23
+ as_dask: bool = False,
23
24
  chunk_interval: Union[timedelta, int, None] = None,
24
25
  fresh: bool = False,
25
26
  debug: bool = False,
@@ -57,6 +58,10 @@ def get_data(
57
58
  as_chunks: bool, default False
58
59
  Alias for `as_iterator`.
59
60
 
61
+ as_dask: bool, default False
62
+ If `True`, return a `dask.DataFrame`
63
+ (which may be loaded into a Pandas DataFrame with `df.compute()`).
64
+
60
65
  chunk_interval: Union[timedelta, int, None], default None
61
66
  If `as_iterator`, then return chunks with `begin` and `end` separated by this interval.
62
67
  This may be set under `pipe.parameters['chunk_minutes']`.
@@ -85,6 +90,9 @@ def get_data(
85
90
  from meerschaum.connectors import get_connector_plugin
86
91
  from meerschaum.utils.misc import iterate_chunks, items_str
87
92
  from meerschaum.utils.dataframe import add_missing_cols_to_df
93
+ from meerschaum.utils.packages import attempt_import
94
+ dd = attempt_import('dask.dataframe') if as_dask else None
95
+ dask = attempt_import('dask') if as_dask else None
88
96
 
89
97
  if select_columns == '*':
90
98
  select_columns = None
@@ -108,6 +116,33 @@ def get_data(
108
116
  debug = debug,
109
117
  )
110
118
 
119
+ if as_dask:
120
+ from multiprocessing.pool import ThreadPool
121
+ dask_pool = ThreadPool(self.get_num_workers())
122
+ dask.config.set(pool=dask_pool)
123
+ chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
124
+ bounds = self.get_chunk_bounds(
125
+ begin = begin,
126
+ end = end,
127
+ bounded = False,
128
+ chunk_interval = chunk_interval,
129
+ debug = debug,
130
+ )
131
+ dask_chunks = [
132
+ dask.delayed(self.get_data)(
133
+ select_columns = select_columns,
134
+ omit_columns = omit_columns,
135
+ begin = chunk_begin,
136
+ end = chunk_end,
137
+ params = params,
138
+ chunk_interval = chunk_interval,
139
+ fresh = fresh,
140
+ debug = debug,
141
+ )
142
+ for (chunk_begin, chunk_end) in bounds
143
+ ]
144
+ return dd.from_delayed(dask_chunks)
145
+
111
146
  if not self.exists(debug=debug):
112
147
  return None
113
148
 
@@ -245,12 +280,7 @@ def _get_data_as_iterator(
245
280
  elif isinstance(max_dt, datetime):
246
281
  max_dt = round_time(max_dt + timedelta(minutes=1))
247
282
 
248
- if chunk_interval is None:
249
- chunk_interval = self.get_chunk_interval(debug=debug)
250
- elif isinstance(chunk_interval, int) and isinstance(min_dt, datetime):
251
- chunk_interval = timedelta(minutes=1)
252
- elif isinstance(chunk_interval, timedelta) and isinstance(min_dt, int):
253
- chunk_interval = int(chunk_interval.total_seconds() / 60)
283
+ chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
254
284
 
255
285
  ### If we can't determine bounds
256
286
  ### or if chunk_interval exceeds the max,
@@ -458,14 +488,34 @@ def get_rowcount(
458
488
 
459
489
  def get_chunk_interval(
460
490
  self,
491
+ chunk_interval: Union[timedelta, int, None] = None,
461
492
  debug: bool = False,
462
493
  ) -> Union[timedelta, int]:
463
494
  """
464
495
  Get the chunk interval to use for this pipe.
496
+
497
+ Parameters
498
+ ----------
499
+ chunk_interval: Union[timedelta, int, None], default None
500
+ If provided, coerce this value into the correct type.
501
+ For example, if the datetime axis is an integer, then
502
+ return the number of minutes.
503
+
504
+ Returns
505
+ -------
506
+ The chunk interval (`timedelta` or `int`) to use with this pipe's `datetime` axis.
465
507
  """
466
508
  default_chunk_minutes = get_config('pipes', 'parameters', 'chunk_minutes')
467
509
  configured_chunk_minutes = self.parameters.get('chunk_minutes', None)
468
- chunk_minutes = configured_chunk_minutes or default_chunk_minutes
510
+ chunk_minutes = (
511
+ (configured_chunk_minutes or default_chunk_minutes)
512
+ if chunk_interval is None
513
+ else (
514
+ chunk_interval
515
+ if isinstance(chunk_interval, int)
516
+ else int(chunk_interval.total_seconds() / 60)
517
+ )
518
+ )
469
519
 
470
520
  dt_col = self.columns.get('datetime', None)
471
521
  if dt_col is None:
@@ -529,8 +579,7 @@ def get_chunk_bounds(
529
579
  return [(None, None)]
530
580
 
531
581
  ### Set the chunk interval under `pipe.parameters['chunk_minutes']`.
532
- if chunk_interval is None:
533
- chunk_interval = self.get_chunk_interval(debug=debug)
582
+ chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
534
583
 
535
584
  ### Build a list of tuples containing the chunk boundaries
536
585
  ### so that we can sync multiple chunks in parallel.