meerschaum 2.0.0rc6__py3-none-any.whl → 2.0.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parse_arguments.py +12 -1
- meerschaum/_internal/arguments/_parser.py +23 -1
- meerschaum/actions/__init__.py +97 -48
- meerschaum/actions/bootstrap.py +1 -1
- meerschaum/actions/clear.py +1 -1
- meerschaum/actions/deduplicate.py +1 -1
- meerschaum/actions/delete.py +8 -7
- meerschaum/actions/drop.py +1 -10
- meerschaum/actions/edit.py +1 -1
- meerschaum/actions/install.py +1 -1
- meerschaum/actions/pause.py +1 -1
- meerschaum/actions/register.py +1 -1
- meerschaum/actions/setup.py +1 -1
- meerschaum/actions/show.py +1 -1
- meerschaum/actions/start.py +18 -7
- meerschaum/actions/stop.py +5 -4
- meerschaum/actions/sync.py +17 -2
- meerschaum/actions/uninstall.py +1 -1
- meerschaum/actions/upgrade.py +1 -1
- meerschaum/actions/verify.py +54 -3
- meerschaum/config/_default.py +71 -65
- meerschaum/config/_formatting.py +26 -0
- meerschaum/config/_jobs.py +28 -5
- meerschaum/config/_paths.py +21 -5
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_fetch.py +1 -1
- meerschaum/connectors/api/_pipes.py +6 -11
- meerschaum/connectors/sql/_fetch.py +29 -11
- meerschaum/connectors/sql/_pipes.py +11 -4
- meerschaum/connectors/sql/_sql.py +1 -6
- meerschaum/core/Pipe/__init__.py +5 -1
- meerschaum/core/Pipe/_data.py +58 -9
- meerschaum/core/Pipe/_deduplicate.py +61 -11
- meerschaum/core/Pipe/_dtypes.py +2 -1
- meerschaum/core/Pipe/_verify.py +174 -34
- meerschaum/plugins/__init__.py +3 -0
- meerschaum/utils/daemon/Daemon.py +108 -27
- meerschaum/utils/daemon/__init__.py +35 -1
- meerschaum/utils/dataframe.py +10 -5
- meerschaum/utils/formatting/__init__.py +144 -1
- meerschaum/utils/formatting/_pipes.py +28 -5
- meerschaum/utils/misc.py +183 -187
- meerschaum/utils/packages/__init__.py +1 -1
- meerschaum/utils/packages/_packages.py +1 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/METADATA +4 -1
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/RECORD +52 -52
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/LICENSE +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/NOTICE +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/WHEEL +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/top_level.txt +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/zip-safe +0 -0
meerschaum/config/_default.py
CHANGED
@@ -12,61 +12,61 @@ from meerschaum.connectors import attributes as connector_attributes
 from meerschaum.config._paths import SQLITE_DB_PATH
 
 default_meerschaum_config = {
-    'instance'
-    'api_instance'
-    'web_instance'
-    'default_repository'
-    'connectors'
-        'sql'
-            'default'
-            'main'
-                'username'
-                'password'
-                'flavor'
-                'host'
-                'database'
-                'port'
+    'instance': 'sql:main',
+    'api_instance': 'MRSM{meerschaum:instance}',
+    'web_instance': 'MRSM{meerschaum:instance}',
+    'default_repository': 'api:mrsm',
+    'connectors': {
+        'sql': {
+            'default': {},
+            'main': {
+                'username': 'mrsm',
+                'password': 'mrsm',
+                'flavor': 'timescaledb',
+                'host': 'localhost',
+                'database': 'meerschaum',
+                'port': 5432,
             },
-            'local'
-                'flavor'
-                'database'
+            'local': {
+                'flavor': 'sqlite',
+                'database': str(SQLITE_DB_PATH),
             },
             'memory': {
-                'flavor'
-                'database'
+                'flavor': 'sqlite',
+                'database': ':memory:',
             },
         },
-        'api'
-            'default'
-            'main'
-                'host'
-                'port'
+        'api': {
+            'default': connector_attributes['api']['default'],
+            'main': {
+                'host': 'localhost',
+                'port': 8000,
             },
-            'local'
-                'host'
+            'local': {
+                'host': 'localhost',
             },
-            'mrsm'
-                'host'
-                'port'
-                'protocol'
+            'mrsm': {
+                'host': 'api.mrsm.io',
+                'port': 443,
+                'protocol': 'https',
             },
         },
     },
 }
 default_system_config = {
-    'connectors'
-        'all'
-            'pandas'
+    'connectors': {
+        'all': {
+            'pandas': 'pandas',
         },
-        'sql'
-            'chunksize'
-            'poolclass'
-            'create_engine'
-                'method'
-                'pool_size'
-                'max_overflow'
-                'pool_recycle'
-                'connect_args'
+        'sql': {
+            'chunksize': 100_000,
+            'poolclass': 'sqlalchemy.pool.QueuePool',
+            'create_engine': {
+                'method': 'multi',
+                'pool_size': 5,
+                'max_overflow': 10,
+                'pool_recycle': 3600,
+                'connect_args': {},
             },
         },
 
@@ -75,28 +75,28 @@ default_system_config = {
     },
     ### not to be confused with system_config['connectors']['api'], this is the configuration
     ### for the API server itself.
-    'api'
-        'uvicorn'
-            'app'
-            'port'
-            'host'
-            'workers'
+    'api': {
+        'uvicorn': {
+            'app': 'meerschaum.api:app',
+            'port': default_meerschaum_config['connectors']['api']['default']['port'],
+            'host': '0.0.0.0',
+            'workers': max(int(multiprocessing.cpu_count() / 2), 1),
         },
         'permissions': {
-            'registration'
-                'users'
-                'pipes'
-                'plugins'
+            'registration': {
+                'users': True,
+                'pipes': True,
+                'plugins': True,
            },
-            'actions'
+            'actions': {
                'non_admin': True,
            },
            'chaining' : {
-                'insecure_parent_instance'
-                'child_apis'
+                'insecure_parent_instance': False,
+                'child_apis': False,
            },
        },
-        'protocol'
+        'protocol': default_meerschaum_config['connectors']['api']['default']['protocol'],
    },
    'experimental': {
        'fetch': False,
@@ -106,21 +106,27 @@ default_system_config = {
        'inplace_sync': True,
    },
 }
-default_pipes_config
-    'parameters'
-        'columns'
-            'datetime'
-            'id'
+default_pipes_config = {
+    'parameters': {
+        'columns': {
+            'datetime': None,
+            'id': None,
+        },
+        'chunk_minutes': 1440,
+        'fetch': {
+            'backtrack_minutes': 1440,
+        },
+        'verify': {
+            'bound_days': 366,
        },
-        'chunk_minutes' : 1440,
    },
-    'attributes'
+    'attributes': {
        'local_cache_timeout_seconds': 60,
    },
 }
-default_plugins_config
+default_plugins_config = {}
 default_experimental_config = {
-    'venv'
+    'venv': True,
 }
 
 
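For reference, the nested pipe defaults added above are read back through `get_config`, the same lookup `Pipe.get_chunk_interval()` uses later in this diff; a minimal sketch, with the new default values noted in comments:

# Minimal sketch: reading the nested defaults introduced in `default_pipes_config`.
# `get_config('pipes', 'parameters', 'chunk_minutes')` is the exact call used by
# Pipe.get_chunk_interval() further down in this diff; the other keys follow suit.
from meerschaum.config import get_config

chunk_minutes = get_config('pipes', 'parameters', 'chunk_minutes')                    # 1440
backtrack_minutes = get_config('pipes', 'parameters', 'fetch', 'backtrack_minutes')   # 1440
bound_days = get_config('pipes', 'parameters', 'verify', 'bound_days')                # 366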
meerschaum/config/_formatting.py
CHANGED
@@ -115,6 +115,32 @@ default_formatting_config = {
             },
         },
     },
+    'success_calm' : {
+        'unicode' : {
+            'icon' : 'MRSM{formatting:emoji:success_calm}',
+        },
+        'ascii' : {
+            'icon' : '+',
+        },
+        'ansi' : {
+            'rich' : {
+                'style' : 'pale_green3',
+            },
+        },
+    },
+    'failure_calm' : {
+        'unicode' : {
+            'icon' : 'MRSM{formatting:emoji:failure_calm}',
+        },
+        'ascii' : {
+            'icon' : 'x',
+        },
+        'ansi' : {
+            'rich' : {
+                'style' : 'indian red',
+            },
+        },
+    },
     'errors' : {
         'unicode' : {
             'icon' : 'MRSM{formatting:emoji:error}',
meerschaum/config/_jobs.py
CHANGED
@@ -7,6 +7,8 @@ Default configuration for jobs.
 """
 
 default_jobs_config = {
+    'timeout_seconds': 8,
+    'check_timeout_interval_seconds': 0.1,
     'logs' : {
         'num_files_to_keep': 5,
         'max_file_size': 100_000,
@@ -14,11 +16,32 @@ default_jobs_config = {
         'refresh_files_seconds': 5.0,
         'min_buffer_len': 15,
         'colors' : [
-            'cyan',
-            '
-            '
-            '
-            '
+            'cyan',
+            'magenta',
+            'orange3',
+            'green',
+            'blue',
+            'red',
+            'spring_green3',
+            'medium_purple3',
+            'medium_violet_red',
+            'slate_blue1',
+            'bright_red',
+            'steel_blue3',
+            'aquamarine1',
+            'dark_khaki',
+            'pink3',
+            'gold3',
+            'pale_green1',
+            'light coral',
+            'light_goldenrod2',
+            'cornsilk1',
+            'orange_red1',
+            'deep_pink1',
+            'aquamarine3',
+            'sky_blue2',
+            'tan',
+            'honeydew2',
         ],
     },
 }
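The two new keys above pair a job timeout with a polling interval; a hypothetical illustration of how such a pair is typically consumed (this is a generic sketch, not the Daemon implementation):

# Hypothetical sketch only: how a timeout/poll-interval pair like the one above
# is typically consumed while waiting on a background job to report readiness.
import time
from typing import Callable

def wait_until(
    is_ready: Callable[[], bool],
    timeout_seconds: float = 8,
    interval_seconds: float = 0.1,
) -> bool:
    """Poll `is_ready` until it returns True or `timeout_seconds` elapses."""
    deadline = time.perf_counter() + timeout_seconds
    while time.perf_counter() < deadline:
        if is_ready():
            return True
        time.sleep(interval_seconds)
    return False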
meerschaum/config/_paths.py
CHANGED
@@ -48,22 +48,38 @@ if ENVIRONMENT_PLUGINS_DIR in os.environ:
                 Path(path).resolve()
                 for path in json.loads(os.environ[ENVIRONMENT_PLUGINS_DIR])
             ] if os.environ[ENVIRONMENT_PLUGINS_DIR].lstrip().startswith('[')
-            else [
+            else [
+                Path(path_str).resolve()
+                for path_str in os.environ[ENVIRONMENT_PLUGINS_DIR].split(':')
+                if path_str
+            ]
         )
     except Exception as e:
         PLUGINS_DIR_PATHS = []
 
     if not PLUGINS_DIR_PATHS:
         print(
-            "Invalid plugins directories set for "
-            f"environment variable '{ENVIRONMENT_PLUGINS_DIR}'.\n"
-            f"
-
+            "Invalid plugins directories set for "
+            f"environment variable '{ENVIRONMENT_PLUGINS_DIR}'.\n\n"
+            f"Set this to a colon-separated path string:\n\n"
+            f"`export {ENVIRONMENT_PLUGINS_DIR}=./plugins:/another/path/to/plugins`\n\n"
+            "or a JSON-encoded path list:\n\n"
+            f"`export {ENVIRONMENT_PLUGINS_DIR}=" + "'[\"./plugins\", \"/another/path/to/plugins\"]'`"
+            f"",
         )
         sys.exit(1)
 else:
     PLUGINS_DIR_PATHS = [_ROOT_DIR_PATH / 'plugins']
 
+### Remove duplicate plugins paths.
+_seen_plugins_paths, _plugins_paths_to_remove = set(), set()
+for _plugin_path in PLUGINS_DIR_PATHS:
+    if _plugin_path in _seen_plugins_paths:
+        _plugins_paths_to_remove.add(_plugin_path)
+    _seen_plugins_paths.add(_plugin_path)
+for _plugin_path in _plugins_paths_to_remove:
+    PLUGINS_DIR_PATHS.remove(_plugin_path)
+
 
 paths = {
     'PACKAGE_ROOT_PATH' : str(Path(__file__).parent.parent.resolve()),
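A standalone sketch of the parsing behavior added above; `raw_value` stands in for the environment variable read through `ENVIRONMENT_PLUGINS_DIR`, whose concrete name is not shown in this diff:

# Standalone sketch of the plugins-path parsing above; `raw_value` stands in for
# the environment variable that the real code reads.
import json
from pathlib import Path

raw_value = './plugins:/another/path/to/plugins'   # or '["./plugins", "/another/path/to/plugins"]'
plugins_dir_paths = (
    [Path(p).resolve() for p in json.loads(raw_value)]
    if raw_value.lstrip().startswith('[')
    else [Path(p).resolve() for p in raw_value.split(':') if p]
)
# Drop duplicates while preserving order (the diff above prunes repeated paths similarly).
plugins_dir_paths = list(dict.fromkeys(plugins_dir_paths))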
meerschaum/config/_version.py
CHANGED
meerschaum/connectors/api/_fetch.py
CHANGED
@@ -42,7 +42,7 @@ def fetch(
     remote_metric_key = instructions.get('metric_key', None)
     remote_location_key = instructions.get('location_key', None)
     if begin is None:
-        begin = pipe.
+        begin = pipe.get_sync_time(debug=debug)
 
     _params = copy.deepcopy(params) if params is not None else {}
     _params = apply_patch_to_config(_params, instructions.get('params', {}))
meerschaum/connectors/api/_pipes.py
CHANGED
@@ -7,6 +7,9 @@ Register or fetch Pipes from the API
 """
 
 from __future__ import annotations
+import time
+import json
+from io import StringIO
 from datetime import datetime
 from meerschaum.utils.debug import dprint
 from meerschaum.utils.warnings import warn, error
@@ -123,7 +126,6 @@ def fetch_pipes_keys(
     A list of tuples containing pipes' keys.
     """
     from meerschaum.config.static import STATIC_CONFIG
-    import json
     if connector_keys is None:
         connector_keys = []
     if metric_keys is None:
@@ -169,7 +171,6 @@ def sync_pipe(
     from meerschaum.utils.misc import json_serialize_datetime
     from meerschaum.config import get_config
     from meerschaum.utils.packages import attempt_import
-    import json, time
     begin = time.time()
     more_itertools = attempt_import('more_itertools')
     if df is None:
@@ -310,7 +311,6 @@ def get_pipe_data(
     **kw: Any
 ) -> Union[pandas.DataFrame, None]:
     """Fetch data from the API."""
-    import json
     r_url = pipe_r_url(pipe)
     chunks_list = []
     while True:
@@ -340,7 +340,7 @@ def get_pipe_data(
     from meerschaum.utils.dataframe import parse_df_datetimes
     pd = import_pandas()
     try:
-        df = pd.read_json(response.text)
+        df = pd.read_json(StringIO(response.text))
     except Exception as e:
         warn(f"Failed to parse response for {pipe}:\n{e}")
         return None
@@ -367,7 +367,6 @@ def get_backtrack_data(
     **kw: Any,
 ) -> pandas.DataFrame:
     """Get a Pipe's backtrack data from the API."""
-    import json
     r_url = pipe_r_url(pipe)
     try:
         response = self.get(
@@ -389,12 +388,12 @@ def get_backtrack_data(
         dprint(response.text)
     pd = import_pandas()
     try:
-        df = pd.read_json(response.text)
+        df = pd.read_json(StringIO(response.text))
     except Exception as e:
         warn(f"Failed to read response into a dataframe:\n{e}")
         return None
 
-    df = parse_df_datetimes(pd.read_json(response.text), debug=debug)
+    df = parse_df_datetimes(pd.read_json(StringIO(response.text)), debug=debug)
     return df
 
 def get_pipe_id(
@@ -438,7 +437,6 @@ def get_pipe_attributes(
     """
     r_url = pipe_r_url(pipe)
     response = self.get(r_url + '/attributes', debug=debug)
-    import json
     try:
         return json.loads(response.text)
     except Exception as e:
@@ -474,7 +472,6 @@ def get_sync_time(
     """
     from meerschaum.utils.misc import is_int
     from meerschaum.utils.warnings import warn
-    import datetime, json
     r_url = pipe_r_url(pipe)
     response = self.get(
         r_url + '/sync_time',
@@ -545,7 +542,6 @@ def create_metadata(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.config.static import STATIC_CONFIG
-    import json
     r_url = STATIC_CONFIG['api']['endpoints']['metadata']
     response = self.post(r_url, debug=debug)
     if debug:
@@ -590,7 +586,6 @@ def get_pipe_rowcount(
     The number of rows in the pipe's table, bound the given parameters.
     If the table does not exist, return 0.
     """
-    import json
     r_url = pipe_r_url(pipe)
     response = self.get(
         r_url + "/rowcount",
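The `StringIO` wrapping above reflects that passing a literal JSON string to `pandas.read_json` is deprecated in recent pandas releases; the payload is handed over as a file-like object instead. A small self-contained example of the pattern:

# Self-contained example of the pattern adopted above: wrap the response body in
# StringIO before parsing, since literal-string input to read_json is deprecated.
from io import StringIO
import pandas as pd

response_text = '[{"id": 1, "value": 2.5}, {"id": 2, "value": 3.0}]'
df = pd.read_json(StringIO(response_text))
print(df.shape)  # (2, 2)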
meerschaum/connectors/sql/_fetch.py
CHANGED
@@ -148,7 +148,7 @@ def get_pipe_metadef(
     dt_name = sql_item_name(_dt, self.flavor)
     is_guess = False
 
-    if begin
+    if begin not in (None, '') or end is not None:
         if is_guess:
             if _dt is None:
                 warn(
@@ -168,20 +168,38 @@ def get_pipe_metadef(
     if 'order by' in definition.lower() and 'over' not in definition.lower():
         error("Cannot fetch with an ORDER clause in the definition")
 
+    apply_backtrack = begin == ''
     begin = (
-
-
+        pipe.get_sync_time(debug=debug)
+        if begin == ''
+        else begin
     )
-
+
+    if begin and end and begin >= end:
+        begin = None
+
     da = None
     if dt_name:
-
-
-
-
-
-
-
+        begin_da = (
+            dateadd_str(
+                flavor = self.flavor,
+                datepart = 'minute',
+                number = ((-1 * btm) if apply_backtrack else 0),
+                begin = begin,
+            )
+            if begin
+            else None
+        )
+        end_da = (
+            dateadd_str(
+                flavor = self.flavor,
+                datepart = 'minute',
+                number = 0,
+                begin = end,
+            )
+            if end
+            else None
+        )
 
     meta_def = (
         _simple_fetch_query(pipe) if (
meerschaum/connectors/sql/_pipes.py
CHANGED
@@ -6,7 +6,7 @@
 Interact with Pipes metadata via SQLConnector.
 """
 from __future__ import annotations
-from datetime import datetime, date
+from datetime import datetime, date, timedelta
 import meerschaum as mrsm
 from meerschaum.utils.typing import (
     Union, Any, SuccessTuple, Tuple, Dict, Optional, List
@@ -427,10 +427,17 @@ def get_create_index_queries(
         get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
         if (_id is not None and _create_space_partition) else None
     )
+
+    chunk_interval = pipe.get_chunk_interval(debug=debug)
+    chunk_interval_minutes = (
+        chunk_interval
+        if isinstance(chunk_interval, int)
+        else int(chunk_interval.total_seconds() / 60)
+    )
     chunk_time_interval = (
-
-
-
+        f"INTERVAL '{chunk_interval_minutes} MINUTES'"
+        if isinstance(chunk_interval, timedelta)
+        else f'{chunk_interval_minutes}'
     )
 
     dt_query = (
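The block above normalizes the pipe's chunk interval (an `int` of minutes, or a `timedelta`) into the value passed to TimescaleDB's `chunk_time_interval`; a small sketch of the same arithmetic with concrete inputs:

# Sketch of the normalization above: integers are treated as minutes directly, while
# timedeltas are converted to whole minutes and emitted as an INTERVAL literal.
from datetime import timedelta
from typing import Union

def chunk_time_interval_value(chunk_interval: Union[int, timedelta]) -> str:
    minutes = (
        chunk_interval
        if isinstance(chunk_interval, int)
        else int(chunk_interval.total_seconds() / 60)
    )
    return (
        f"INTERVAL '{minutes} MINUTES'"
        if isinstance(chunk_interval, timedelta)
        else str(minutes)
    )

print(chunk_time_interval_value(timedelta(days=1)))  # INTERVAL '1440 MINUTES'
print(chunk_time_interval_value(1440))               # 1440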
meerschaum/connectors/sql/_sql.py
CHANGED
@@ -102,10 +102,6 @@ def read(
         `chunksize` must not be `None` (falls back to 1000 if so),
         and hooks are not called in this case.
 
-    as_dask: bool, default False
-        If `True`, return a `dask.DataFrame`
-        (which may be loaded into a Pandas DataFrame with `df.compute()`).
-
     index_col: Optional[str], default None
         If using Dask, use this column as the index column.
         If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
@@ -134,7 +130,6 @@ def read(
     is_dask = 'dask' in pd.__name__
     pd = attempt_import('pandas')
     # pd = import_pandas()
-    dd = attempt_import('dask.dataframe') if as_dask else None
     is_dask = dd is not None
     npartitions = chunksize_to_npartitions(chunksize)
     if is_dask:
@@ -687,7 +682,7 @@ def to_sql(
     from meerschaum.utils.packages import attempt_import, import_pandas
     sqlalchemy = attempt_import('sqlalchemy', debug=debug)
     pd = import_pandas()
-    is_dask = 'dask' in
+    is_dask = 'dask' in df.__module__
 
     stats = {'target': name, }
     ### resort to defaults if None
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -119,7 +119,11 @@ class Pipe:
         _get_chunk_label,
         get_num_workers,
     )
-    from ._verify import
+    from ._verify import (
+        verify,
+        get_bound_interval,
+        get_bound_time,
+    )
     from ._delete import delete
     from ._drop import drop
     from ._clear import clear
meerschaum/core/Pipe/_data.py
CHANGED
@@ -20,6 +20,7 @@ def get_data(
     params: Optional[Dict[str, Any]] = None,
     as_iterator: bool = False,
     as_chunks: bool = False,
+    as_dask: bool = False,
     chunk_interval: Union[timedelta, int, None] = None,
     fresh: bool = False,
     debug: bool = False,
@@ -57,6 +58,10 @@ def get_data(
     as_chunks: bool, default False
         Alias for `as_iterator`.
 
+    as_dask: bool, default False
+        If `True`, return a `dask.DataFrame`
+        (which may be loaded into a Pandas DataFrame with `df.compute()`).
+
     chunk_interval: Union[timedelta, int, None], default None
         If `as_iterator`, then return chunks with `begin` and `end` separated by this interval.
         This may be set under `pipe.parameters['chunk_minutes']`.
@@ -85,6 +90,9 @@ def get_data(
     from meerschaum.connectors import get_connector_plugin
     from meerschaum.utils.misc import iterate_chunks, items_str
     from meerschaum.utils.dataframe import add_missing_cols_to_df
+    from meerschaum.utils.packages import attempt_import
+    dd = attempt_import('dask.dataframe') if as_dask else None
+    dask = attempt_import('dask') if as_dask else None
 
     if select_columns == '*':
         select_columns = None
@@ -108,6 +116,33 @@ def get_data(
         debug = debug,
     )
 
+    if as_dask:
+        from multiprocessing.pool import ThreadPool
+        dask_pool = ThreadPool(self.get_num_workers())
+        dask.config.set(pool=dask_pool)
+        chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
+        bounds = self.get_chunk_bounds(
+            begin = begin,
+            end = end,
+            bounded = False,
+            chunk_interval = chunk_interval,
+            debug = debug,
+        )
+        dask_chunks = [
+            dask.delayed(self.get_data)(
+                select_columns = select_columns,
+                omit_columns = omit_columns,
+                begin = chunk_begin,
+                end = chunk_end,
+                params = params,
+                chunk_interval = chunk_interval,
+                fresh = fresh,
+                debug = debug,
+            )
+            for (chunk_begin, chunk_end) in bounds
+        ]
+        return dd.from_delayed(dask_chunks)
+
     if not self.exists(debug=debug):
         return None
 
@@ -245,12 +280,7 @@ def _get_data_as_iterator(
     elif isinstance(max_dt, datetime):
         max_dt = round_time(max_dt + timedelta(minutes=1))
 
-
-        chunk_interval = self.get_chunk_interval(debug=debug)
-    elif isinstance(chunk_interval, int) and isinstance(min_dt, datetime):
-        chunk_interval = timedelta(minutes=1)
-    elif isinstance(chunk_interval, timedelta) and isinstance(min_dt, int):
-        chunk_interval = int(chunk_interval.total_seconds() / 60)
+    chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
 
     ### If we can't determine bounds
     ### or if chunk_interval exceeds the max,
@@ -458,14 +488,34 @@ def get_rowcount(
 
 def get_chunk_interval(
     self,
+    chunk_interval: Union[timedelta, int, None] = None,
     debug: bool = False,
 ) -> Union[timedelta, int]:
     """
     Get the chunk interval to use for this pipe.
+
+    Parameters
+    ----------
+    chunk_interval: Union[timedelta, int, None], default None
+        If provided, coerce this value into the correct type.
+        For example, if the datetime axis is an integer, then
+        return the number of minutes.
+
+    Returns
+    -------
+    The chunk interval (`timedelta` or `int`) to use with this pipe's `datetime` axis.
     """
     default_chunk_minutes = get_config('pipes', 'parameters', 'chunk_minutes')
     configured_chunk_minutes = self.parameters.get('chunk_minutes', None)
-    chunk_minutes =
+    chunk_minutes = (
+        (configured_chunk_minutes or default_chunk_minutes)
+        if chunk_interval is None
+        else (
+            chunk_interval
+            if isinstance(chunk_interval, int)
+            else int(chunk_interval.total_seconds() / 60)
+        )
+    )
 
     dt_col = self.columns.get('datetime', None)
     if dt_col is None:
@@ -529,8 +579,7 @@ def get_chunk_bounds(
         return [(None, None)]
 
     ### Set the chunk interval under `pipe.parameters['chunk_minutes']`.
-
-    chunk_interval = self.get_chunk_interval(debug=debug)
+    chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
 
     ### Build a list of tuples containing the chunk boundaries
     ### so that we can sync multiple chunks in parallel.