mage-ai 0.8.25__py3-none-any.whl → 0.8.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mage-ai has been flagged as potentially problematic.
- mage_ai/data_integrations/sources/constants.py +1 -0
- mage_ai/data_preparation/executors/streaming_pipeline_executor.py +2 -1
- mage_ai/data_preparation/logging/logger_manager.py +7 -1
- mage_ai/data_preparation/models/block/__init__.py +60 -17
- mage_ai/data_preparation/models/block/sql/__init__.py +64 -17
- mage_ai/data_preparation/models/block/sql/utils/shared.py +49 -3
- mage_ai/data_preparation/models/variable.py +6 -1
- mage_ai/data_preparation/repo_manager.py +5 -2
- mage_ai/data_preparation/shared/secrets.py +6 -3
- mage_ai/data_preparation/templates/sensors/bigquery.py +32 -0
- mage_ai/data_preparation/templates/sensors/mysql.py +33 -0
- mage_ai/data_preparation/templates/sensors/postgres.py +33 -0
- mage_ai/data_preparation/templates/sensors/redshift.py +33 -0
- mage_ai/data_preparation/templates/sensors/s3.py +11 -7
- mage_ai/data_preparation/templates/sensors/snowflake.py +33 -0
- mage_ai/io/postgres.py +13 -1
- mage_ai/server/constants.py +1 -1
- mage_ai/server/frontend_dist/404.html +2 -2
- mage_ai/server/frontend_dist/404.html.html +2 -2
- mage_ai/server/frontend_dist/_next/static/WbTBCvyjQQ9UFFLZOU1E5/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/2344-f8ae030d6a6863ae.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/2626-e7fa4f83f8214c97.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{4178-663d9f70bffc7a47.js → 4178-a6d1bd2be4706f51.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/4261-88b0103fad331620.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{4538-8a3c3e47be976ede.js → 4538-347283088b83c6bf.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/5141-57c3868a80196da8.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{5477-e2cc1ca7108ebc6b.js → 5477-b439f211b6146a11.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/{5872-103815a4a043489b.js → 5872-1767c45ee6690ae5.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/{5896-f84e336fb8877027.js → 5896-10a676bcc86978cc.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/6166-705b4fdecaf11e63.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/6532-b1bd0b3f422abec8.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{7400-26ce25ec46728ef7.js → 7400-a48b270726b9eef5.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/8180-8de652170ea5ed93.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/839-15c54471a9a9bf2e.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{9386-4b9e157e18dd2c65.js → 9386-d4cc11bab74eec8d.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/{9832-c8b8970bb522f302.js → 9832-f97919376d52e3bf.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{_app-624f87faa4b5ee9a.js → _app-5f3dbed367342a3d.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-3046bc53d24917c7.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-f6059e27e601627c.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-123556bdfe2e194b.js → [...slug]-050ef37b6672100a.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-5f95bb4c3a2d7d46.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-270c0198eeef1542.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-c9f1df40e0aa6981.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-e0eb0098dcbf27ac.js → block-runs-d74850779dbe87b3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-848544a58563dbec.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{monitors-204daac985c03b62.js → monitors-675171cfd7d7b346.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/{[run]-44533e244974a422.js → [run]-b2955f0ff960894e.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{runs-2d41695001370abc.js → runs-219960b3cc4742e3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-1767a2f57f887ef7.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-0f373aaa7deb98c9.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-181343d8eb894426.js → triggers-bea0439ca2a862ba.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-7446a70bdd8381a5.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{preferences-cd6121ffe82e3834.js → preferences-997acba85f777259.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-91fbb84976467947.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-d72dfc596e943cc4.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/sign-in-c99e74aa506a6cfd.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-4c9ad80f8f9d1074.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-783b9526167f1249.js +1 -0
- mage_ai/server/frontend_dist/index.html +2 -2
- mage_ai/server/frontend_dist/manage.html +4 -4
- mage_ai/server/frontend_dist/pipeline-runs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline].html +2 -2
- mage_ai/server/frontend_dist/pipelines.html +5 -5
- mage_ai/server/frontend_dist/settings/account/profile.html +5 -5
- mage_ai/server/frontend_dist/settings/workspace/preferences.html +5 -5
- mage_ai/server/frontend_dist/settings/workspace/{sync_data.html → sync-data.html} +5 -5
- mage_ai/server/frontend_dist/settings/workspace/users.html +5 -5
- mage_ai/server/frontend_dist/settings.html +2 -2
- mage_ai/server/frontend_dist/sign-in.html +9 -9
- mage_ai/server/frontend_dist/terminal.html +5 -5
- mage_ai/server/frontend_dist/test.html +3 -3
- mage_ai/server/frontend_dist/triggers.html +5 -5
- mage_ai/server/server.py +1 -6
- mage_ai/shared/security.py +9 -2
- mage_ai/shared/utils.py +7 -0
- mage_ai/tests/data_preparation/models/block/__init__.py +0 -0
- mage_ai/tests/data_preparation/models/block/test_sql.py +42 -0
- mage_ai/tests/data_preparation/models/test_block.py +63 -0
- mage_ai/tests/shared/test_security.py +6 -6
- {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/METADATA +1 -1
- {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/RECORD +99 -92
- mage_ai/server/frontend_dist/_next/static/ErmV1Ii-luEqAYMZkbkmO/_buildManifest.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/1830-3882c19d710feadd.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/2344-a82a406b72fe782b.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/2626-501fffa58c71ee7c.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3688-562e0f129b09d1cd.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3699-dcc0946dd0709216.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/4463-777e71000be29fc4.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/6532-baf1818fbc89e6c1.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/6567-2488118bb39a9d99.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-11d6cbf2313f0689.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-c226d1e215d66129.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-26974ca695994804.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-1de289b75e9c5bf1.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-72011b08ef047531.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-3b17a6d28cdde471.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-c31fec2be1160dbe.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-3a60003fce7dfb93.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-0e88f39d4980fc10.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync_data-64e03c3a285d301e.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-ff24167e21f2f0cf.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/sign-in-404d934deb8950d5.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-90abd9c4dfca2556.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-6540e41d09691d6b.js +0 -1
- /mage_ai/server/frontend_dist/_next/static/{ErmV1Ii-luEqAYMZkbkmO → WbTBCvyjQQ9UFFLZOU1E5}/_middlewareManifest.js +0 -0
- /mage_ai/server/frontend_dist/_next/static/{ErmV1Ii-luEqAYMZkbkmO → WbTBCvyjQQ9UFFLZOU1E5}/_ssgManifest.js +0 -0
- {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/LICENSE +0 -0
- {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/WHEEL +0 -0
- {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/entry_points.txt +0 -0
- {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/top_level.txt +0 -0
mage_ai/data_preparation/executors/streaming_pipeline_executor.py

@@ -3,6 +3,7 @@ from mage_ai.data_preparation.executors.pipeline_executor import PipelineExecutor
 from mage_ai.data_preparation.models.constants import BlockType
 from mage_ai.data_preparation.models.pipeline import Pipeline
 from mage_ai.data_preparation.shared.stream import StreamToLogger
+from mage_ai.shared.hash import merge_dict
 from typing import Callable, Dict, List, Union
 import os
 import yaml
@@ -83,7 +84,7 @@ class StreamingPipelineExecutor(PipelineExecutor):
             if not build_block_output_stdout:
                 self.logger.exception(
                     f'Failed to execute streaming pipeline {self.pipeline.uuid}',
-                    error=e,
+                    **merge_dict(dict(error=e), tags),
                 )
                 raise e
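For context on the change above: instead of passing `error=e` alone, the executor now forwards its logging tags alongside the error. A minimal sketch, assuming mage_ai.shared.hash.merge_dict is a shallow dict merge (the sample tags below are hypothetical):

def merge_dict(a: dict, b: dict) -> dict:
    # Assumed behavior of mage_ai.shared.hash.merge_dict: shallow merge
    # returning a new dict, with keys from b winning on clashes.
    return {**a, **b}


tags = {'pipeline_uuid': 'demo_pipeline', 'pipeline_run_id': 7}  # hypothetical tags
error = ValueError('stream failed')

# The keyword arguments the logger.exception call above receives:
print(merge_dict(dict(error=error), tags))
# {'error': ValueError('stream failed'), 'pipeline_uuid': 'demo_pipeline', 'pipeline_run_id': 7}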
mage_ai/data_preparation/logging/logger_manager.py

@@ -7,6 +7,8 @@ import io
 import logging
 import os
 
+MAX_LOG_FILE_SIZE = 5 * 1024 * 1024
+
 
 class LoggerManager:
     def __init__(
@@ -50,7 +52,11 @@ class LoggerManager:
             handler = self.create_stream_handler()
         else:
            log_filepath = self.get_log_filepath(create_dir=True)
-            handler = logging.
+            handler = logging.handlers.RotatingFileHandler(
+                log_filepath,
+                backupCount=10,
+                maxBytes=MAX_LOG_FILE_SIZE,
+            )
 
         handler.setLevel(self.log_level)
         handler.setFormatter(self.formatter)
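The switch to logging.handlers.RotatingFileHandler caps each log file at MAX_LOG_FILE_SIZE (5 MiB) with up to 10 rotated backups. A standalone sketch of the configured behavior; 'app.log' is a hypothetical path (the real code uses get_log_filepath()), and note that logging.handlers is a submodule that needs its own import:

import logging
import logging.handlers  # RotatingFileHandler lives in the handlers submodule

MAX_LOG_FILE_SIZE = 5 * 1024 * 1024  # 5 MiB, matching the constant above

# Once app.log reaches maxBytes, it is renamed app.log.1 (older backups shift
# up through app.log.10) and logging continues in a fresh app.log.
handler = logging.handlers.RotatingFileHandler(
    'app.log',  # hypothetical path
    backupCount=10,
    maxBytes=MAX_LOG_FILE_SIZE,
)
logger = logging.getLogger('rotation_demo')
logger.addHandler(handler)
logger.warning('goes to app.log, rotating at 5 MiB')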
mage_ai/data_preparation/models/block/__init__.py

@@ -4,6 +4,7 @@ from inspect import Parameter, signature
 from logging import Logger
 from mage_ai.data_cleaner.shared.utils import (
     is_geo_dataframe,
+    is_spark_dataframe,
 )
 from mage_ai.data_preparation.models.block.extension.utils import handle_run_tests
 from mage_ai.data_preparation.models.block.utils import (
@@ -251,6 +252,10 @@ class Block:
         self.dynamic_block_uuid = None
         self.dynamic_upstream_block_uuids = None
 
+        # Spark session
+        self.spark = None
+        self.spark_init = False
+
     @property
     def uuid(self):
         return self.dynamic_block_uuid or self._uuid
@@ -347,23 +352,22 @@ class Block:
     @property
     def full_table_name(self) -> str:
         from mage_ai.data_preparation.models.block.sql.utils.shared import (
-
+            extract_create_statement_table_name,
+            extract_insert_statement_table_names,
         )
 
         if not self.content:
             return None
 
-
-
-
-            r'\(',
-        )
+        table_name = extract_create_statement_table_name(self.content)
+        if table_name:
+            return table_name
 
-
+        matches = extract_insert_statement_table_names(self.content)
+        if len(matches) == 0:
             return None
 
-
-        return parts[-1]
+        return matches[len(matches) - 1]
 
     @classmethod
     def after_create(self, block: 'Block', **kwargs):
@@ -1041,7 +1045,6 @@ class Block:
             block_uuid,
             partition=execution_partition,
         )
-
         if not include_print_outputs:
             all_variables = self.output_variables(execution_partition=execution_partition)
 
@@ -1051,6 +1054,7 @@ class Block:
             block_uuid,
             v,
             partition=execution_partition,
+            spark=self.__get_spark_session(),
         )
 
         if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1059,6 +1063,7 @@ class Block:
         data = variable_object.read_data(
             sample=True,
             sample_count=sample_count,
+            spark=self.__get_spark_session(),
         )
         if type(data) is pd.DataFrame:
             try:
@@ -1118,6 +1123,19 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
                     type=DataType.TEXT,
                     variable_uuid=v,
                 )
+            elif is_spark_dataframe(data):
+                df = data.toPandas()
+                columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+                data = dict(
+                    sample_data=dict(
+                        columns=columns_to_display,
+                        rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+                    ),
+                    type=DataType.TABLE,
+                    variable_uuid=v,
+                )
+                data_products.append(data)
+                continue
             outputs.append(data)
         return outputs + data_products
 
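The new is_spark_dataframe branch converts the Spark output to pandas and serializes a sample via to_json(orient='split'). A sketch of that serialization, using a plain pandas frame in place of data.toPandas(); DATAFRAME_ANALYSIS_MAX_COLUMNS is stubbed here (the real constant is defined elsewhere in mage_ai):

import json

import pandas as pd

DATAFRAME_ANALYSIS_MAX_COLUMNS = 30  # stub; the real constant lives in mage_ai

df = pd.DataFrame({'id': [1, 2], 'name': ['a', 'b']})  # stands in for data.toPandas()
columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
sample_data = dict(
    columns=columns_to_display,
    rows=json.loads(df[columns_to_display].to_json(orient='split'))['data'],
)
print(sample_data)
# {'columns': ['id', 'name'], 'rows': [[1, 'a'], [2, 'b']]}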
@@ -1154,6 +1172,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
             block_uuid,
             v,
             partition=execution_partition,
+            spark=self.__get_spark_session(),
         )
 
         if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1162,6 +1181,7 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
         data = await variable_object.read_data_async(
             sample=True,
             sample_count=sample_count,
+            spark=self.__get_spark_session(),
         )
         if type(data) is pd.DataFrame:
             try:
@@ -1221,6 +1241,19 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
                     type=DataType.TEXT,
                     variable_uuid=v,
                 )
+            elif is_spark_dataframe(data):
+                df = data.toPandas()
+                columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+                data = dict(
+                    sample_data=dict(
+                        columns=columns_to_display,
+                        rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+                    ),
+                    type=DataType.TABLE,
+                    variable_uuid=v,
+                )
+                data_products.append(data)
+                continue
             outputs.append(data)
         return outputs + data_products
 
@@ -1651,14 +1684,23 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
                 is_spark_env()):
             global_vars = global_vars or dict()
             if not global_vars.get('spark'):
-
-
-                    global_vars['spark'] =
-                        os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
-                except Exception:
-                    pass
+                spark = self.__get_spark_session()
+                if spark is not None:
+                    global_vars['spark'] = spark
         return global_vars
 
+    def __get_spark_session(self):
+        if self.spark_init:
+            return self.spark
+        try:
+            from pyspark.sql import SparkSession
+            self.spark = SparkSession.builder.master(
+                os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
+        except Exception:
+            self.spark = None
+        self.spark_init = True
+        return self.spark
+
     def __store_variables_prepare(
         self,
         variable_mapping: Dict,
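Note that __get_spark_session memoizes the outcome of the first attempt, success or failure, via the spark_init flag, so a missing pyspark install or unreachable master is probed only once per Block rather than on every call. A self-contained sketch of the same pattern:

import os


class SparkSessionCache:
    """Sketch of the memoization above: spark_init records that an attempt
    was made, so a failed import or connection is not retried on every call
    (a plain `if self.spark is None` check would retry each time)."""

    def __init__(self):
        self.spark = None
        self.spark_init = False

    def get(self):
        if self.spark_init:
            return self.spark
        try:
            from pyspark.sql import SparkSession
            self.spark = SparkSession.builder.master(
                os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
        except Exception:
            self.spark = None  # pyspark missing or master unreachable
        self.spark_init = True
        return self.spark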
@@ -1710,7 +1752,8 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
             dynamic_block_uuid,
         )
         for uuid, data in variables_data['variable_mapping'].items():
-            if spark is not None and type
+            if spark is not None and self.pipeline.type == PipelineType.PYSPARK \
+                    and type(data) is pd.DataFrame:
                 data = spark.createDataFrame(data)
             self.pipeline.variable_manager.add_variable(
                 self.pipeline.uuid,
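The tightened guard converts pandas output to a Spark DataFrame only for PYSPARK pipelines. A minimal sketch of the conversion it guards, assuming pyspark is installed:

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local').getOrCreate()

pdf = pd.DataFrame({'id': [1, 2, 3]})
if type(pdf) is pd.DataFrame:  # same type check as the guard above
    sdf = spark.createDataFrame(pdf)  # pandas -> Spark DataFrame
sdf.show()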
mage_ai/data_preparation/models/block/sql/__init__.py

@@ -9,6 +9,7 @@ from mage_ai.data_preparation.models.block.sql import (
     trino,
 )
 from mage_ai.data_preparation.models.block.sql.utils.shared import (
+    has_create_or_insert_statement,
     interpolate_vars,
 )
 from mage_ai.data_preparation.models.constants import BlockType
@@ -18,7 +19,9 @@ from mage_ai.io.config import ConfigFileLoader
 from os import path
 from time import sleep
 from typing import Any, Dict, List
+import re
 
+MAGE_SEMI_COLON = '__MAGE_SEMI_COLON__'
 PREVIEWABLE_BLOCK_TYPES = [
     BlockType.DATA_EXPORTER,
     BlockType.DATA_LOADER,
@@ -82,6 +85,7 @@ def execute_sql_code(
             loader,
             block,
             query_string,
+            configuration=configuration,
             should_query=should_query,
         )
     else:

The same one-line addition (+ configuration=configuration,) recurs at the six other identically shaped call sites inside execute_sql_code, in hunks at old lines 132, 172, 209, 246, 287, and 329; only the first is shown in full above.
@@ -354,36 +364,73 @@ def execute_sql_code(
     ]
 
 
+def split_query_string(query_string: str) -> List[str]:
+    text_parts = []
+
+    matches = re.finditer(r"'(.*?)'|\"(.*?)\"", query_string, re.IGNORECASE)
+
+    previous_idx = 0
+
+    for idx, match in enumerate(matches):
+        matched_string = match.group()
+        updated_string = re.sub(r';', MAGE_SEMI_COLON, matched_string)
+
+        start_idx, end_idx = match.span()
+
+        previous_chunk = query_string[previous_idx:start_idx]
+        text_parts.append(previous_chunk)
+        text_parts.append(updated_string)
+        previous_idx = end_idx
+
+    text_parts.append(query_string[previous_idx:])
+
+    text_combined = ''.join(text_parts)
+    queries = text_combined.split(';')
+
+    arr = []
+    for query in queries:
+        query = query.strip()
+        if not query:
+            continue
+
+        lines = query.split('\n')
+        query = '\n'.join(list(filter(lambda x: not x.startswith('--'), lines)))
+        query = query.strip()
+        query = re.sub(MAGE_SEMI_COLON, ';', query)
+
+        if query:
+            arr.append(query)
+
+    return arr
+
+
 def execute_raw_sql(
     loader,
     block: 'Block',
     query_string: str,
+    configuration: Dict = {},
     should_query: bool = False,
 ) -> List[Any]:
     queries = []
     fetch_query_at_indexes = []
 
-
-    #     query_string,
-    #     'create',
-    #     ';',
-    #     case_sensitive=True,
-    # )
-
-    # if create_statement:
-    #     queries.append(create_statement)
-    #     fetch_query_at_indexes.append(False)
-
-    #     queries.append(query_statement)
-    #     fetch_query_at_indexes.append(False)
+    has_create_or_insert = has_create_or_insert_statement(query_string)
 
-    for query in query_string
-
-    if query and not query.startswith('--'):
+    for query in split_query_string(query_string):
+        if has_create_or_insert:
             queries.append(query)
             fetch_query_at_indexes.append(False)
+        else:
+            if should_query:
+                query = f"""SELECT *
+FROM (
+{query}
+) AS {block.table_name}__limit
+LIMIT 1000"""
+                queries.append(query)
+                fetch_query_at_indexes.append(True)
 
-    if should_query:
+    if should_query and has_create_or_insert:
         queries.append(f'SELECT * FROM {block.full_table_name} LIMIT 1000')
         fetch_query_at_indexes.append(block.full_table_name)
 
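A usage sketch of split_query_string as defined above (the input SQL is hypothetical): semicolons inside quoted literals are masked with MAGE_SEMI_COLON before the split and restored afterwards, and full-line `--` comments are dropped:

sql = """
CREATE TABLE demo (id INT, note VARCHAR(20));
-- seed a row
INSERT INTO demo VALUES (1, 'a;b');
"""

for q in split_query_string(sql):
    print(repr(q))
# 'CREATE TABLE demo (id INT, note VARCHAR(20))'
# "INSERT INTO demo VALUES (1, 'a;b')"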
mage_ai/data_preparation/models/block/sql/utils/shared.py

@@ -47,23 +47,33 @@ def interpolate_input(block, query, replace_func=None):
     for idx, upstream_block in enumerate(block.upstream_blocks):
         matcher1 = '{} df_{} {}'.format('{{', idx + 1, '}}')
 
-
+        is_sql = BlockLanguage.SQL == upstream_block.language
+        if is_sql:
             configuration = upstream_block.configuration
         else:
             configuration = block.configuration
+        use_raw_sql = configuration.get('use_raw_sql')
 
         database = configuration.get('data_provider_database', '')
         schema = configuration.get('data_provider_schema', '')
 
+        replace_with = __replace_func(database, schema, upstream_block.table_name)
+        upstream_block_content = upstream_block.content
+        if is_sql and use_raw_sql and not has_create_or_insert_statement(upstream_block_content):
+            upstream_query = interpolate_input(upstream_block, upstream_block_content)
+            replace_with = f"""(
+{upstream_query}
+) AS {upstream_block.table_name}"""
+
         query = re.sub(
             '{}[ ]*df_{}[ ]*{}'.format(r'\{\{', idx + 1, r'\}\}'),
-
+            replace_with,
             query,
         )
 
         query = query.replace(
             f'{matcher1}',
-
+            replace_with,
         )
 
     return query
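The effect of the new raw-SQL branch in interpolate_input: when the upstream SQL block uses use_raw_sql and contains no CREATE or INSERT statement, the `{{ df_1 }}` placeholder expands to an inline subquery rather than a schema-qualified table name. A standalone sketch with hypothetical block contents:

import re

query = 'SELECT * FROM {{ df_1 }} WHERE id > 10'      # hypothetical downstream block
upstream_query = 'SELECT id, name FROM source_table'  # hypothetical raw-SQL upstream
table_name = 'upstream_block'                         # stands in for upstream_block.table_name

replace_with = f"""(
{upstream_query}
) AS {table_name}"""

print(re.sub(r'\{\{[ ]*df_1[ ]*\}\}', replace_with, query))
# SELECT * FROM (
# SELECT id, name FROM source_table
# ) AS upstream_block WHERE id > 10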
@@ -170,3 +180,39 @@ def extract_and_replace_text_between_strings(
     new_text = text[0:max(start_idx - 1, 0)] + replace_string + text[end_idx + 1:]
 
     return extracted_text, new_text
+
+
+def remove_comments(text: str) -> str:
+    lines = text.split('\n')
+    return '\n'.join(line for line in lines if not line.startswith('--'))
+
+
+def extract_create_statement_table_name(text: str) -> str:
+    statement_partial, _ = extract_and_replace_text_between_strings(
+        remove_comments(text),
+        r'create table(?: if not exists)*',
+        r'\(',
+    )
+    if not statement_partial:
+        return None
+
+    parts = statement_partial[:len(statement_partial) - 1].strip().split(' ')
+    return parts[-1]
+
+
+def extract_insert_statement_table_names(text: str) -> List[str]:
+    matches = re.findall(
+        r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
+        remove_comments(text),
+        re.IGNORECASE,
+    )
+    return matches
+
+
+def has_create_or_insert_statement(text: str) -> bool:
+    table_name = extract_create_statement_table_name(text)
+    if table_name:
+        return True
+
+    matches = extract_insert_statement_table_names(text)
+    return len(matches) >= 1
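A usage sketch for the three helpers above, assuming they are imported from mage_ai.data_preparation.models.block.sql.utils.shared; the create-statement result also depends on extract_and_replace_text_between_strings, which is not fully shown, so outputs are marked as expected:

print(extract_create_statement_table_name(
    'create table if not exists analytics.users (id int)'
))
# expected: 'analytics.users'

print(extract_insert_statement_table_names(
    'insert into analytics.events values (1); insert overwrite staging.events select 1'
))
# expected: ['analytics.events', 'staging.events']

print(has_create_or_insert_statement('select * from analytics.users'))
# expected: False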
mage_ai/data_preparation/models/variable.py

@@ -175,6 +175,8 @@ class Variable:
         """
         if self.variable_type == VariableType.DATAFRAME:
             return self.__read_parquet(sample=sample, sample_count=sample_count)
+        elif self.variable_type == VariableType.SPARK_DATAFRAME:
+            return self.__read_spark_parquet(sample=sample, sample_count=sample_count, spark=spark)
         elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
             return await self.__read_dataframe_analysis_async(
                 dataframe_analysis_keys=dataframe_analysis_keys,
@@ -367,7 +369,7 @@ class Variable:
     def __read_spark_parquet(self, sample: bool = False, sample_count: int = None, spark=None):
         if spark is None:
             return None
-
+        df = (
             spark.read
             .format('csv')
             .option('header', 'true')
@@ -375,6 +377,9 @@ class Variable:
             .option('delimiter', ',')
             .load(self.variable_path)
         )
+        if sample and sample_count:
+            df = df.limit(sample_count)
+        return df
 
     def __write_geo_dataframe(self, data) -> None:
         os.makedirs(self.variable_path, exist_ok=True)
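A standalone sketch of the sampling path added to __read_spark_parquet, assuming pyspark is installed; '/tmp/variable_dir' stands in for self.variable_path. Since limit() is lazy, only sample_count rows are materialized when the frame is later collected:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local').getOrCreate()

df = (
    spark.read
    .format('csv')
    .option('header', 'true')
    .option('delimiter', ',')
    .load('/tmp/variable_dir')  # stands in for self.variable_path
)
sample, sample_count = True, 100
if sample and sample_count:
    df = df.limit(sample_count)  # lazy: only sample_count rows are fetched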
mage_ai/data_preparation/repo_manager.py

@@ -114,12 +114,15 @@ def init_repo(repo_path: str) -> None:
     if os.path.exists(repo_path):
         raise FileExistsError(f'Repository {repo_path} already exists')
 
-    os.makedirs(
+    os.makedirs(
+        os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR,
+        exist_ok=True,
+    )
     copy_template_directory('repo', repo_path)
 
 
 def get_data_dir() -> str:
-    return os.getenv(MAGE_DATA_DIR_ENV_VAR
+    return os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR
 
 
 def get_repo_name() -> str:
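The repaired get_data_dir uses `os.getenv(...) or DEFAULT` rather than getenv's default argument, which matters when the variable is set but empty. A quick illustration; the constant value and env-var name are stubbed here as assumptions:

import os

DEFAULT_MAGE_DATA_DIR = '~/.mage_data'   # assumed default; the real constant lives in mage_ai
MAGE_DATA_DIR_ENV_VAR = 'MAGE_DATA_DIR'  # assumed env-var name

os.environ[MAGE_DATA_DIR_ENV_VAR] = ''  # set but empty
print(os.getenv(MAGE_DATA_DIR_ENV_VAR, DEFAULT_MAGE_DATA_DIR))  # '' (default NOT applied)
print(os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR)  # '~/.mage_data'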
mage_ai/data_preparation/shared/secrets.py

@@ -66,6 +66,9 @@ def get_secret_value(name: str) -> str:
     from mage_ai.orchestration.db.models import Secret
     fernet = Fernet(get_encryption_key())
 
-
-
-
+    try:
+        secret = Secret.query.filter(Secret.name == name).one_or_none()
+        if secret:
+            return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+    except Exception:
+        print(f'WARNING: Could not find secret value for secret {name}')
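A round-trip sketch of the Fernet scheme the lookup above decrypts: secrets are stored as Fernet tokens of the UTF-8 value and decoded back to str on read. The key below is generated locally; the real code derives it via get_encryption_key():

from cryptography.fernet import Fernet

key = Fernet.generate_key()  # the real code derives the key via get_encryption_key()
fernet = Fernet(key)

stored = fernet.encrypt('my-database-password'.encode('utf-8'))  # what a Secret row holds
print(fernet.decrypt(stored).decode('utf-8'))  # 'my-database-password'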
mage_ai/data_preparation/templates/sensors/bigquery.py (new file)

@@ -0,0 +1,32 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.bigquery import BigQuery
+from mage_ai.io.config import ConfigFileLoader
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_bigquery_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a BigQuery query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your BigQuery query'  # Specify your SQL query here
+
+    loader = BigQuery.with_config(ConfigFileLoader(config_path, config_profile))
+    df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True
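The template's df.empty check is only a placeholder. A hypothetical stricter condition, in the same shape as the template above, that keeps the sensor waiting until yesterday's partition of a table has rows (the table and partition column names are made up):

from mage_ai.data_preparation.repo_manager import get_repo_path
from mage_ai.io.bigquery import BigQuery
from mage_ai.io.config import ConfigFileLoader
from os import path

if 'sensor' not in globals():
    from mage_ai.data_preparation.decorators import sensor


@sensor
def wait_for_yesterdays_partition(**kwargs) -> bool:
    # Hypothetical stricter condition than `df.empty`: keep waiting until
    # yesterday's partition of your_dataset.your_table has at least one row.
    config_path = path.join(get_repo_path(), 'io_config.yaml')
    loader = BigQuery.with_config(ConfigFileLoader(config_path, 'default'))

    df = loader.load(
        'SELECT COUNT(*) AS row_count '
        'FROM your_dataset.your_table '  # hypothetical table
        'WHERE ds = DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)'  # hypothetical partition column
    )
    return bool(df['row_count'][0] > 0)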
mage_ai/data_preparation/templates/sensors/mysql.py (new file)

@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.mysql import MySQL
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_mysql_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a MySQL query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your MySQL query'  # Specify your SQL query here
+
+    with MySQL.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True
mage_ai/data_preparation/templates/sensors/postgres.py and mage_ai/data_preparation/templates/sensors/redshift.py (new files)

Both follow the MySQL template above line for line (33 lines each). postgres.py imports Postgres from mage_ai.io.postgres and defines query_postgres_and_check_condition with the placeholder query 'Your Postgres query'; redshift.py imports Redshift from mage_ai.io.redshift and defines query_redshift_and_check_condition with 'Your Redshift query'. Each opens its loader with `with <Class>.with_config(ConfigFileLoader(config_path, config_profile)) as loader:`, loads the query into df, and returns False while df.empty, True otherwise.