mage-ai 0.8.26__py3-none-any.whl → 0.8.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mage-ai might be problematic.
- mage_ai/data_preparation/executors/streaming_pipeline_executor.py +2 -1
- mage_ai/data_preparation/logging/logger_manager.py +7 -1
- mage_ai/data_preparation/models/block/__init__.py +60 -29
- mage_ai/data_preparation/models/block/sql/__init__.py +25 -8
- mage_ai/data_preparation/models/block/sql/utils/shared.py +49 -3
- mage_ai/data_preparation/models/variable.py +6 -1
- mage_ai/data_preparation/repo_manager.py +5 -2
- mage_ai/data_preparation/shared/secrets.py +6 -3
- mage_ai/data_preparation/templates/sensors/bigquery.py +32 -0
- mage_ai/data_preparation/templates/sensors/mysql.py +33 -0
- mage_ai/data_preparation/templates/sensors/postgres.py +33 -0
- mage_ai/data_preparation/templates/sensors/redshift.py +33 -0
- mage_ai/data_preparation/templates/sensors/s3.py +11 -7
- mage_ai/data_preparation/templates/sensors/snowflake.py +33 -0
- mage_ai/io/postgres.py +13 -1
- mage_ai/server/constants.py +1 -1
- mage_ai/server/frontend_dist/404.html +2 -2
- mage_ai/server/frontend_dist/404.html.html +2 -2
- mage_ai/server/frontend_dist/_next/static/WbTBCvyjQQ9UFFLZOU1E5/_buildManifest.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/2344-f8ae030d6a6863ae.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/2626-e7fa4f83f8214c97.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{4178-663d9f70bffc7a47.js → 4178-a6d1bd2be4706f51.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/4261-88b0103fad331620.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/5141-57c3868a80196da8.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/6166-705b4fdecaf11e63.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/6532-b1bd0b3f422abec8.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/8180-8de652170ea5ed93.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/839-15c54471a9a9bf2e.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/{9386-9d6a4e5836229264.js → 9386-d4cc11bab74eec8d.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/{_app-624f87faa4b5ee9a.js → _app-5f3dbed367342a3d.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-3046bc53d24917c7.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-f6059e27e601627c.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-123556bdfe2e194b.js → [...slug]-050ef37b6672100a.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-5f95bb4c3a2d7d46.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-270c0198eeef1542.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-c9f1df40e0aa6981.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-e0eb0098dcbf27ac.js → block-runs-d74850779dbe87b3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-848544a58563dbec.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{monitors-204daac985c03b62.js → monitors-675171cfd7d7b346.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/{[run]-44533e244974a422.js → [run]-b2955f0ff960894e.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{runs-2d41695001370abc.js → runs-219960b3cc4742e3.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-1767a2f57f887ef7.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-0f373aaa7deb98c9.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-181343d8eb894426.js → triggers-bea0439ca2a862ba.js} +1 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-7446a70bdd8381a5.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-d72dfc596e943cc4.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-4c9ad80f8f9d1074.js +1 -0
- mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-783b9526167f1249.js +1 -0
- mage_ai/server/frontend_dist/index.html +2 -2
- mage_ai/server/frontend_dist/manage.html +4 -4
- mage_ai/server/frontend_dist/pipeline-runs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +2 -2
- mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +5 -5
- mage_ai/server/frontend_dist/pipelines/[pipeline].html +2 -2
- mage_ai/server/frontend_dist/pipelines.html +5 -5
- mage_ai/server/frontend_dist/settings/account/profile.html +5 -5
- mage_ai/server/frontend_dist/settings/workspace/preferences.html +5 -5
- mage_ai/server/frontend_dist/settings/workspace/sync-data.html +5 -5
- mage_ai/server/frontend_dist/settings/workspace/users.html +5 -5
- mage_ai/server/frontend_dist/settings.html +2 -2
- mage_ai/server/frontend_dist/sign-in.html +9 -9
- mage_ai/server/frontend_dist/terminal.html +5 -5
- mage_ai/server/frontend_dist/test.html +3 -3
- mage_ai/server/frontend_dist/triggers.html +5 -5
- mage_ai/server/server.py +1 -6
- mage_ai/shared/security.py +9 -2
- mage_ai/shared/utils.py +7 -0
- mage_ai/tests/shared/test_security.py +6 -6
- {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/METADATA +1 -1
- {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/RECORD +86 -81
- mage_ai/server/frontend_dist/_next/static/chunks/1830-3882c19d710feadd.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/2344-a82a406b72fe782b.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/2626-30c0fab7c3926578.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3688-562e0f129b09d1cd.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/3699-dcc0946dd0709216.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/4463-777e71000be29fc4.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/6532-baf1818fbc89e6c1.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/6567-2488118bb39a9d99.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-c580ee38f5442bef.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-dd72d2f3375064cf.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-26974ca695994804.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-df603787c041cd8a.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-72011b08ef047531.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-3b17a6d28cdde471.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-c31fec2be1160dbe.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-3a60003fce7dfb93.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-ebb4a57934e4fa52.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-ff24167e21f2f0cf.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-be9eab29e8ed712b.js +0 -1
- mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-e9c1789f6d5a0429.js +0 -1
- mage_ai/server/frontend_dist/_next/static/kiWhqtXdRSgsbVPwfDLY4/_buildManifest.js +0 -1
- /mage_ai/server/frontend_dist/_next/static/{kiWhqtXdRSgsbVPwfDLY4 → WbTBCvyjQQ9UFFLZOU1E5}/_middlewareManifest.js +0 -0
- /mage_ai/server/frontend_dist/_next/static/{kiWhqtXdRSgsbVPwfDLY4 → WbTBCvyjQQ9UFFLZOU1E5}/_ssgManifest.js +0 -0
- {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/LICENSE +0 -0
- {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/WHEEL +0 -0
- {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/entry_points.txt +0 -0
- {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/top_level.txt +0 -0
mage_ai/data_preparation/executors/streaming_pipeline_executor.py
CHANGED

@@ -3,6 +3,7 @@ from mage_ai.data_preparation.executors.pipeline_executor import PipelineExecutor
 from mage_ai.data_preparation.models.constants import BlockType
 from mage_ai.data_preparation.models.pipeline import Pipeline
 from mage_ai.data_preparation.shared.stream import StreamToLogger
+from mage_ai.shared.hash import merge_dict
 from typing import Callable, Dict, List, Union
 import os
 import yaml
@@ -83,7 +84,7 @@ class StreamingPipelineExecutor(PipelineExecutor):
         if not build_block_output_stdout:
             self.logger.exception(
                 f'Failed to execute streaming pipeline {self.pipeline.uuid}',
-                error=e,
+                **merge_dict(dict(error=e), tags),
             )
             raise e
mage_ai/data_preparation/logging/logger_manager.py
CHANGED

@@ -7,6 +7,8 @@ import io
 import logging
 import os
 
+MAX_LOG_FILE_SIZE = 5 * 1024 * 1024
+
 
 class LoggerManager:
     def __init__(
@@ -50,7 +52,11 @@ class LoggerManager:
             handler = self.create_stream_handler()
         else:
             log_filepath = self.get_log_filepath(create_dir=True)
-            handler = logging.
+            handler = logging.handlers.RotatingFileHandler(
+                log_filepath,
+                backupCount=10,
+                maxBytes=MAX_LOG_FILE_SIZE,
+            )
 
         handler.setLevel(self.log_level)
         handler.setFormatter(self.formatter)
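For context, logging.handlers.RotatingFileHandler is part of the Python standard library: once the log file reaches maxBytes it is rolled over, and at most backupCount older files are kept. A minimal standalone sketch of the behavior this change enables (the log path and logger name are illustrative only):

import logging
import logging.handlers

MAX_LOG_FILE_SIZE = 5 * 1024 * 1024  # 5 MiB, mirroring the new constant above

# Illustrative path; mage-ai computes the real path via get_log_filepath().
handler = logging.handlers.RotatingFileHandler(
    'pipeline.log',
    maxBytes=MAX_LOG_FILE_SIZE,
    backupCount=10,  # keeps pipeline.log.1 ... pipeline.log.10
)
logger = logging.getLogger('example')
logger.addHandler(handler)
logger.warning('rolls over to pipeline.log.1 once the file exceeds maxBytes')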
mage_ai/data_preparation/models/block/__init__.py
CHANGED

@@ -4,6 +4,7 @@ from inspect import Parameter, signature
 from logging import Logger
 from mage_ai.data_cleaner.shared.utils import (
     is_geo_dataframe,
+    is_spark_dataframe,
 )
 from mage_ai.data_preparation.models.block.extension.utils import handle_run_tests
 from mage_ai.data_preparation.models.block.utils import (
@@ -50,7 +51,6 @@ import functools
 import json
 import os
 import pandas as pd
-import re
 import simplejson
 import sys
 import time
@@ -252,6 +252,10 @@ class Block:
         self.dynamic_block_uuid = None
         self.dynamic_upstream_block_uuids = None
 
+        # Spark session
+        self.spark = None
+        self.spark_init = False
+
     @property
     def uuid(self):
         return self.dynamic_block_uuid or self._uuid
@@ -348,34 +352,22 @@ class Block:
     @property
     def full_table_name(self) -> str:
         from mage_ai.data_preparation.models.block.sql.utils.shared import (
-
+            extract_create_statement_table_name,
+            extract_insert_statement_table_names,
         )
 
         if not self.content:
             return None
 
-
-
-
-            r'\(',
-        )
-
-        if not statement_partial:
-            matches = re.findall(
-                r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
-                self.content,
-                re.IGNORECASE,
-            )
-            if len(matches) >= 1:
-                return matches[len(matches) - 1]
-            else:
-                return None
+        table_name = extract_create_statement_table_name(self.content)
+        if table_name:
+            return table_name
 
-
+        matches = extract_insert_statement_table_names(self.content)
+        if len(matches) == 0:
             return None
 
-
-        return parts[-1]
+        return matches[len(matches) - 1]
 
     @classmethod
     def after_create(self, block: 'Block', **kwargs):
@@ -1053,7 +1045,6 @@ class Block:
             block_uuid,
             partition=execution_partition,
         )
-
         if not include_print_outputs:
             all_variables = self.output_variables(execution_partition=execution_partition)
 
@@ -1063,6 +1054,7 @@ class Block:
             block_uuid,
             v,
             partition=execution_partition,
+            spark=self.__get_spark_session(),
         )
 
         if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1071,6 +1063,7 @@ class Block:
             data = variable_object.read_data(
                 sample=True,
                 sample_count=sample_count,
+                spark=self.__get_spark_session(),
             )
             if type(data) is pd.DataFrame:
                 try:
@@ -1130,6 +1123,19 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
                     type=DataType.TEXT,
                     variable_uuid=v,
                 )
+            elif is_spark_dataframe(data):
+                df = data.toPandas()
+                columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+                data = dict(
+                    sample_data=dict(
+                        columns=columns_to_display,
+                        rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+                    ),
+                    type=DataType.TABLE,
+                    variable_uuid=v,
+                )
+                data_products.append(data)
+                continue
             outputs.append(data)
         return outputs + data_products
 
@@ -1166,6 +1172,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
             block_uuid,
             v,
             partition=execution_partition,
+            spark=self.__get_spark_session(),
        )
 
         if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1174,6 +1181,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
             data = await variable_object.read_data_async(
                 sample=True,
                 sample_count=sample_count,
+                spark=self.__get_spark_session(),
             )
             if type(data) is pd.DataFrame:
                 try:
@@ -1233,6 +1241,19 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
                     type=DataType.TEXT,
                     variable_uuid=v,
                 )
+            elif is_spark_dataframe(data):
+                df = data.toPandas()
+                columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+                data = dict(
+                    sample_data=dict(
+                        columns=columns_to_display,
+                        rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+                    ),
+                    type=DataType.TABLE,
+                    variable_uuid=v,
+                )
+                data_products.append(data)
+                continue
             outputs.append(data)
         return outputs + data_products
 
@@ -1663,14 +1684,23 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
                 is_spark_env()):
             global_vars = global_vars or dict()
             if not global_vars.get('spark'):
-
-
-                global_vars['spark'] =
-                    os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
-                except Exception:
-                    pass
+                spark = self.__get_spark_session()
+                if spark is not None:
+                    global_vars['spark'] = spark
         return global_vars
 
+    def __get_spark_session(self):
+        if self.spark_init:
+            return self.spark
+        try:
+            from pyspark.sql import SparkSession
+            self.spark = SparkSession.builder.master(
+                os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
+        except Exception:
+            self.spark = None
+        self.spark_init = True
+        return self.spark
+
     def __store_variables_prepare(
         self,
         variable_mapping: Dict,
@@ -1722,7 +1752,8 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
             dynamic_block_uuid,
         )
         for uuid, data in variables_data['variable_mapping'].items():
-            if spark is not None and type
+            if spark is not None and self.pipeline.type == PipelineType.PYSPARK \
+                    and type(data) is pd.DataFrame:
                 data = spark.createDataFrame(data)
             self.pipeline.variable_manager.add_variable(
                 self.pipeline.uuid,
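The new __get_spark_session helper memoizes the SparkSession: initialization is attempted once, the result (or None on failure) is cached, and later calls return the cached value. A standalone sketch of the same lazy-init pattern, with class and attribute names chosen only for illustration:

import os

class SparkSessionCache:
    """Caches a SparkSession so failed or repeated initialization is not retried."""

    def __init__(self):
        self._session = None
        self._initialized = False

    def get(self):
        if self._initialized:
            return self._session
        try:
            from pyspark.sql import SparkSession
            self._session = SparkSession.builder.master(
                os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
        except Exception:
            self._session = None  # pyspark missing or the master is unreachable
        self._initialized = True
        return self._session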
mage_ai/data_preparation/models/block/sql/__init__.py
CHANGED

@@ -9,6 +9,7 @@ from mage_ai.data_preparation.models.block.sql import (
     trino,
 )
 from mage_ai.data_preparation.models.block.sql.utils.shared import (
+    has_create_or_insert_statement,
     interpolate_vars,
 )
 from mage_ai.data_preparation.models.constants import BlockType
@@ -389,11 +390,15 @@ def split_query_string(query_string: str) -> List[str]:
     arr = []
     for query in queries:
         query = query.strip()
+        if not query:
+            continue
+
+        lines = query.split('\n')
+        query = '\n'.join(list(filter(lambda x: not x.startswith('--'), lines)))
+        query = query.strip()
+        query = re.sub(MAGE_SEMI_COLON, ';', query)
+
         if query:
-            lines = query.split('\n')
-            query = '\n'.join(list(filter(lambda x: not x.startswith('--'), lines)))
-            query = query.strip()
-            query = re.sub(MAGE_SEMI_COLON, ';', query)
             arr.append(query)
 
     return arr
@@ -409,11 +414,23 @@ def execute_raw_sql(
     queries = []
     fetch_query_at_indexes = []
 
-
-        queries.append(query)
-        fetch_query_at_indexes.append(False)
+    has_create_or_insert = has_create_or_insert_statement(query_string)
 
-
+    for query in split_query_string(query_string):
+        if has_create_or_insert:
+            queries.append(query)
+            fetch_query_at_indexes.append(False)
+        else:
+            if should_query:
+                query = f"""SELECT *
+FROM (
+{query}
+) AS {block.table_name}__limit
+LIMIT 1000"""
+                queries.append(query)
+                fetch_query_at_indexes.append(True)
+
+    if should_query and has_create_or_insert:
         queries.append(f'SELECT * FROM {block.full_table_name} LIMIT 1000')
         fetch_query_at_indexes.append(block.full_table_name)
 
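The reworked loop in split_query_string drops '--' comment lines and empty fragments before deciding whether to keep a statement. A self-contained sketch of that filtering on a sample query string (the sample SQL and the plain ';' split are illustrative; the real function also restores an escaped-semicolon placeholder via MAGE_SEMI_COLON):

query_string = """
-- create the target table
CREATE TABLE IF NOT EXISTS analytics.users (id INT);

-- this fragment is only a comment
;

INSERT INTO analytics.users SELECT 1;
"""

statements = []
for query in query_string.split(';'):
    query = query.strip()
    if not query:
        continue
    lines = query.split('\n')
    query = '\n'.join(line for line in lines if not line.startswith('--'))
    query = query.strip()
    if query:
        statements.append(query)

print(statements)
# ['CREATE TABLE IF NOT EXISTS analytics.users (id INT)', 'INSERT INTO analytics.users SELECT 1']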
mage_ai/data_preparation/models/block/sql/utils/shared.py
CHANGED

@@ -47,23 +47,33 @@ def interpolate_input(block, query, replace_func=None):
     for idx, upstream_block in enumerate(block.upstream_blocks):
         matcher1 = '{} df_{} {}'.format('{{', idx + 1, '}}')
 
-
+        is_sql = BlockLanguage.SQL == upstream_block.language
+        if is_sql:
             configuration = upstream_block.configuration
         else:
             configuration = block.configuration
+        use_raw_sql = configuration.get('use_raw_sql')
 
         database = configuration.get('data_provider_database', '')
         schema = configuration.get('data_provider_schema', '')
 
+        replace_with = __replace_func(database, schema, upstream_block.table_name)
+        upstream_block_content = upstream_block.content
+        if is_sql and use_raw_sql and not has_create_or_insert_statement(upstream_block_content):
+            upstream_query = interpolate_input(upstream_block, upstream_block_content)
+            replace_with = f"""(
+{upstream_query}
+) AS {upstream_block.table_name}"""
+
         query = re.sub(
             '{}[ ]*df_{}[ ]*{}'.format(r'\{\{', idx + 1, r'\}\}'),
-
+            replace_with,
             query,
         )
 
         query = query.replace(
             f'{matcher1}',
-
+            replace_with,
         )
 
     return query
@@ -170,3 +180,39 @@ def extract_and_replace_text_between_strings(
     new_text = text[0:max(start_idx - 1, 0)] + replace_string + text[end_idx + 1:]
 
     return extracted_text, new_text
+
+
+def remove_comments(text: str) -> str:
+    lines = text.split('\n')
+    return '\n'.join(line for line in lines if not line.startswith('--'))
+
+
+def extract_create_statement_table_name(text: str) -> str:
+    statement_partial, _ = extract_and_replace_text_between_strings(
+        remove_comments(text),
+        r'create table(?: if not exists)*',
+        r'\(',
+    )
+    if not statement_partial:
+        return None
+
+    parts = statement_partial[:len(statement_partial) - 1].strip().split(' ')
+    return parts[-1]
+
+
+def extract_insert_statement_table_names(text: str) -> List[str]:
+    matches = re.findall(
+        r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
+        remove_comments(text),
+        re.IGNORECASE,
+    )
+    return matches
+
+
+def has_create_or_insert_statement(text: str) -> bool:
+    table_name = extract_create_statement_table_name(text)
+    if table_name:
+        return True
+
+    matches = extract_insert_statement_table_names(text)
+    return len(matches) >= 1
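As a quick illustration of the extraction helpers added above, the insert-statement pattern captures qualified table names after INSERT [OVERWRITE] [INTO]; the sample SQL and variable names below are made up for the example:

import re

SAMPLE_SQL = """
-- load yesterday's orders
INSERT INTO analytics.orders_daily
SELECT * FROM raw.orders;
"""

# Same pattern as extract_insert_statement_table_names in the diff above.
matches = re.findall(
    r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
    SAMPLE_SQL,
    re.IGNORECASE,
)
print(matches)  # ['analytics.orders_daily']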
mage_ai/data_preparation/models/variable.py
CHANGED

@@ -175,6 +175,8 @@ class Variable:
         """
         if self.variable_type == VariableType.DATAFRAME:
             return self.__read_parquet(sample=sample, sample_count=sample_count)
+        elif self.variable_type == VariableType.SPARK_DATAFRAME:
+            return self.__read_spark_parquet(sample=sample, sample_count=sample_count, spark=spark)
         elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
             return await self.__read_dataframe_analysis_async(
                 dataframe_analysis_keys=dataframe_analysis_keys,
@@ -367,7 +369,7 @@
     def __read_spark_parquet(self, sample: bool = False, sample_count: int = None, spark=None):
         if spark is None:
             return None
-
+        df = (
             spark.read
             .format('csv')
             .option('header', 'true')
@@ -375,6 +377,9 @@
             .option('delimiter', ',')
             .load(self.variable_path)
         )
+        if sample and sample_count:
+            df = df.limit(sample_count)
+        return df
 
     def __write_geo_dataframe(self, data) -> None:
         os.makedirs(self.variable_path, exist_ok=True)
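As background on the reader chain above: PySpark builds the CSV read lazily and limit() keeps sampling cheap. A minimal sketch, assuming a local Spark master and an illustrative path (mage-ai resolves the real path from the variable's storage layout):

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local').getOrCreate()

# Illustrative location only.
df = (
    spark.read
    .format('csv')
    .option('header', 'true')
    .option('inferSchema', 'true')
    .option('delimiter', ',')
    .load('/tmp/example_variable')
)
sample = df.limit(10)  # nothing is scanned until an action such as count() runs
print(sample.count())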
mage_ai/data_preparation/repo_manager.py
CHANGED

@@ -114,12 +114,15 @@ def init_repo(repo_path: str) -> None:
     if os.path.exists(repo_path):
         raise FileExistsError(f'Repository {repo_path} already exists')
 
-    os.makedirs(
+    os.makedirs(
+        os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR,
+        exist_ok=True,
+    )
     copy_template_directory('repo', repo_path)
 
 
 def get_data_dir() -> str:
-    return os.getenv(MAGE_DATA_DIR_ENV_VAR
+    return os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR
 
 
 def get_repo_name() -> str:
mage_ai/data_preparation/shared/secrets.py
CHANGED

@@ -66,6 +66,9 @@ def get_secret_value(name: str) -> str:
     from mage_ai.orchestration.db.models import Secret
     fernet = Fernet(get_encryption_key())
 
-
-
-
+    try:
+        secret = Secret.query.filter(Secret.name == name).one_or_none()
+        if secret:
+            return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+    except Exception:
+        print(f'WARNING: Could not find secret value for secret {name}')
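For background, Fernet (from the cryptography package) is symmetric encryption, so the decrypt call in the new code simply reverses an earlier encrypt with the same key. A self-contained round-trip sketch; the key and secret value are placeholders, and mage-ai obtains its real key via get_encryption_key():

from cryptography.fernet import Fernet

key = Fernet.generate_key()  # placeholder key for the example
fernet = Fernet(key)

token = fernet.encrypt('my-database-password'.encode('utf-8'))
plaintext = fernet.decrypt(token).decode('utf-8')
assert plaintext == 'my-database-password'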
mage_ai/data_preparation/templates/sensors/bigquery.py
ADDED

@@ -0,0 +1,32 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.bigquery import BigQuery
+from mage_ai.io.config import ConfigFileLoader
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_bigquery_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a BigQuery query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your BigQuery query'  # Specify your SQL query here
+
+    loader = BigQuery.with_config(ConfigFileLoader(config_path, config_profile))
+    df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True
mage_ai/data_preparation/templates/sensors/mysql.py
ADDED

@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.mysql import MySQL
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_mysql_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a MySQL query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your MySQL query'  # Specify your SQL query here
+
+    with MySQL.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+        # Add your checks here
+        if df.empty:
+            return False
+
+        return True
mage_ai/data_preparation/templates/sensors/postgres.py
ADDED

@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.postgres import Postgres
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_postgres_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a Postgres query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your Postgres query'  # Specify your SQL query here
+
+    with Postgres.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+        # Add your checks here
+        if df.empty:
+            return False
+
+        return True
mage_ai/data_preparation/templates/sensors/redshift.py
ADDED

@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.redshift import Redshift
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_redshift_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a Redshift query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your Redshift query'  # Specify your SQL query here
+
+    with Redshift.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+        # Add your checks here
+        if df.empty:
+            return False
+
+        return True
mage_ai/data_preparation/templates/sensors/s3.py
CHANGED

@@ -3,9 +3,6 @@ from mage_ai.io.config import ConfigFileLoader
 from mage_ai.io.s3 import S3
 from os import path
 
-import time
-
-
 if 'sensor' not in globals():
     from mage_ai.data_preparation.decorators import sensor
 
@@ -13,15 +10,22 @@ if 'sensor' not in globals():
 @sensor
 def check_condition(**kwargs) -> bool:
     """
-    Template code for checking if a
+    Template code for checking if a file or folder exists in a S3 bucket
+
+    You will also need to fill out the following AWS related fields
+    in `io_config.yaml`:
+    - AWS_ACCESS_KEY_ID
+    - AWS_SECRET_ACCESS_KEY
+    - AWS_REGION
     """
 
     config_path = path.join(get_repo_path(), 'io_config.yaml')
     config_profile = 'default'
 
     bucket_name = 'your_bucket_name'
-
+    s3_path = 'path/to/folder/or/file'
 
-
-
+    config_file_loader = ConfigFileLoader(config_path, config_profile)
+    return S3.with_config(config_file_loader).exists(
+        bucket_name, s3_path
     )
mage_ai/data_preparation/templates/sensors/snowflake.py
ADDED

@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.snowflake import Snowflake
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_snowflake_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a Snowflake query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your Snowflake query'  # Specify your SQL query here
+
+    with Snowflake.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+        # Add your checks here
+        if df.empty:
+            return False
+
+        return True
mage_ai/io/postgres.py
CHANGED
@@ -1,6 +1,7 @@
 from mage_ai.io.config import BaseConfigLoader, ConfigKey
 from mage_ai.io.export_utils import BadConversionError, PandasTypes
 from mage_ai.io.sql import BaseSQL
+from mage_ai.shared.utils import is_port_in_use
 from pandas import DataFrame, Series
 from psycopg2 import connect, _psycopg
 from sshtunnel import SSHTunnelForwarder
@@ -87,10 +88,21 @@ class Postgres(BaseSQL):
                 ssh_setting['ssh_pkey'] = self.settings['ssh_pkey']
             else:
                 ssh_setting['ssh_password'] = self.settings['ssh_password']
+
+            # Find an available local port
+            local_port = port
+            max_local_port = local_port + 100
+            while is_port_in_use(local_port):
+                if local_port > max_local_port:
+                    raise Exception(
+                        'Unable to find an open port, please clear your running processes '
+                        'if possible.'
+                    )
+                local_port += 1
             self.ssh_tunnel = SSHTunnelForwarder(
                 (self.settings['ssh_host'], self.settings['ssh_port']),
                 remote_bind_address=(host, port),
-                local_bind_address=('',
+                local_bind_address=('', local_port),
                 **ssh_setting,
             )
             self.ssh_tunnel.start()
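The changed-file list above also shows mage_ai/shared/utils.py gaining 7 lines, which is where the imported is_port_in_use helper lives; its body is not included in this diff. A common way to write such a check, offered only as an illustrative sketch rather than the actual implementation:

import socket

def is_port_in_use(port: int, host: str = 'localhost') -> bool:
    # connect_ex returns 0 when something is already listening on the port.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        return sock.connect_ex((host, port)) == 0

print(is_port_in_use(5432))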
mage_ai/server/constants.py
CHANGED