mage-ai 0.8.26__py3-none-any.whl → 0.8.27__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This release has been flagged as potentially problematic.
Files changed (107):
  1. mage_ai/data_preparation/executors/streaming_pipeline_executor.py +2 -1
  2. mage_ai/data_preparation/logging/logger_manager.py +7 -1
  3. mage_ai/data_preparation/models/block/__init__.py +60 -29
  4. mage_ai/data_preparation/models/block/sql/__init__.py +25 -8
  5. mage_ai/data_preparation/models/block/sql/utils/shared.py +49 -3
  6. mage_ai/data_preparation/models/variable.py +6 -1
  7. mage_ai/data_preparation/repo_manager.py +5 -2
  8. mage_ai/data_preparation/shared/secrets.py +6 -3
  9. mage_ai/data_preparation/templates/sensors/bigquery.py +32 -0
  10. mage_ai/data_preparation/templates/sensors/mysql.py +33 -0
  11. mage_ai/data_preparation/templates/sensors/postgres.py +33 -0
  12. mage_ai/data_preparation/templates/sensors/redshift.py +33 -0
  13. mage_ai/data_preparation/templates/sensors/s3.py +11 -7
  14. mage_ai/data_preparation/templates/sensors/snowflake.py +33 -0
  15. mage_ai/io/postgres.py +13 -1
  16. mage_ai/server/constants.py +1 -1
  17. mage_ai/server/frontend_dist/404.html +2 -2
  18. mage_ai/server/frontend_dist/404.html.html +2 -2
  19. mage_ai/server/frontend_dist/_next/static/WbTBCvyjQQ9UFFLZOU1E5/_buildManifest.js +1 -0
  20. mage_ai/server/frontend_dist/_next/static/chunks/2344-f8ae030d6a6863ae.js +1 -0
  21. mage_ai/server/frontend_dist/_next/static/chunks/2626-e7fa4f83f8214c97.js +1 -0
  22. mage_ai/server/frontend_dist/_next/static/chunks/{4178-663d9f70bffc7a47.js → 4178-a6d1bd2be4706f51.js} +1 -1
  23. mage_ai/server/frontend_dist/_next/static/chunks/4261-88b0103fad331620.js +1 -0
  24. mage_ai/server/frontend_dist/_next/static/chunks/5141-57c3868a80196da8.js +1 -0
  25. mage_ai/server/frontend_dist/_next/static/chunks/6166-705b4fdecaf11e63.js +1 -0
  26. mage_ai/server/frontend_dist/_next/static/chunks/6532-b1bd0b3f422abec8.js +1 -0
  27. mage_ai/server/frontend_dist/_next/static/chunks/8180-8de652170ea5ed93.js +1 -0
  28. mage_ai/server/frontend_dist/_next/static/chunks/839-15c54471a9a9bf2e.js +1 -0
  29. mage_ai/server/frontend_dist/_next/static/chunks/{9386-9d6a4e5836229264.js → 9386-d4cc11bab74eec8d.js} +1 -1
  30. mage_ai/server/frontend_dist/_next/static/chunks/pages/{_app-624f87faa4b5ee9a.js → _app-5f3dbed367342a3d.js} +1 -1
  31. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-3046bc53d24917c7.js +1 -0
  32. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-f6059e27e601627c.js +1 -0
  33. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-123556bdfe2e194b.js → [...slug]-050ef37b6672100a.js} +1 -1
  34. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-5f95bb4c3a2d7d46.js +1 -0
  35. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-270c0198eeef1542.js +1 -0
  36. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-c9f1df40e0aa6981.js +1 -0
  37. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-e0eb0098dcbf27ac.js → block-runs-d74850779dbe87b3.js} +1 -1
  38. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-848544a58563dbec.js +1 -0
  39. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{monitors-204daac985c03b62.js → monitors-675171cfd7d7b346.js} +1 -1
  40. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/{[run]-44533e244974a422.js → [run]-b2955f0ff960894e.js} +1 -1
  41. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{runs-2d41695001370abc.js → runs-219960b3cc4742e3.js} +1 -1
  42. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-1767a2f57f887ef7.js +1 -0
  43. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-0f373aaa7deb98c9.js +1 -0
  44. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-181343d8eb894426.js → triggers-bea0439ca2a862ba.js} +1 -1
  45. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-7446a70bdd8381a5.js +1 -0
  46. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-d72dfc596e943cc4.js +1 -0
  47. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-4c9ad80f8f9d1074.js +1 -0
  48. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-783b9526167f1249.js +1 -0
  49. mage_ai/server/frontend_dist/index.html +2 -2
  50. mage_ai/server/frontend_dist/manage.html +4 -4
  51. mage_ai/server/frontend_dist/pipeline-runs.html +5 -5
  52. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +5 -5
  53. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +5 -5
  54. mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +2 -2
  55. mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +5 -5
  56. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +5 -5
  57. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +5 -5
  58. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +5 -5
  59. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +5 -5
  60. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +5 -5
  61. mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +5 -5
  62. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +5 -5
  63. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +5 -5
  64. mage_ai/server/frontend_dist/pipelines/[pipeline].html +2 -2
  65. mage_ai/server/frontend_dist/pipelines.html +5 -5
  66. mage_ai/server/frontend_dist/settings/account/profile.html +5 -5
  67. mage_ai/server/frontend_dist/settings/workspace/preferences.html +5 -5
  68. mage_ai/server/frontend_dist/settings/workspace/sync-data.html +5 -5
  69. mage_ai/server/frontend_dist/settings/workspace/users.html +5 -5
  70. mage_ai/server/frontend_dist/settings.html +2 -2
  71. mage_ai/server/frontend_dist/sign-in.html +9 -9
  72. mage_ai/server/frontend_dist/terminal.html +5 -5
  73. mage_ai/server/frontend_dist/test.html +3 -3
  74. mage_ai/server/frontend_dist/triggers.html +5 -5
  75. mage_ai/server/server.py +1 -6
  76. mage_ai/shared/security.py +9 -2
  77. mage_ai/shared/utils.py +7 -0
  78. mage_ai/tests/shared/test_security.py +6 -6
  79. {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/METADATA +1 -1
  80. {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/RECORD +86 -81
  81. mage_ai/server/frontend_dist/_next/static/chunks/1830-3882c19d710feadd.js +0 -1
  82. mage_ai/server/frontend_dist/_next/static/chunks/2344-a82a406b72fe782b.js +0 -1
  83. mage_ai/server/frontend_dist/_next/static/chunks/2626-30c0fab7c3926578.js +0 -1
  84. mage_ai/server/frontend_dist/_next/static/chunks/3688-562e0f129b09d1cd.js +0 -1
  85. mage_ai/server/frontend_dist/_next/static/chunks/3699-dcc0946dd0709216.js +0 -1
  86. mage_ai/server/frontend_dist/_next/static/chunks/4463-777e71000be29fc4.js +0 -1
  87. mage_ai/server/frontend_dist/_next/static/chunks/6532-baf1818fbc89e6c1.js +0 -1
  88. mage_ai/server/frontend_dist/_next/static/chunks/6567-2488118bb39a9d99.js +0 -1
  89. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-c580ee38f5442bef.js +0 -1
  90. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-dd72d2f3375064cf.js +0 -1
  91. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-26974ca695994804.js +0 -1
  92. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-df603787c041cd8a.js +0 -1
  93. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-72011b08ef047531.js +0 -1
  94. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-3b17a6d28cdde471.js +0 -1
  95. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-c31fec2be1160dbe.js +0 -1
  96. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-3a60003fce7dfb93.js +0 -1
  97. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-ebb4a57934e4fa52.js +0 -1
  98. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-ff24167e21f2f0cf.js +0 -1
  99. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-be9eab29e8ed712b.js +0 -1
  100. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-e9c1789f6d5a0429.js +0 -1
  101. mage_ai/server/frontend_dist/_next/static/kiWhqtXdRSgsbVPwfDLY4/_buildManifest.js +0 -1
  102. /mage_ai/server/frontend_dist/_next/static/{kiWhqtXdRSgsbVPwfDLY4 → WbTBCvyjQQ9UFFLZOU1E5}/_middlewareManifest.js +0 -0
  103. /mage_ai/server/frontend_dist/_next/static/{kiWhqtXdRSgsbVPwfDLY4 → WbTBCvyjQQ9UFFLZOU1E5}/_ssgManifest.js +0 -0
  104. {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/LICENSE +0 -0
  105. {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/WHEEL +0 -0
  106. {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/entry_points.txt +0 -0
  107. {mage_ai-0.8.26.dist-info → mage_ai-0.8.27.dist-info}/top_level.txt +0 -0

mage_ai/data_preparation/executors/streaming_pipeline_executor.py CHANGED
@@ -3,6 +3,7 @@ from mage_ai.data_preparation.executors.pipeline_executor import PipelineExecuto
 from mage_ai.data_preparation.models.constants import BlockType
 from mage_ai.data_preparation.models.pipeline import Pipeline
 from mage_ai.data_preparation.shared.stream import StreamToLogger
+from mage_ai.shared.hash import merge_dict
 from typing import Callable, Dict, List, Union
 import os
 import yaml
@@ -83,7 +84,7 @@ class StreamingPipelineExecutor(PipelineExecutor):
             if not build_block_output_stdout:
                 self.logger.exception(
                     f'Failed to execute streaming pipeline {self.pipeline.uuid}',
-                    error=e,
+                    **merge_dict(dict(error=e), tags),
                 )
             raise e

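Only the import of merge_dict is visible in this diff; assuming it performs a shallow dict merge (an assumption, its body is not part of this diff), a minimal sketch of the change's effect on the logging call:

# Sketch: merge the run's tags into the logger.exception(...) keyword arguments.
def merge_dict(a: dict, b: dict) -> dict:
    return {**a, **(b or {})}  # assumed shallow-merge behavior

tags = dict(pipeline_uuid='demo_pipeline', pipeline_run_id=1)  # illustrative tags
error = ValueError('boom')
kwargs = merge_dict(dict(error=error), tags)
print(sorted(kwargs))  # ['error', 'pipeline_run_id', 'pipeline_uuid']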

mage_ai/data_preparation/logging/logger_manager.py CHANGED
@@ -7,6 +7,8 @@ import io
 import logging
 import os

+MAX_LOG_FILE_SIZE = 5 * 1024 * 1024
+

 class LoggerManager:
     def __init__(
@@ -50,7 +52,11 @@ class LoggerManager:
             handler = self.create_stream_handler()
         else:
             log_filepath = self.get_log_filepath(create_dir=True)
-            handler = logging.FileHandler(log_filepath)
+            handler = logging.handlers.RotatingFileHandler(
+                log_filepath,
+                backupCount=10,
+                maxBytes=MAX_LOG_FILE_SIZE,
+            )

         handler.setLevel(self.log_level)
         handler.setFormatter(self.formatter)
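A minimal sketch of the new rotation behavior (file and logger names are illustrative; note that RotatingFileHandler lives in logging.handlers, which must be imported explicitly):

import logging
import logging.handlers

MAX_LOG_FILE_SIZE = 5 * 1024 * 1024  # 5 MiB, matching the constant above

handler = logging.handlers.RotatingFileHandler(
    'pipeline.log',
    maxBytes=MAX_LOG_FILE_SIZE,  # roll over once the active file reaches 5 MiB
    backupCount=10,              # keep pipeline.log.1 through pipeline.log.10
)
logger = logging.getLogger('demo')
logger.addHandler(handler)
logger.warning('log files now rotate instead of growing without bound')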

mage_ai/data_preparation/models/block/__init__.py CHANGED
@@ -4,6 +4,7 @@ from inspect import Parameter, signature
 from logging import Logger
 from mage_ai.data_cleaner.shared.utils import (
     is_geo_dataframe,
+    is_spark_dataframe,
 )
 from mage_ai.data_preparation.models.block.extension.utils import handle_run_tests
 from mage_ai.data_preparation.models.block.utils import (
@@ -50,7 +51,6 @@ import functools
 import json
 import os
 import pandas as pd
-import re
 import simplejson
 import sys
 import time
@@ -252,6 +252,10 @@ class Block:
         self.dynamic_block_uuid = None
         self.dynamic_upstream_block_uuids = None

+        # Spark session
+        self.spark = None
+        self.spark_init = False
+
     @property
     def uuid(self):
         return self.dynamic_block_uuid or self._uuid
@@ -348,34 +352,22 @@
     @property
     def full_table_name(self) -> str:
         from mage_ai.data_preparation.models.block.sql.utils.shared import (
-            extract_and_replace_text_between_strings,
+            extract_create_statement_table_name,
+            extract_insert_statement_table_names,
         )

         if not self.content:
             return None

-        statement_partial, _ = extract_and_replace_text_between_strings(
-            self.content,
-            'create',
-            r'\(',
-        )
-
-        if not statement_partial:
-            matches = re.findall(
-                r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
-                self.content,
-                re.IGNORECASE,
-            )
-            if len(matches) >= 1:
-                return matches[len(matches) - 1]
-            else:
-                return None
+        table_name = extract_create_statement_table_name(self.content)
+        if table_name:
+            return table_name

-        if not statement_partial:
+        matches = extract_insert_statement_table_names(self.content)
+        if len(matches) == 0:
             return None

-        parts = statement_partial[:len(statement_partial) - 1].strip().split(' ')
-        return parts[-1]
+        return matches[len(matches) - 1]

     @classmethod
     def after_create(self, block: 'Block', **kwargs):
@@ -1053,7 +1045,6 @@
             block_uuid,
             partition=execution_partition,
         )
-
         if not include_print_outputs:
             all_variables = self.output_variables(execution_partition=execution_partition)

@@ -1063,6 +1054,7 @@
                 block_uuid,
                 v,
                 partition=execution_partition,
+                spark=self.__get_spark_session(),
             )

             if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1071,6 +1063,7 @@
             data = variable_object.read_data(
                 sample=True,
                 sample_count=sample_count,
+                spark=self.__get_spark_session(),
             )
             if type(data) is pd.DataFrame:
                 try:
@@ -1130,6 +1123,19 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
                     type=DataType.TEXT,
                     variable_uuid=v,
                 )
+            elif is_spark_dataframe(data):
+                df = data.toPandas()
+                columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+                data = dict(
+                    sample_data=dict(
+                        columns=columns_to_display,
+                        rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+                    ),
+                    type=DataType.TABLE,
+                    variable_uuid=v,
+                )
+                data_products.append(data)
+                continue
             outputs.append(data)
         return outputs + data_products

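A sketch of the new Spark branch above, assuming an active SparkSession named spark; the value of DATAFRAME_ANALYSIS_MAX_COLUMNS below is illustrative:

import json

DATAFRAME_ANALYSIS_MAX_COLUMNS = 30  # illustrative column cap

spark_df = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'letter'])
df = spark_df.toPandas()  # collect the sampled Spark DataFrame locally
columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
# orient='split' yields {'columns': [...], 'index': [...], 'data': [[...], ...]}
rows = json.loads(df[columns_to_display].to_json(orient='split'))['data']
print(rows)  # [[1, 'a'], [2, 'b']]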
@@ -1166,6 +1172,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
                 block_uuid,
                 v,
                 partition=execution_partition,
+                spark=self.__get_spark_session(),
             )

             if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1174,6 +1181,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
             data = await variable_object.read_data_async(
                 sample=True,
                 sample_count=sample_count,
+                spark=self.__get_spark_session(),
             )
             if type(data) is pd.DataFrame:
                 try:
@@ -1233,6 +1241,19 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
                     type=DataType.TEXT,
                     variable_uuid=v,
                 )
+            elif is_spark_dataframe(data):
+                df = data.toPandas()
+                columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+                data = dict(
+                    sample_data=dict(
+                        columns=columns_to_display,
+                        rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+                    ),
+                    type=DataType.TABLE,
+                    variable_uuid=v,
+                )
+                data_products.append(data)
+                continue
             outputs.append(data)
         return outputs + data_products

@@ -1663,14 +1684,23 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
                 is_spark_env()):
             global_vars = global_vars or dict()
             if not global_vars.get('spark'):
-                try:
-                    from pyspark.sql import SparkSession
-                    global_vars['spark'] = SparkSession.builder.master(
-                        os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
-                except Exception:
-                    pass
+                spark = self.__get_spark_session()
+                if spark is not None:
+                    global_vars['spark'] = spark
         return global_vars

+    def __get_spark_session(self):
+        if self.spark_init:
+            return self.spark
+        try:
+            from pyspark.sql import SparkSession
+            self.spark = SparkSession.builder.master(
+                os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
+        except Exception:
+            self.spark = None
+        self.spark_init = True
+        return self.spark
+
     def __store_variables_prepare(
         self,
         variable_mapping: Dict,
@@ -1722,7 +1752,8 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
             dynamic_block_uuid,
         )
         for uuid, data in variables_data['variable_mapping'].items():
-            if spark is not None and type(data) is pd.DataFrame:
+            if spark is not None and self.pipeline.type == PipelineType.PYSPARK \
+                    and type(data) is pd.DataFrame:
                 data = spark.createDataFrame(data)
             self.pipeline.variable_manager.add_variable(
                 self.pipeline.uuid,
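The new __get_spark_session memoizes both success and failure: once spark_init is set, neither the pyspark import nor getOrCreate() is retried. The same pattern shown standalone, as a minimal sketch (the real method caches these attributes on the Block instance):

import os

class SparkSessionCache:
    def __init__(self):
        self.spark = None
        self.spark_init = False  # set once creation has been attempted

    def get(self):
        if self.spark_init:
            # Returns the cached session, or the cached None if the first
            # attempt failed (e.g. pyspark is not installed).
            return self.spark
        try:
            from pyspark.sql import SparkSession
            self.spark = SparkSession.builder.master(
                os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
        except Exception:
            self.spark = None
        self.spark_init = True
        return self.spark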

mage_ai/data_preparation/models/block/sql/__init__.py CHANGED
@@ -9,6 +9,7 @@ from mage_ai.data_preparation.models.block.sql import (
     trino,
 )
 from mage_ai.data_preparation.models.block.sql.utils.shared import (
+    has_create_or_insert_statement,
     interpolate_vars,
 )
 from mage_ai.data_preparation.models.constants import BlockType
@@ -389,11 +390,15 @@ def split_query_string(query_string: str) -> List[str]:
     arr = []
     for query in queries:
         query = query.strip()
+        if not query:
+            continue
+
+        lines = query.split('\n')
+        query = '\n'.join(list(filter(lambda x: not x.startswith('--'), lines)))
+        query = query.strip()
+        query = re.sub(MAGE_SEMI_COLON, ';', query)
+
         if query:
-            lines = query.split('\n')
-            query = '\n'.join(list(filter(lambda x: not x.startswith('--'), lines)))
-            query = query.strip()
-            query = re.sub(MAGE_SEMI_COLON, ';', query)
             arr.append(query)

     return arr
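After the reordering, segments that are empty, or that reduce to nothing after comment stripping, are skipped instead of being appended. A standalone illustration of the comment stripping (the real function also restores a library-internal MAGE_SEMI_COLON placeholder to a literal ';'):

query_string = '''
-- set up the table
CREATE TABLE demo (id int);

INSERT INTO demo VALUES (1)
'''
for query in query_string.split(';'):
    query = query.strip()
    if not query:
        continue
    lines = query.split('\n')
    query = '\n'.join(filter(lambda x: not x.startswith('--'), lines)).strip()
    if query:
        print(repr(query))
# Prints: 'CREATE TABLE demo (id int)' then 'INSERT INTO demo VALUES (1)'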
@@ -409,11 +414,23 @@ def execute_raw_sql(
     queries = []
     fetch_query_at_indexes = []

-    for query in split_query_string(query_string):
-        queries.append(query)
-        fetch_query_at_indexes.append(False)
+    has_create_or_insert = has_create_or_insert_statement(query_string)

-    if should_query:
+    for query in split_query_string(query_string):
+        if has_create_or_insert:
+            queries.append(query)
+            fetch_query_at_indexes.append(False)
+        else:
+            if should_query:
+                query = f"""SELECT *
+FROM (
+{query}
+) AS {block.table_name}__limit
+LIMIT 1000"""
+            queries.append(query)
+            fetch_query_at_indexes.append(True)
+
+    if should_query and has_create_or_insert:
         queries.append(f'SELECT * FROM {block.full_table_name} LIMIT 1000')
         fetch_query_at_indexes.append(block.full_table_name)

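A sketch of the preview wrapping applied when the block contains no CREATE or INSERT statement (the table name here is illustrative):

table_name = 'demo_block'
query = 'SELECT id, letter FROM source_table'
wrapped = f"""SELECT *
FROM (
{query}
) AS {table_name}__limit
LIMIT 1000"""
print(wrapped)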

mage_ai/data_preparation/models/block/sql/utils/shared.py CHANGED
@@ -47,23 +47,33 @@ def interpolate_input(block, query, replace_func=None):
     for idx, upstream_block in enumerate(block.upstream_blocks):
         matcher1 = '{} df_{} {}'.format('{{', idx + 1, '}}')

-        if BlockLanguage.SQL == upstream_block.type:
+        is_sql = BlockLanguage.SQL == upstream_block.language
+        if is_sql:
             configuration = upstream_block.configuration
         else:
             configuration = block.configuration
+        use_raw_sql = configuration.get('use_raw_sql')

         database = configuration.get('data_provider_database', '')
         schema = configuration.get('data_provider_schema', '')

+        replace_with = __replace_func(database, schema, upstream_block.table_name)
+        upstream_block_content = upstream_block.content
+        if is_sql and use_raw_sql and not has_create_or_insert_statement(upstream_block_content):
+            upstream_query = interpolate_input(upstream_block, upstream_block_content)
+            replace_with = f"""(
+{upstream_query}
+) AS {upstream_block.table_name}"""
+
         query = re.sub(
             '{}[ ]*df_{}[ ]*{}'.format(r'\{\{', idx + 1, r'\}\}'),
-            __replace_func(database, schema, upstream_block.table_name),
+            replace_with,
             query,
         )

         query = query.replace(
             f'{matcher1}',
-            __replace_func(database, schema, upstream_block.table_name),
+            replace_with,
         )

     return query
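An illustration of the new substitution: when the upstream block is raw SQL with no CREATE or INSERT statement, its query is inlined as a derived table instead of being referenced as a materialized table (all names below are made up):

upstream_query = 'SELECT id FROM users'
upstream_table_name = 'raw_sql_block'
replace_with = f"""(
{upstream_query}
) AS {upstream_table_name}"""

query = 'SELECT * FROM {{ df_1 }}'
print(query.replace('{{ df_1 }}', replace_with))
# SELECT * FROM (
# SELECT id FROM users
# ) AS raw_sql_block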
@@ -170,3 +180,39 @@ def extract_and_replace_text_between_strings(
     new_text = text[0:max(start_idx - 1, 0)] + replace_string + text[end_idx + 1:]

     return extracted_text, new_text
+
+
+def remove_comments(text: str) -> str:
+    lines = text.split('\n')
+    return '\n'.join(line for line in lines if not line.startswith('--'))
+
+
+def extract_create_statement_table_name(text: str) -> str:
+    statement_partial, _ = extract_and_replace_text_between_strings(
+        remove_comments(text),
+        r'create table(?: if not exists)*',
+        r'\(',
+    )
+    if not statement_partial:
+        return None
+
+    parts = statement_partial[:len(statement_partial) - 1].strip().split(' ')
+    return parts[-1]
+
+
+def extract_insert_statement_table_names(text: str) -> List[str]:
+    matches = re.findall(
+        r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
+        remove_comments(text),
+        re.IGNORECASE,
+    )
+    return matches
+
+
+def has_create_or_insert_statement(text: str) -> bool:
+    table_name = extract_create_statement_table_name(text)
+    if table_name:
+        return True
+
+    matches = extract_insert_statement_table_names(text)
+    return len(matches) >= 1
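Exercising the new INSERT extraction with a standalone copy of the same regular expression, for illustration:

import re

sql = '''
-- nightly load
INSERT OVERWRITE analytics.daily_users
SELECT * FROM staging.users;
INSERT INTO analytics.audit_log VALUES (1)
'''
without_comments = '\n'.join(
    line for line in sql.split('\n') if not line.startswith('--')
)
print(re.findall(
    r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
    without_comments,
    re.IGNORECASE,
))  # ['analytics.daily_users', 'analytics.audit_log']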

mage_ai/data_preparation/models/variable.py CHANGED
@@ -175,6 +175,8 @@ class Variable:
         """
         if self.variable_type == VariableType.DATAFRAME:
             return self.__read_parquet(sample=sample, sample_count=sample_count)
+        elif self.variable_type == VariableType.SPARK_DATAFRAME:
+            return self.__read_spark_parquet(sample=sample, sample_count=sample_count, spark=spark)
         elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
             return await self.__read_dataframe_analysis_async(
                 dataframe_analysis_keys=dataframe_analysis_keys,
@@ -367,7 +369,7 @@
     def __read_spark_parquet(self, sample: bool = False, sample_count: int = None, spark=None):
         if spark is None:
             return None
-        return (
+        df = (
             spark.read
             .format('csv')
             .option('header', 'true')
@@ -375,6 +377,9 @@
             .option('delimiter', ',')
             .load(self.variable_path)
         )
+        if sample and sample_count:
+            df = df.limit(sample_count)
+        return df

     def __write_geo_dataframe(self, data) -> None:
         os.makedirs(self.variable_path, exist_ok=True)

mage_ai/data_preparation/repo_manager.py CHANGED
@@ -114,12 +114,15 @@ def init_repo(repo_path: str) -> None:
     if os.path.exists(repo_path):
         raise FileExistsError(f'Repository {repo_path} already exists')

-    os.makedirs(os.getenv(MAGE_DATA_DIR_ENV_VAR, DEFAULT_MAGE_DATA_DIR), exist_ok=True)
+    os.makedirs(
+        os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR,
+        exist_ok=True,
+    )
     copy_template_directory('repo', repo_path)


 def get_data_dir() -> str:
-    return os.getenv(MAGE_DATA_DIR_ENV_VAR, DEFAULT_MAGE_DATA_DIR)
+    return os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR


 def get_repo_name() -> str:
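The switch from os.getenv(VAR, DEFAULT) to os.getenv(VAR) or DEFAULT matters when the variable is set but empty: the two-argument form only falls back when the variable is unset. The default value below is illustrative:

import os

os.environ['MAGE_DATA_DIR'] = ''
print(repr(os.getenv('MAGE_DATA_DIR', '~/.mage_data')))    # ''
print(repr(os.getenv('MAGE_DATA_DIR') or '~/.mage_data'))  # '~/.mage_data'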

mage_ai/data_preparation/shared/secrets.py CHANGED
@@ -66,6 +66,9 @@ def get_secret_value(name: str) -> str:
     from mage_ai.orchestration.db.models import Secret
     fernet = Fernet(get_encryption_key())

-    secret = Secret.query.filter(Secret.name == name).one_or_none()
-    if secret:
-        return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+    try:
+        secret = Secret.query.filter(Secret.name == name).one_or_none()
+        if secret:
+            return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+    except Exception:
+        print(f'WARNING: Could not find secret value for secret {name}')

mage_ai/data_preparation/templates/sensors/bigquery.py ADDED
@@ -0,0 +1,32 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.bigquery import BigQuery
+from mage_ai.io.config import ConfigFileLoader
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_bigquery_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a BigQuery query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your BigQuery query'  # Specify your SQL query here
+
+    loader = BigQuery.with_config(ConfigFileLoader(config_path, config_profile))
+    df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True

mage_ai/data_preparation/templates/sensors/mysql.py ADDED
@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.mysql import MySQL
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_mysql_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a MySQL query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your MySQL query'  # Specify your SQL query here
+
+    with MySQL.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True

mage_ai/data_preparation/templates/sensors/postgres.py ADDED
@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.postgres import Postgres
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_postgres_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a Postgres query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your Postgres query'  # Specify your SQL query here
+
+    with Postgres.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True

mage_ai/data_preparation/templates/sensors/redshift.py ADDED
@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.redshift import Redshift
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_redshift_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a Redshift query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your Redshift query'  # Specify your SQL query here
+
+    with Redshift.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True

mage_ai/data_preparation/templates/sensors/s3.py CHANGED
@@ -3,9 +3,6 @@ from mage_ai.io.config import ConfigFileLoader
 from mage_ai.io.s3 import S3
 from os import path

-import time
-
-
 if 'sensor' not in globals():
     from mage_ai.data_preparation.decorators import sensor

@@ -13,15 +10,22 @@ if 'sensor' not in globals():
 @sensor
 def check_condition(**kwargs) -> bool:
     """
-    Template code for checking if a partition exists in a S3 bucket
+    Template code for checking if a file or folder exists in a S3 bucket
+
+    You will also need to fill out the following AWS related fields
+    in `io_config.yaml`:
+    - AWS_ACCESS_KEY_ID
+    - AWS_SECRET_ACCESS_KEY
+    - AWS_REGION
     """

     config_path = path.join(get_repo_path(), 'io_config.yaml')
     config_profile = 'default'

     bucket_name = 'your_bucket_name'
-    path = 'your_partition_path'
+    s3_path = 'path/to/folder/or/file'

-    return S3.with_config(ConfigFileLoader(config_path, config_profile)).exists(
-        bucket_name, path
+    config_file_loader = ConfigFileLoader(config_path, config_profile)
+    return S3.with_config(config_file_loader).exists(
+        bucket_name, s3_path
     )
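Besides clarifying the docstring, the path to s3_path rename fixes a scoping bug in the old template: assigning to path anywhere inside the function made it a local name throughout, so the earlier path.join(...) call raised UnboundLocalError. A minimal repro:

from os import path

def check_condition():
    config_path = path.join('/repo', 'io_config.yaml')  # UnboundLocalError here
    path = 'your_partition_path'  # this assignment shadows `from os import path`
    return config_path

check_condition()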

mage_ai/data_preparation/templates/sensors/snowflake.py ADDED
@@ -0,0 +1,33 @@
+from mage_ai.data_preparation.repo_manager import get_repo_path
+from mage_ai.io.config import ConfigFileLoader
+from mage_ai.io.snowflake import Snowflake
+from os import path
+
+if 'sensor' not in globals():
+    from mage_ai.data_preparation.decorators import sensor
+
+
+@sensor
+def query_snowflake_and_check_condition(**kwargs) -> bool:
+    """
+    Template code for checking the results of a Snowflake query.
+    Specify your configuration settings in 'io_config.yaml'.
+
+    Return: True if the sensor should complete, False if it should
+    keep waiting
+    """
+
+    config_path = path.join(get_repo_path(), 'io_config.yaml')
+    config_profile = 'default'
+
+    query = 'Your Snowflake query'  # Specify your SQL query here
+
+    with Snowflake.with_config(
+            ConfigFileLoader(config_path, config_profile)) as loader:
+        df = loader.load(query)
+
+    # Add your checks here
+    if df.empty:
+        return False
+
+    return True

mage_ai/io/postgres.py CHANGED
@@ -1,6 +1,7 @@
 from mage_ai.io.config import BaseConfigLoader, ConfigKey
 from mage_ai.io.export_utils import BadConversionError, PandasTypes
 from mage_ai.io.sql import BaseSQL
+from mage_ai.shared.utils import is_port_in_use
 from pandas import DataFrame, Series
 from psycopg2 import connect, _psycopg
 from sshtunnel import SSHTunnelForwarder
@@ -87,10 +88,21 @@ class Postgres(BaseSQL):
                 ssh_setting['ssh_pkey'] = self.settings['ssh_pkey']
             else:
                 ssh_setting['ssh_password'] = self.settings['ssh_password']
+
+            # Find an available local port
+            local_port = port
+            max_local_port = local_port + 100
+            while is_port_in_use(local_port):
+                if local_port > max_local_port:
+                    raise Exception(
+                        'Unable to find an open port, please clear your running processes '
+                        'if possible.'
+                    )
+                local_port += 1
             self.ssh_tunnel = SSHTunnelForwarder(
                 (self.settings['ssh_host'], self.settings['ssh_port']),
                 remote_bind_address=(host, port),
-                local_bind_address=('', port),
+                local_bind_address=('', local_port),
                 **ssh_setting,
             )
             self.ssh_tunnel.start()
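The companion change in mage_ai/shared/utils.py (+7 lines) supplies is_port_in_use, whose body is not shown in this diff. A plausible sketch using the standard socket module, offered as an assumption rather than the actual implementation:

import socket

def is_port_in_use(port: int) -> bool:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # connect_ex returns 0 when something is already listening on the port
        return s.connect_ex(('localhost', port)) == 0

print(is_port_in_use(5432))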

mage_ai/server/constants.py CHANGED
@@ -12,4 +12,4 @@ DATAFRAME_OUTPUT_SAMPLE_COUNT = 10
 # Dockerfile depends on it because it runs ./scripts/install_mage.sh and uses
 # the last line to determine the version to install.
 VERSION = \
-    '0.8.26'
+    '0.8.27'