mage-ai 0.8.25__py3-none-any.whl → 0.8.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mage-ai might be problematic.

Files changed (122)
  1. mage_ai/data_integrations/sources/constants.py +1 -0
  2. mage_ai/data_preparation/executors/streaming_pipeline_executor.py +2 -1
  3. mage_ai/data_preparation/logging/logger_manager.py +7 -1
  4. mage_ai/data_preparation/models/block/__init__.py +60 -17
  5. mage_ai/data_preparation/models/block/sql/__init__.py +64 -17
  6. mage_ai/data_preparation/models/block/sql/utils/shared.py +49 -3
  7. mage_ai/data_preparation/models/variable.py +6 -1
  8. mage_ai/data_preparation/repo_manager.py +5 -2
  9. mage_ai/data_preparation/shared/secrets.py +6 -3
  10. mage_ai/data_preparation/templates/sensors/bigquery.py +32 -0
  11. mage_ai/data_preparation/templates/sensors/mysql.py +33 -0
  12. mage_ai/data_preparation/templates/sensors/postgres.py +33 -0
  13. mage_ai/data_preparation/templates/sensors/redshift.py +33 -0
  14. mage_ai/data_preparation/templates/sensors/s3.py +11 -7
  15. mage_ai/data_preparation/templates/sensors/snowflake.py +33 -0
  16. mage_ai/io/postgres.py +13 -1
  17. mage_ai/server/constants.py +1 -1
  18. mage_ai/server/frontend_dist/404.html +2 -2
  19. mage_ai/server/frontend_dist/404.html.html +2 -2
  20. mage_ai/server/frontend_dist/_next/static/WbTBCvyjQQ9UFFLZOU1E5/_buildManifest.js +1 -0
  21. mage_ai/server/frontend_dist/_next/static/chunks/2344-f8ae030d6a6863ae.js +1 -0
  22. mage_ai/server/frontend_dist/_next/static/chunks/2626-e7fa4f83f8214c97.js +1 -0
  23. mage_ai/server/frontend_dist/_next/static/chunks/{4178-663d9f70bffc7a47.js → 4178-a6d1bd2be4706f51.js} +1 -1
  24. mage_ai/server/frontend_dist/_next/static/chunks/4261-88b0103fad331620.js +1 -0
  25. mage_ai/server/frontend_dist/_next/static/chunks/{4538-8a3c3e47be976ede.js → 4538-347283088b83c6bf.js} +1 -1
  26. mage_ai/server/frontend_dist/_next/static/chunks/5141-57c3868a80196da8.js +1 -0
  27. mage_ai/server/frontend_dist/_next/static/chunks/{5477-e2cc1ca7108ebc6b.js → 5477-b439f211b6146a11.js} +1 -1
  28. mage_ai/server/frontend_dist/_next/static/chunks/{5872-103815a4a043489b.js → 5872-1767c45ee6690ae5.js} +1 -1
  29. mage_ai/server/frontend_dist/_next/static/chunks/{5896-f84e336fb8877027.js → 5896-10a676bcc86978cc.js} +1 -1
  30. mage_ai/server/frontend_dist/_next/static/chunks/6166-705b4fdecaf11e63.js +1 -0
  31. mage_ai/server/frontend_dist/_next/static/chunks/6532-b1bd0b3f422abec8.js +1 -0
  32. mage_ai/server/frontend_dist/_next/static/chunks/{7400-26ce25ec46728ef7.js → 7400-a48b270726b9eef5.js} +1 -1
  33. mage_ai/server/frontend_dist/_next/static/chunks/8180-8de652170ea5ed93.js +1 -0
  34. mage_ai/server/frontend_dist/_next/static/chunks/839-15c54471a9a9bf2e.js +1 -0
  35. mage_ai/server/frontend_dist/_next/static/chunks/{9386-4b9e157e18dd2c65.js → 9386-d4cc11bab74eec8d.js} +1 -1
  36. mage_ai/server/frontend_dist/_next/static/chunks/{9832-c8b8970bb522f302.js → 9832-f97919376d52e3bf.js} +1 -1
  37. mage_ai/server/frontend_dist/_next/static/chunks/pages/{_app-624f87faa4b5ee9a.js → _app-5f3dbed367342a3d.js} +1 -1
  38. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-3046bc53d24917c7.js +1 -0
  39. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-f6059e27e601627c.js +1 -0
  40. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-123556bdfe2e194b.js → [...slug]-050ef37b6672100a.js} +1 -1
  41. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-5f95bb4c3a2d7d46.js +1 -0
  42. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-270c0198eeef1542.js +1 -0
  43. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-c9f1df40e0aa6981.js +1 -0
  44. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-e0eb0098dcbf27ac.js → block-runs-d74850779dbe87b3.js} +1 -1
  45. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-848544a58563dbec.js +1 -0
  46. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{monitors-204daac985c03b62.js → monitors-675171cfd7d7b346.js} +1 -1
  47. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/{[run]-44533e244974a422.js → [run]-b2955f0ff960894e.js} +1 -1
  48. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{runs-2d41695001370abc.js → runs-219960b3cc4742e3.js} +1 -1
  49. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-1767a2f57f887ef7.js +1 -0
  50. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-0f373aaa7deb98c9.js +1 -0
  51. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-181343d8eb894426.js → triggers-bea0439ca2a862ba.js} +1 -1
  52. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-7446a70bdd8381a5.js +1 -0
  53. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{preferences-cd6121ffe82e3834.js → preferences-997acba85f777259.js} +1 -1
  54. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-91fbb84976467947.js +1 -0
  55. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-d72dfc596e943cc4.js +1 -0
  56. mage_ai/server/frontend_dist/_next/static/chunks/pages/sign-in-c99e74aa506a6cfd.js +1 -0
  57. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-4c9ad80f8f9d1074.js +1 -0
  58. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-783b9526167f1249.js +1 -0
  59. mage_ai/server/frontend_dist/index.html +2 -2
  60. mage_ai/server/frontend_dist/manage.html +4 -4
  61. mage_ai/server/frontend_dist/pipeline-runs.html +5 -5
  62. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +5 -5
  63. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +5 -5
  64. mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +2 -2
  65. mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +5 -5
  66. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +5 -5
  67. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +5 -5
  68. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +5 -5
  69. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +5 -5
  70. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +5 -5
  71. mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +5 -5
  72. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +5 -5
  73. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +5 -5
  74. mage_ai/server/frontend_dist/pipelines/[pipeline].html +2 -2
  75. mage_ai/server/frontend_dist/pipelines.html +5 -5
  76. mage_ai/server/frontend_dist/settings/account/profile.html +5 -5
  77. mage_ai/server/frontend_dist/settings/workspace/preferences.html +5 -5
  78. mage_ai/server/frontend_dist/settings/workspace/{sync_data.html → sync-data.html} +5 -5
  79. mage_ai/server/frontend_dist/settings/workspace/users.html +5 -5
  80. mage_ai/server/frontend_dist/settings.html +2 -2
  81. mage_ai/server/frontend_dist/sign-in.html +9 -9
  82. mage_ai/server/frontend_dist/terminal.html +5 -5
  83. mage_ai/server/frontend_dist/test.html +3 -3
  84. mage_ai/server/frontend_dist/triggers.html +5 -5
  85. mage_ai/server/server.py +1 -6
  86. mage_ai/shared/security.py +9 -2
  87. mage_ai/shared/utils.py +7 -0
  88. mage_ai/tests/data_preparation/models/block/__init__.py +0 -0
  89. mage_ai/tests/data_preparation/models/block/test_sql.py +42 -0
  90. mage_ai/tests/data_preparation/models/test_block.py +63 -0
  91. mage_ai/tests/shared/test_security.py +6 -6
  92. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/METADATA +1 -1
  93. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/RECORD +99 -92
  94. mage_ai/server/frontend_dist/_next/static/ErmV1Ii-luEqAYMZkbkmO/_buildManifest.js +0 -1
  95. mage_ai/server/frontend_dist/_next/static/chunks/1830-3882c19d710feadd.js +0 -1
  96. mage_ai/server/frontend_dist/_next/static/chunks/2344-a82a406b72fe782b.js +0 -1
  97. mage_ai/server/frontend_dist/_next/static/chunks/2626-501fffa58c71ee7c.js +0 -1
  98. mage_ai/server/frontend_dist/_next/static/chunks/3688-562e0f129b09d1cd.js +0 -1
  99. mage_ai/server/frontend_dist/_next/static/chunks/3699-dcc0946dd0709216.js +0 -1
  100. mage_ai/server/frontend_dist/_next/static/chunks/4463-777e71000be29fc4.js +0 -1
  101. mage_ai/server/frontend_dist/_next/static/chunks/6532-baf1818fbc89e6c1.js +0 -1
  102. mage_ai/server/frontend_dist/_next/static/chunks/6567-2488118bb39a9d99.js +0 -1
  103. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-11d6cbf2313f0689.js +0 -1
  104. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-c226d1e215d66129.js +0 -1
  105. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-26974ca695994804.js +0 -1
  106. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-1de289b75e9c5bf1.js +0 -1
  107. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-72011b08ef047531.js +0 -1
  108. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-3b17a6d28cdde471.js +0 -1
  109. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-c31fec2be1160dbe.js +0 -1
  110. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-3a60003fce7dfb93.js +0 -1
  111. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-0e88f39d4980fc10.js +0 -1
  112. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync_data-64e03c3a285d301e.js +0 -1
  113. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-ff24167e21f2f0cf.js +0 -1
  114. mage_ai/server/frontend_dist/_next/static/chunks/pages/sign-in-404d934deb8950d5.js +0 -1
  115. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-90abd9c4dfca2556.js +0 -1
  116. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-6540e41d09691d6b.js +0 -1
  117. /mage_ai/server/frontend_dist/_next/static/{ErmV1Ii-luEqAYMZkbkmO → WbTBCvyjQQ9UFFLZOU1E5}/_middlewareManifest.js +0 -0
  118. /mage_ai/server/frontend_dist/_next/static/{ErmV1Ii-luEqAYMZkbkmO → WbTBCvyjQQ9UFFLZOU1E5}/_ssgManifest.js +0 -0
  119. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/LICENSE +0 -0
  120. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/WHEEL +0 -0
  121. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/entry_points.txt +0 -0
  122. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/top_level.txt +0 -0

mage_ai/data_integrations/sources/constants.py
@@ -31,6 +31,7 @@ SOURCES = sorted([
  dict(name='Monday'),
  dict(name='Outreach'),
  dict(name='Paystack'),
+ dict(name='Pipedrive'),
  dict(name='Postmark'),
  dict(name='Salesforce'),
  dict(name='Stripe'),

mage_ai/data_preparation/executors/streaming_pipeline_executor.py
@@ -3,6 +3,7 @@ from mage_ai.data_preparation.executors.pipeline_executor import PipelineExecutor
  from mage_ai.data_preparation.models.constants import BlockType
  from mage_ai.data_preparation.models.pipeline import Pipeline
  from mage_ai.data_preparation.shared.stream import StreamToLogger
+ from mage_ai.shared.hash import merge_dict
  from typing import Callable, Dict, List, Union
  import os
  import yaml
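
The merge_dict import added above is used in the next hunk to attach logging tags to the exception call. This assumes mage_ai.shared.hash.merge_dict performs a shallow merge of two dicts; a minimal sketch of that assumed behavior, with hypothetical tag values:

    def merge_dict(left: dict, right: dict) -> dict:
        # Shallow merge; keys from `right` win on conflicts (assumed behavior).
        merged = dict(left)
        merged.update(right or {})
        return merged

    tags = {'pipeline_uuid': 'example_pipeline', 'pipeline_run_id': 123}  # hypothetical tags
    kwargs = merge_dict(dict(error=ValueError('boom')), tags)
    # kwargs now carries `error` plus the tags, mirroring **merge_dict(dict(error=e), tags) below.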
@@ -83,7 +84,7 @@ class StreamingPipelineExecutor(PipelineExecutor):
  if not build_block_output_stdout:
  self.logger.exception(
  f'Failed to execute streaming pipeline {self.pipeline.uuid}',
- error=e,
+ **merge_dict(dict(error=e), tags),
  )
  raise e

mage_ai/data_preparation/logging/logger_manager.py
@@ -7,6 +7,8 @@ import io
  import logging
  import os

+ MAX_LOG_FILE_SIZE = 5 * 1024 * 1024
+

  class LoggerManager:
  def __init__(
@@ -50,7 +52,11 @@ class LoggerManager:
  handler = self.create_stream_handler()
  else:
  log_filepath = self.get_log_filepath(create_dir=True)
- handler = logging.FileHandler(log_filepath)
+ handler = logging.handlers.RotatingFileHandler(
+ log_filepath,
+ backupCount=10,
+ maxBytes=MAX_LOG_FILE_SIZE,
+ )

  handler.setLevel(self.log_level)
  handler.setFormatter(self.formatter)
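
For context, swapping logging.FileHandler for logging.handlers.RotatingFileHandler caps each log file at MAX_LOG_FILE_SIZE (5 MB) and keeps up to 10 rotated backups. A standalone sketch of the same stdlib configuration, with an example path only (note that RotatingFileHandler lives in the logging.handlers submodule, which normally needs its own import):

    import logging
    import logging.handlers

    MAX_LOG_FILE_SIZE = 5 * 1024 * 1024  # 5 MB, matching the constant above

    handler = logging.handlers.RotatingFileHandler(
        'pipeline.log',              # example path; the real code uses get_log_filepath()
        backupCount=10,              # keep at most 10 rolled-over files (.1 through .10)
        maxBytes=MAX_LOG_FILE_SIZE,  # roll over once the active file reaches 5 MB
    )
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))

    logger = logging.getLogger('example_pipeline_logger')
    logger.addHandler(handler)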

mage_ai/data_preparation/models/block/__init__.py
@@ -4,6 +4,7 @@ from inspect import Parameter, signature
  from logging import Logger
  from mage_ai.data_cleaner.shared.utils import (
  is_geo_dataframe,
+ is_spark_dataframe,
  )
  from mage_ai.data_preparation.models.block.extension.utils import handle_run_tests
  from mage_ai.data_preparation.models.block.utils import (
@@ -251,6 +252,10 @@ class Block:
  self.dynamic_block_uuid = None
  self.dynamic_upstream_block_uuids = None

+ # Spark session
+ self.spark = None
+ self.spark_init = False
+
  @property
  def uuid(self):
  return self.dynamic_block_uuid or self._uuid
@@ -347,23 +352,22 @@
  @property
  def full_table_name(self) -> str:
  from mage_ai.data_preparation.models.block.sql.utils.shared import (
- extract_and_replace_text_between_strings,
+ extract_create_statement_table_name,
+ extract_insert_statement_table_names,
  )

  if not self.content:
  return None

- create_statement_partial, _ = extract_and_replace_text_between_strings(
- self.content,
- 'create',
- r'\(',
- )
+ table_name = extract_create_statement_table_name(self.content)
+ if table_name:
+ return table_name

- if not create_statement_partial:
+ matches = extract_insert_statement_table_names(self.content)
+ if len(matches) == 0:
  return None

- parts = create_statement_partial[:len(create_statement_partial) - 1].strip().split(' ')
- return parts[-1]
+ return matches[len(matches) - 1]

  @classmethod
  def after_create(self, block: 'Block', **kwargs):
@@ -1041,7 +1045,6 @@
  block_uuid,
  partition=execution_partition,
  )
-
  if not include_print_outputs:
  all_variables = self.output_variables(execution_partition=execution_partition)
@@ -1051,6 +1054,7 @@
  block_uuid,
  v,
  partition=execution_partition,
+ spark=self.__get_spark_session(),
  )

  if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1059,6 +1063,7 @@
  data = variable_object.read_data(
  sample=True,
  sample_count=sample_count,
+ spark=self.__get_spark_session(),
  )
  if type(data) is pd.DataFrame:
  try:
@@ -1118,6 +1123,19 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
  type=DataType.TEXT,
  variable_uuid=v,
  )
+ elif is_spark_dataframe(data):
+ df = data.toPandas()
+ columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+ data = dict(
+ sample_data=dict(
+ columns=columns_to_display,
+ rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+ ),
+ type=DataType.TABLE,
+ variable_uuid=v,
+ )
+ data_products.append(data)
+ continue
  outputs.append(data)
  return outputs + data_products
@@ -1154,6 +1172,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
  block_uuid,
  v,
  partition=execution_partition,
+ spark=self.__get_spark_session(),
  )

  if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1162,6 +1181,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
  data = await variable_object.read_data_async(
  sample=True,
  sample_count=sample_count,
+ spark=self.__get_spark_session(),
  )
  if type(data) is pd.DataFrame:
  try:
@@ -1221,6 +1241,19 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
  type=DataType.TEXT,
  variable_uuid=v,
  )
+ elif is_spark_dataframe(data):
+ df = data.toPandas()
+ columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+ data = dict(
+ sample_data=dict(
+ columns=columns_to_display,
+ rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+ ),
+ type=DataType.TABLE,
+ variable_uuid=v,
+ )
+ data_products.append(data)
+ continue
  outputs.append(data)
  return outputs + data_products
@@ -1651,14 +1684,23 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
  is_spark_env()):
  global_vars = global_vars or dict()
  if not global_vars.get('spark'):
- try:
- from pyspark.sql import SparkSession
- global_vars['spark'] = SparkSession.builder.master(
- os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
- except Exception:
- pass
+ spark = self.__get_spark_session()
+ if spark is not None:
+ global_vars['spark'] = spark
  return global_vars

+ def __get_spark_session(self):
+ if self.spark_init:
+ return self.spark
+ try:
+ from pyspark.sql import SparkSession
+ self.spark = SparkSession.builder.master(
+ os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
+ except Exception:
+ self.spark = None
+ self.spark_init = True
+ return self.spark
+
  def __store_variables_prepare(
  self,
  variable_mapping: Dict,
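
The new __get_spark_session helper memoizes the session: the first call attempts to build one, and the spark_init flag prevents repeated attempts when pyspark is missing or session creation fails. A rough standalone sketch of the same caching pattern, assuming the SPARK_MASTER_HOST environment variable used in the diff:

    import os

    class SparkSessionCache:
        def __init__(self):
            self.spark = None
            self.spark_init = False

        def get(self):
            # Return the cached session (or cached failure) after the first attempt.
            if self.spark_init:
                return self.spark
            try:
                from pyspark.sql import SparkSession
                self.spark = SparkSession.builder.master(
                    os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
            except Exception:
                self.spark = None  # pyspark unavailable or session creation failed
            self.spark_init = True
            return self.spark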
@@ -1710,7 +1752,8 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
  dynamic_block_uuid,
  )
  for uuid, data in variables_data['variable_mapping'].items():
- if spark is not None and type(data) is pd.DataFrame:
+ if spark is not None and self.pipeline.type == PipelineType.PYSPARK \
+ and type(data) is pd.DataFrame:
  data = spark.createDataFrame(data)
  self.pipeline.variable_manager.add_variable(
  self.pipeline.uuid,

mage_ai/data_preparation/models/block/sql/__init__.py
@@ -9,6 +9,7 @@ from mage_ai.data_preparation.models.block.sql import (
  trino,
  )
  from mage_ai.data_preparation.models.block.sql.utils.shared import (
+ has_create_or_insert_statement,
  interpolate_vars,
  )
  from mage_ai.data_preparation.models.constants import BlockType
@@ -18,7 +19,9 @@ from mage_ai.io.config import ConfigFileLoader
  from os import path
  from time import sleep
  from typing import Any, Dict, List
+ import re

+ MAGE_SEMI_COLON = '__MAGE_SEMI_COLON__'
  PREVIEWABLE_BLOCK_TYPES = [
  BlockType.DATA_EXPORTER,
  BlockType.DATA_LOADER,
@@ -82,6 +85,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -132,6 +136,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -172,6 +177,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -209,6 +215,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -246,6 +253,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -287,6 +295,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -329,6 +338,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -354,36 +364,73 @@ def execute_sql_code(
  ]


+ def split_query_string(query_string: str) -> List[str]:
+ text_parts = []
+
+ matches = re.finditer(r"'(.*?)'|\"(.*?)\"", query_string, re.IGNORECASE)
+
+ previous_idx = 0
+
+ for idx, match in enumerate(matches):
+ matched_string = match.group()
+ updated_string = re.sub(r';', MAGE_SEMI_COLON, matched_string)
+
+ start_idx, end_idx = match.span()
+
+ previous_chunk = query_string[previous_idx:start_idx]
+ text_parts.append(previous_chunk)
+ text_parts.append(updated_string)
+ previous_idx = end_idx
+
+ text_parts.append(query_string[previous_idx:])
+
+ text_combined = ''.join(text_parts)
+ queries = text_combined.split(';')
+
+ arr = []
+ for query in queries:
+ query = query.strip()
+ if not query:
+ continue
+
+ lines = query.split('\n')
+ query = '\n'.join(list(filter(lambda x: not x.startswith('--'), lines)))
+ query = query.strip()
+ query = re.sub(MAGE_SEMI_COLON, ';', query)
+
+ if query:
+ arr.append(query)
+
+ return arr
+
+
  def execute_raw_sql(
  loader,
  block: 'Block',
  query_string: str,
+ configuration: Dict = {},
  should_query: bool = False,
  ) -> List[Any]:
  queries = []
  fetch_query_at_indexes = []

- # create_statement, query_statement = extract_and_replace_text_between_strings(
- # query_string,
- # 'create',
- # ';',
- # case_sensitive=True,
- # )
-
- # if create_statement:
- # queries.append(create_statement)
- # fetch_query_at_indexes.append(False)
-
- # queries.append(query_statement)
- # fetch_query_at_indexes.append(False)
+ has_create_or_insert = has_create_or_insert_statement(query_string)

- for query in query_string.split(';'):
- query = query.strip()
- if query and not query.startswith('--'):
+ for query in split_query_string(query_string):
+ if has_create_or_insert:
  queries.append(query)
  fetch_query_at_indexes.append(False)
+ else:
+ if should_query:
+ query = f"""SELECT *
+ FROM (
+ {query}
+ ) AS {block.table_name}__limit
+ LIMIT 1000"""
+ queries.append(query)
+ fetch_query_at_indexes.append(True)

- if should_query:
+ if should_query and has_create_or_insert:
  queries.append(f'SELECT * FROM {block.full_table_name} LIMIT 1000')
  fetch_query_at_indexes.append(block.full_table_name)
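
split_query_string protects semicolons inside quoted literals by masking them with the __MAGE_SEMI_COLON__ placeholder, splits the remaining text on semicolons, drops lines starting with --, and then restores the placeholders. A worked example of the expected behavior (illustrative SQL, not from the package):

    sql = "INSERT INTO example_schema.events VALUES (1, 'a;b'); SELECT * FROM example_schema.events;"
    # Expected result of split_query_string(sql), per the logic above:
    #   ["INSERT INTO example_schema.events VALUES (1, 'a;b')",
    #    'SELECT * FROM example_schema.events']
    # The ';' inside the quoted literal 'a;b' survives because it is masked
    # with the placeholder before the split and restored afterwards.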

mage_ai/data_preparation/models/block/sql/utils/shared.py
@@ -47,23 +47,33 @@ def interpolate_input(block, query, replace_func=None):
  for idx, upstream_block in enumerate(block.upstream_blocks):
  matcher1 = '{} df_{} {}'.format('{{', idx + 1, '}}')

- if BlockLanguage.SQL == upstream_block.type:
+ is_sql = BlockLanguage.SQL == upstream_block.language
+ if is_sql:
  configuration = upstream_block.configuration
  else:
  configuration = block.configuration
+ use_raw_sql = configuration.get('use_raw_sql')

  database = configuration.get('data_provider_database', '')
  schema = configuration.get('data_provider_schema', '')

+ replace_with = __replace_func(database, schema, upstream_block.table_name)
+ upstream_block_content = upstream_block.content
+ if is_sql and use_raw_sql and not has_create_or_insert_statement(upstream_block_content):
+ upstream_query = interpolate_input(upstream_block, upstream_block_content)
+ replace_with = f"""(
+ {upstream_query}
+ ) AS {upstream_block.table_name}"""
+
  query = re.sub(
  '{}[ ]*df_{}[ ]*{}'.format(r'\{\{', idx + 1, r'\}\}'),
- __replace_func(database, schema, upstream_block.table_name),
+ replace_with,
  query,
  )

  query = query.replace(
  f'{matcher1}',
- __replace_func(database, schema, upstream_block.table_name),
+ replace_with,
  )

  return query
@@ -170,3 +180,39 @@ def extract_and_replace_text_between_strings(
  new_text = text[0:max(start_idx - 1, 0)] + replace_string + text[end_idx + 1:]

  return extracted_text, new_text
+
+
+ def remove_comments(text: str) -> str:
+ lines = text.split('\n')
+ return '\n'.join(line for line in lines if not line.startswith('--'))
+
+
+ def extract_create_statement_table_name(text: str) -> str:
+ statement_partial, _ = extract_and_replace_text_between_strings(
+ remove_comments(text),
+ r'create table(?: if not exists)*',
+ r'\(',
+ )
+ if not statement_partial:
+ return None
+
+ parts = statement_partial[:len(statement_partial) - 1].strip().split(' ')
+ return parts[-1]
+
+
+ def extract_insert_statement_table_names(text: str) -> List[str]:
+ matches = re.findall(
+ r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
+ remove_comments(text),
+ re.IGNORECASE,
+ )
+ return matches
+
+
+ def has_create_or_insert_statement(text: str) -> bool:
+ table_name = extract_create_statement_table_name(text)
+ if table_name:
+ return True
+
+ matches = extract_insert_statement_table_names(text)
+ return len(matches) >= 1
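
These helpers back the new Block.full_table_name fallback shown earlier: a CREATE TABLE target wins when present, otherwise the last INSERT target is used. A hedged worked example against the regexes above (sample SQL, not from the package):

    sql = (
        "INSERT INTO analytics.stage_orders SELECT * FROM raw.orders; "
        "INSERT OVERWRITE analytics.orders SELECT * FROM analytics.stage_orders;"
    )
    # extract_create_statement_table_name(sql)  -> None (no CREATE TABLE statement)
    # extract_insert_statement_table_names(sql) -> ['analytics.stage_orders', 'analytics.orders']
    # has_create_or_insert_statement(sql)       -> True
    # full_table_name would therefore resolve to the last match, 'analytics.orders'.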

mage_ai/data_preparation/models/variable.py
@@ -175,6 +175,8 @@
  """
  if self.variable_type == VariableType.DATAFRAME:
  return self.__read_parquet(sample=sample, sample_count=sample_count)
+ elif self.variable_type == VariableType.SPARK_DATAFRAME:
+ return self.__read_spark_parquet(sample=sample, sample_count=sample_count, spark=spark)
  elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
  return await self.__read_dataframe_analysis_async(
  dataframe_analysis_keys=dataframe_analysis_keys,
@@ -367,7 +369,7 @@
  def __read_spark_parquet(self, sample: bool = False, sample_count: int = None, spark=None):
  if spark is None:
  return None
- return (
+ df = (
  spark.read
  .format('csv')
  .option('header', 'true')
@@ -375,6 +377,9 @@
  .option('delimiter', ',')
  .load(self.variable_path)
  )
+ if sample and sample_count:
+ df = df.limit(sample_count)
+ return df

  def __write_geo_dataframe(self, data) -> None:
  os.makedirs(self.variable_path, exist_ok=True)

mage_ai/data_preparation/repo_manager.py
@@ -114,12 +114,15 @@ def init_repo(repo_path: str) -> None:
  if os.path.exists(repo_path):
  raise FileExistsError(f'Repository {repo_path} already exists')

- os.makedirs(os.getenv(MAGE_DATA_DIR_ENV_VAR, DEFAULT_MAGE_DATA_DIR), exist_ok=True)
+ os.makedirs(
+ os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR,
+ exist_ok=True,
+ )
  copy_template_directory('repo', repo_path)


  def get_data_dir() -> str:
- return os.getenv(MAGE_DATA_DIR_ENV_VAR, DEFAULT_MAGE_DATA_DIR)
+ return os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR


  def get_repo_name() -> str:
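
The change from os.getenv(var, default) to os.getenv(var) or default matters in one extra case: an environment variable that is set but empty now also falls back to the default. A small illustration with a placeholder variable name and default value:

    import os

    os.environ['MAGE_DATA_DIR'] = ''  # set, but empty

    os.getenv('MAGE_DATA_DIR', '/tmp/default_dir')    # -> '' (two-argument default is not used)
    os.getenv('MAGE_DATA_DIR') or '/tmp/default_dir'  # -> '/tmp/default_dir'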

mage_ai/data_preparation/shared/secrets.py
@@ -66,6 +66,9 @@ def get_secret_value(name: str) -> str:
  from mage_ai.orchestration.db.models import Secret
  fernet = Fernet(get_encryption_key())

- secret = Secret.query.filter(Secret.name == name).one_or_none()
- if secret:
- return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+ try:
+ secret = Secret.query.filter(Secret.name == name).one_or_none()
+ if secret:
+ return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+ except Exception:
+ print(f'WARNING: Could not find secret value for secret {name}')

mage_ai/data_preparation/templates/sensors/bigquery.py (new file)
@@ -0,0 +1,32 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.bigquery import BigQuery
+ from mage_ai.io.config import ConfigFileLoader
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_bigquery_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a BigQuery query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your BigQuery query'  # Specify your SQL query here
+
+ loader = BigQuery.with_config(ConfigFileLoader(config_path, config_profile))
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True

mage_ai/data_preparation/templates/sensors/mysql.py (new file)
@@ -0,0 +1,33 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.config import ConfigFileLoader
+ from mage_ai.io.mysql import MySQL
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_mysql_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a MySQL query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your MySQL query'  # Specify your SQL query here
+
+ with MySQL.with_config(
+ ConfigFileLoader(config_path, config_profile)) as loader:
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True

mage_ai/data_preparation/templates/sensors/postgres.py (new file)
@@ -0,0 +1,33 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.config import ConfigFileLoader
+ from mage_ai.io.postgres import Postgres
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_postgres_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a Postgres query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your Postgres query'  # Specify your SQL query here
+
+ with Postgres.with_config(
+ ConfigFileLoader(config_path, config_profile)) as loader:
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True

mage_ai/data_preparation/templates/sensors/redshift.py (new file)
@@ -0,0 +1,33 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.config import ConfigFileLoader
+ from mage_ai.io.redshift import Redshift
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_redshift_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a Redshift query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your Redshift query'  # Specify your SQL query here
+
+ with Redshift.with_config(
+ ConfigFileLoader(config_path, config_profile)) as loader:
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True