mage-ai 0.8.25__py3-none-any.whl → 0.8.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mage-ai might be problematic.

Files changed (122)
  1. mage_ai/data_integrations/sources/constants.py +1 -0
  2. mage_ai/data_preparation/executors/streaming_pipeline_executor.py +2 -1
  3. mage_ai/data_preparation/logging/logger_manager.py +7 -1
  4. mage_ai/data_preparation/models/block/__init__.py +60 -17
  5. mage_ai/data_preparation/models/block/sql/__init__.py +64 -17
  6. mage_ai/data_preparation/models/block/sql/utils/shared.py +49 -3
  7. mage_ai/data_preparation/models/variable.py +6 -1
  8. mage_ai/data_preparation/repo_manager.py +5 -2
  9. mage_ai/data_preparation/shared/secrets.py +6 -3
  10. mage_ai/data_preparation/templates/sensors/bigquery.py +32 -0
  11. mage_ai/data_preparation/templates/sensors/mysql.py +33 -0
  12. mage_ai/data_preparation/templates/sensors/postgres.py +33 -0
  13. mage_ai/data_preparation/templates/sensors/redshift.py +33 -0
  14. mage_ai/data_preparation/templates/sensors/s3.py +11 -7
  15. mage_ai/data_preparation/templates/sensors/snowflake.py +33 -0
  16. mage_ai/io/postgres.py +13 -1
  17. mage_ai/server/constants.py +1 -1
  18. mage_ai/server/frontend_dist/404.html +2 -2
  19. mage_ai/server/frontend_dist/404.html.html +2 -2
  20. mage_ai/server/frontend_dist/_next/static/WbTBCvyjQQ9UFFLZOU1E5/_buildManifest.js +1 -0
  21. mage_ai/server/frontend_dist/_next/static/chunks/2344-f8ae030d6a6863ae.js +1 -0
  22. mage_ai/server/frontend_dist/_next/static/chunks/2626-e7fa4f83f8214c97.js +1 -0
  23. mage_ai/server/frontend_dist/_next/static/chunks/{4178-663d9f70bffc7a47.js → 4178-a6d1bd2be4706f51.js} +1 -1
  24. mage_ai/server/frontend_dist/_next/static/chunks/4261-88b0103fad331620.js +1 -0
  25. mage_ai/server/frontend_dist/_next/static/chunks/{4538-8a3c3e47be976ede.js → 4538-347283088b83c6bf.js} +1 -1
  26. mage_ai/server/frontend_dist/_next/static/chunks/5141-57c3868a80196da8.js +1 -0
  27. mage_ai/server/frontend_dist/_next/static/chunks/{5477-e2cc1ca7108ebc6b.js → 5477-b439f211b6146a11.js} +1 -1
  28. mage_ai/server/frontend_dist/_next/static/chunks/{5872-103815a4a043489b.js → 5872-1767c45ee6690ae5.js} +1 -1
  29. mage_ai/server/frontend_dist/_next/static/chunks/{5896-f84e336fb8877027.js → 5896-10a676bcc86978cc.js} +1 -1
  30. mage_ai/server/frontend_dist/_next/static/chunks/6166-705b4fdecaf11e63.js +1 -0
  31. mage_ai/server/frontend_dist/_next/static/chunks/6532-b1bd0b3f422abec8.js +1 -0
  32. mage_ai/server/frontend_dist/_next/static/chunks/{7400-26ce25ec46728ef7.js → 7400-a48b270726b9eef5.js} +1 -1
  33. mage_ai/server/frontend_dist/_next/static/chunks/8180-8de652170ea5ed93.js +1 -0
  34. mage_ai/server/frontend_dist/_next/static/chunks/839-15c54471a9a9bf2e.js +1 -0
  35. mage_ai/server/frontend_dist/_next/static/chunks/{9386-4b9e157e18dd2c65.js → 9386-d4cc11bab74eec8d.js} +1 -1
  36. mage_ai/server/frontend_dist/_next/static/chunks/{9832-c8b8970bb522f302.js → 9832-f97919376d52e3bf.js} +1 -1
  37. mage_ai/server/frontend_dist/_next/static/chunks/pages/{_app-624f87faa4b5ee9a.js → _app-5f3dbed367342a3d.js} +1 -1
  38. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-3046bc53d24917c7.js +1 -0
  39. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-f6059e27e601627c.js +1 -0
  40. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-123556bdfe2e194b.js → [...slug]-050ef37b6672100a.js} +1 -1
  41. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-5f95bb4c3a2d7d46.js +1 -0
  42. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-270c0198eeef1542.js +1 -0
  43. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-c9f1df40e0aa6981.js +1 -0
  44. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-e0eb0098dcbf27ac.js → block-runs-d74850779dbe87b3.js} +1 -1
  45. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-848544a58563dbec.js +1 -0
  46. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{monitors-204daac985c03b62.js → monitors-675171cfd7d7b346.js} +1 -1
  47. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/{[run]-44533e244974a422.js → [run]-b2955f0ff960894e.js} +1 -1
  48. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{runs-2d41695001370abc.js → runs-219960b3cc4742e3.js} +1 -1
  49. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-1767a2f57f887ef7.js +1 -0
  50. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-0f373aaa7deb98c9.js +1 -0
  51. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-181343d8eb894426.js → triggers-bea0439ca2a862ba.js} +1 -1
  52. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-7446a70bdd8381a5.js +1 -0
  53. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/{preferences-cd6121ffe82e3834.js → preferences-997acba85f777259.js} +1 -1
  54. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-91fbb84976467947.js +1 -0
  55. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-d72dfc596e943cc4.js +1 -0
  56. mage_ai/server/frontend_dist/_next/static/chunks/pages/sign-in-c99e74aa506a6cfd.js +1 -0
  57. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-4c9ad80f8f9d1074.js +1 -0
  58. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-783b9526167f1249.js +1 -0
  59. mage_ai/server/frontend_dist/index.html +2 -2
  60. mage_ai/server/frontend_dist/manage.html +4 -4
  61. mage_ai/server/frontend_dist/pipeline-runs.html +5 -5
  62. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +5 -5
  63. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +5 -5
  64. mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +2 -2
  65. mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +5 -5
  66. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +5 -5
  67. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +5 -5
  68. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +5 -5
  69. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +5 -5
  70. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +5 -5
  71. mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +5 -5
  72. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +5 -5
  73. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +5 -5
  74. mage_ai/server/frontend_dist/pipelines/[pipeline].html +2 -2
  75. mage_ai/server/frontend_dist/pipelines.html +5 -5
  76. mage_ai/server/frontend_dist/settings/account/profile.html +5 -5
  77. mage_ai/server/frontend_dist/settings/workspace/preferences.html +5 -5
  78. mage_ai/server/frontend_dist/settings/workspace/{sync_data.html → sync-data.html} +5 -5
  79. mage_ai/server/frontend_dist/settings/workspace/users.html +5 -5
  80. mage_ai/server/frontend_dist/settings.html +2 -2
  81. mage_ai/server/frontend_dist/sign-in.html +9 -9
  82. mage_ai/server/frontend_dist/terminal.html +5 -5
  83. mage_ai/server/frontend_dist/test.html +3 -3
  84. mage_ai/server/frontend_dist/triggers.html +5 -5
  85. mage_ai/server/server.py +1 -6
  86. mage_ai/shared/security.py +9 -2
  87. mage_ai/shared/utils.py +7 -0
  88. mage_ai/tests/data_preparation/models/block/__init__.py +0 -0
  89. mage_ai/tests/data_preparation/models/block/test_sql.py +42 -0
  90. mage_ai/tests/data_preparation/models/test_block.py +63 -0
  91. mage_ai/tests/shared/test_security.py +6 -6
  92. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/METADATA +1 -1
  93. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/RECORD +99 -92
  94. mage_ai/server/frontend_dist/_next/static/ErmV1Ii-luEqAYMZkbkmO/_buildManifest.js +0 -1
  95. mage_ai/server/frontend_dist/_next/static/chunks/1830-3882c19d710feadd.js +0 -1
  96. mage_ai/server/frontend_dist/_next/static/chunks/2344-a82a406b72fe782b.js +0 -1
  97. mage_ai/server/frontend_dist/_next/static/chunks/2626-501fffa58c71ee7c.js +0 -1
  98. mage_ai/server/frontend_dist/_next/static/chunks/3688-562e0f129b09d1cd.js +0 -1
  99. mage_ai/server/frontend_dist/_next/static/chunks/3699-dcc0946dd0709216.js +0 -1
  100. mage_ai/server/frontend_dist/_next/static/chunks/4463-777e71000be29fc4.js +0 -1
  101. mage_ai/server/frontend_dist/_next/static/chunks/6532-baf1818fbc89e6c1.js +0 -1
  102. mage_ai/server/frontend_dist/_next/static/chunks/6567-2488118bb39a9d99.js +0 -1
  103. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-11d6cbf2313f0689.js +0 -1
  104. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-c226d1e215d66129.js +0 -1
  105. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills-26974ca695994804.js +0 -1
  106. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-1de289b75e9c5bf1.js +0 -1
  107. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/logs-72011b08ef047531.js +0 -1
  108. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-3b17a6d28cdde471.js +0 -1
  109. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/syncs-c31fec2be1160dbe.js +0 -1
  110. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/[...slug]-3a60003fce7dfb93.js +0 -1
  111. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-0e88f39d4980fc10.js +0 -1
  112. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync_data-64e03c3a285d301e.js +0 -1
  113. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-ff24167e21f2f0cf.js +0 -1
  114. mage_ai/server/frontend_dist/_next/static/chunks/pages/sign-in-404d934deb8950d5.js +0 -1
  115. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-90abd9c4dfca2556.js +0 -1
  116. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-6540e41d09691d6b.js +0 -1
  117. /mage_ai/server/frontend_dist/_next/static/{ErmV1Ii-luEqAYMZkbkmO → WbTBCvyjQQ9UFFLZOU1E5}/_middlewareManifest.js +0 -0
  118. /mage_ai/server/frontend_dist/_next/static/{ErmV1Ii-luEqAYMZkbkmO → WbTBCvyjQQ9UFFLZOU1E5}/_ssgManifest.js +0 -0
  119. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/LICENSE +0 -0
  120. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/WHEEL +0 -0
  121. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/entry_points.txt +0 -0
  122. {mage_ai-0.8.25.dist-info → mage_ai-0.8.27.dist-info}/top_level.txt +0 -0

mage_ai/data_integrations/sources/constants.py
@@ -31,6 +31,7 @@ SOURCES = sorted([
  dict(name='Monday'),
  dict(name='Outreach'),
  dict(name='Paystack'),
+ dict(name='Pipedrive'),
  dict(name='Postmark'),
  dict(name='Salesforce'),
  dict(name='Stripe'),

mage_ai/data_preparation/executors/streaming_pipeline_executor.py
@@ -3,6 +3,7 @@ from mage_ai.data_preparation.executors.pipeline_executor import PipelineExecutor
  from mage_ai.data_preparation.models.constants import BlockType
  from mage_ai.data_preparation.models.pipeline import Pipeline
  from mage_ai.data_preparation.shared.stream import StreamToLogger
+ from mage_ai.shared.hash import merge_dict
  from typing import Callable, Dict, List, Union
  import os
  import yaml
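
The merge_dict import added above is used in the next hunk to attach logging tags to the exception call. This assumes mage_ai.shared.hash.merge_dict performs a shallow merge of two dicts; a minimal sketch of that assumed behavior, with hypothetical tag values:

    def merge_dict(left: dict, right: dict) -> dict:
        # Shallow merge; keys from `right` win on conflicts (assumed behavior).
        merged = dict(left)
        merged.update(right or {})
        return merged

    tags = {'pipeline_uuid': 'example_pipeline', 'pipeline_run_id': 123}  # hypothetical tags
    kwargs = merge_dict(dict(error=ValueError('boom')), tags)
    # kwargs now carries `error` plus the tags, mirroring **merge_dict(dict(error=e), tags) below.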
@@ -83,7 +84,7 @@ class StreamingPipelineExecutor(PipelineExecutor):
  if not build_block_output_stdout:
  self.logger.exception(
  f'Failed to execute streaming pipeline {self.pipeline.uuid}',
- error=e,
+ **merge_dict(dict(error=e), tags),
  )
  raise e

mage_ai/data_preparation/logging/logger_manager.py
@@ -7,6 +7,8 @@ import io
  import logging
  import os

+ MAX_LOG_FILE_SIZE = 5 * 1024 * 1024
+

  class LoggerManager:
  def __init__(
@@ -50,7 +52,11 @@ class LoggerManager:
  handler = self.create_stream_handler()
  else:
  log_filepath = self.get_log_filepath(create_dir=True)
- handler = logging.FileHandler(log_filepath)
+ handler = logging.handlers.RotatingFileHandler(
+ log_filepath,
+ backupCount=10,
+ maxBytes=MAX_LOG_FILE_SIZE,
+ )

  handler.setLevel(self.log_level)
  handler.setFormatter(self.formatter)
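
For context, swapping logging.FileHandler for logging.handlers.RotatingFileHandler caps each log file at MAX_LOG_FILE_SIZE (5 MB) and keeps up to 10 rotated backups. A standalone sketch of the same stdlib configuration, with an example path only (note that RotatingFileHandler lives in the logging.handlers submodule, which normally needs its own import):

    import logging
    import logging.handlers

    MAX_LOG_FILE_SIZE = 5 * 1024 * 1024  # 5 MB, matching the constant above

    handler = logging.handlers.RotatingFileHandler(
        'pipeline.log',              # example path; the real code uses get_log_filepath()
        backupCount=10,              # keep at most 10 rolled-over files (.1 through .10)
        maxBytes=MAX_LOG_FILE_SIZE,  # roll over once the active file reaches 5 MB
    )
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))

    logger = logging.getLogger('example_pipeline_logger')
    logger.addHandler(handler)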

mage_ai/data_preparation/models/block/__init__.py
@@ -4,6 +4,7 @@ from inspect import Parameter, signature
  from logging import Logger
  from mage_ai.data_cleaner.shared.utils import (
  is_geo_dataframe,
+ is_spark_dataframe,
  )
  from mage_ai.data_preparation.models.block.extension.utils import handle_run_tests
  from mage_ai.data_preparation.models.block.utils import (
@@ -251,6 +252,10 @@ class Block:
  self.dynamic_block_uuid = None
  self.dynamic_upstream_block_uuids = None

+ # Spark session
+ self.spark = None
+ self.spark_init = False
+
  @property
  def uuid(self):
  return self.dynamic_block_uuid or self._uuid
@@ -347,23 +352,22 @@
  @property
  def full_table_name(self) -> str:
  from mage_ai.data_preparation.models.block.sql.utils.shared import (
- extract_and_replace_text_between_strings,
+ extract_create_statement_table_name,
+ extract_insert_statement_table_names,
  )

  if not self.content:
  return None

- create_statement_partial, _ = extract_and_replace_text_between_strings(
- self.content,
- 'create',
- r'\(',
- )
+ table_name = extract_create_statement_table_name(self.content)
+ if table_name:
+ return table_name

- if not create_statement_partial:
+ matches = extract_insert_statement_table_names(self.content)
+ if len(matches) == 0:
  return None

- parts = create_statement_partial[:len(create_statement_partial) - 1].strip().split(' ')
- return parts[-1]
+ return matches[len(matches) - 1]

  @classmethod
  def after_create(self, block: 'Block', **kwargs):
@@ -1041,7 +1045,6 @@
  block_uuid,
  partition=execution_partition,
  )
-
  if not include_print_outputs:
  all_variables = self.output_variables(execution_partition=execution_partition)
@@ -1051,6 +1054,7 @@
  block_uuid,
  v,
  partition=execution_partition,
+ spark=self.__get_spark_session(),
  )

  if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1059,6 +1063,7 @@
  data = variable_object.read_data(
  sample=True,
  sample_count=sample_count,
+ spark=self.__get_spark_session(),
  )
  if type(data) is pd.DataFrame:
  try:
@@ -1118,6 +1123,19 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
  type=DataType.TEXT,
  variable_uuid=v,
  )
+ elif is_spark_dataframe(data):
+ df = data.toPandas()
+ columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+ data = dict(
+ sample_data=dict(
+ columns=columns_to_display,
+ rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+ ),
+ type=DataType.TABLE,
+ variable_uuid=v,
+ )
+ data_products.append(data)
+ continue
  outputs.append(data)
  return outputs + data_products
@@ -1154,6 +1172,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
  block_uuid,
  v,
  partition=execution_partition,
+ spark=self.__get_spark_session(),
  )

  if variable_type is not None and variable_object.variable_type != variable_type:
@@ -1162,6 +1181,7 @@ df = get_variable('{self.pipeline.uuid}', '{self.uuid}', 'df')
  data = await variable_object.read_data_async(
  sample=True,
  sample_count=sample_count,
+ spark=self.__get_spark_session(),
  )
  if type(data) is pd.DataFrame:
  try:
@@ -1221,6 +1241,19 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
  type=DataType.TEXT,
  variable_uuid=v,
  )
+ elif is_spark_dataframe(data):
+ df = data.toPandas()
+ columns_to_display = df.columns.tolist()[:DATAFRAME_ANALYSIS_MAX_COLUMNS]
+ data = dict(
+ sample_data=dict(
+ columns=columns_to_display,
+ rows=json.loads(df[columns_to_display].to_json(orient='split'))['data']
+ ),
+ type=DataType.TABLE,
+ variable_uuid=v,
+ )
+ data_products.append(data)
+ continue
  outputs.append(data)
  return outputs + data_products
@@ -1651,14 +1684,23 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
  is_spark_env()):
  global_vars = global_vars or dict()
  if not global_vars.get('spark'):
- try:
- from pyspark.sql import SparkSession
- global_vars['spark'] = SparkSession.builder.master(
- os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
- except Exception:
- pass
+ spark = self.__get_spark_session()
+ if spark is not None:
+ global_vars['spark'] = spark
  return global_vars

+ def __get_spark_session(self):
+ if self.spark_init:
+ return self.spark
+ try:
+ from pyspark.sql import SparkSession
+ self.spark = SparkSession.builder.master(
+ os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
+ except Exception:
+ self.spark = None
+ self.spark_init = True
+ return self.spark
+
  def __store_variables_prepare(
  self,
  variable_mapping: Dict,
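
The new __get_spark_session helper memoizes the session: the first call attempts to build one, and the spark_init flag prevents repeated attempts when pyspark is missing or session creation fails. A rough standalone sketch of the same caching pattern, assuming the SPARK_MASTER_HOST environment variable used in the diff:

    import os

    class SparkSessionCache:
        def __init__(self):
            self.spark = None
            self.spark_init = False

        def get(self):
            # Return the cached session (or cached failure) after the first attempt.
            if self.spark_init:
                return self.spark
            try:
                from pyspark.sql import SparkSession
                self.spark = SparkSession.builder.master(
                    os.getenv('SPARK_MASTER_HOST', 'local')).getOrCreate()
            except Exception:
                self.spark = None  # pyspark unavailable or session creation failed
            self.spark_init = True
            return self.spark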
@@ -1710,7 +1752,8 @@ df = get_variable('{self.pipeline.uuid}', '{block_uuid}', 'df')
  dynamic_block_uuid,
  )
  for uuid, data in variables_data['variable_mapping'].items():
- if spark is not None and type(data) is pd.DataFrame:
+ if spark is not None and self.pipeline.type == PipelineType.PYSPARK \
+ and type(data) is pd.DataFrame:
  data = spark.createDataFrame(data)
  self.pipeline.variable_manager.add_variable(
  self.pipeline.uuid,

mage_ai/data_preparation/models/block/sql/__init__.py
@@ -9,6 +9,7 @@ from mage_ai.data_preparation.models.block.sql import (
  trino,
  )
  from mage_ai.data_preparation.models.block.sql.utils.shared import (
+ has_create_or_insert_statement,
  interpolate_vars,
  )
  from mage_ai.data_preparation.models.constants import BlockType
@@ -18,7 +19,9 @@ from mage_ai.io.config import ConfigFileLoader
  from os import path
  from time import sleep
  from typing import Any, Dict, List
+ import re

+ MAGE_SEMI_COLON = '__MAGE_SEMI_COLON__'
  PREVIEWABLE_BLOCK_TYPES = [
  BlockType.DATA_EXPORTER,
  BlockType.DATA_LOADER,
@@ -82,6 +85,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -132,6 +136,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -172,6 +177,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -209,6 +215,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -246,6 +253,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -287,6 +295,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -329,6 +338,7 @@ def execute_sql_code(
  loader,
  block,
  query_string,
+ configuration=configuration,
  should_query=should_query,
  )
  else:
@@ -354,36 +364,73 @@ def execute_sql_code(
  ]


+ def split_query_string(query_string: str) -> List[str]:
+ text_parts = []
+
+ matches = re.finditer(r"'(.*?)'|\"(.*?)\"", query_string, re.IGNORECASE)
+
+ previous_idx = 0
+
+ for idx, match in enumerate(matches):
+ matched_string = match.group()
+ updated_string = re.sub(r';', MAGE_SEMI_COLON, matched_string)
+
+ start_idx, end_idx = match.span()
+
+ previous_chunk = query_string[previous_idx:start_idx]
+ text_parts.append(previous_chunk)
+ text_parts.append(updated_string)
+ previous_idx = end_idx
+
+ text_parts.append(query_string[previous_idx:])
+
+ text_combined = ''.join(text_parts)
+ queries = text_combined.split(';')
+
+ arr = []
+ for query in queries:
+ query = query.strip()
+ if not query:
+ continue
+
+ lines = query.split('\n')
+ query = '\n'.join(list(filter(lambda x: not x.startswith('--'), lines)))
+ query = query.strip()
+ query = re.sub(MAGE_SEMI_COLON, ';', query)
+
+ if query:
+ arr.append(query)
+
+ return arr
+
+
  def execute_raw_sql(
  loader,
  block: 'Block',
  query_string: str,
+ configuration: Dict = {},
  should_query: bool = False,
  ) -> List[Any]:
  queries = []
  fetch_query_at_indexes = []

- # create_statement, query_statement = extract_and_replace_text_between_strings(
- # query_string,
- # 'create',
- # ';',
- # case_sensitive=True,
- # )
-
- # if create_statement:
- # queries.append(create_statement)
- # fetch_query_at_indexes.append(False)
-
- # queries.append(query_statement)
- # fetch_query_at_indexes.append(False)
+ has_create_or_insert = has_create_or_insert_statement(query_string)

- for query in query_string.split(';'):
- query = query.strip()
- if query and not query.startswith('--'):
+ for query in split_query_string(query_string):
+ if has_create_or_insert:
  queries.append(query)
  fetch_query_at_indexes.append(False)
+ else:
+ if should_query:
+ query = f"""SELECT *
+ FROM (
+ {query}
+ ) AS {block.table_name}__limit
+ LIMIT 1000"""
+ queries.append(query)
+ fetch_query_at_indexes.append(True)

- if should_query:
+ if should_query and has_create_or_insert:
  queries.append(f'SELECT * FROM {block.full_table_name} LIMIT 1000')
  fetch_query_at_indexes.append(block.full_table_name)
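
split_query_string protects semicolons inside quoted literals by masking them with the __MAGE_SEMI_COLON__ placeholder, splits the remaining text on semicolons, drops lines starting with --, and then restores the placeholders. A worked example of the expected behavior (illustrative SQL, not from the package):

    sql = "INSERT INTO example_schema.events VALUES (1, 'a;b'); SELECT * FROM example_schema.events;"
    # Expected result of split_query_string(sql), per the logic above:
    #   ["INSERT INTO example_schema.events VALUES (1, 'a;b')",
    #    'SELECT * FROM example_schema.events']
    # The ';' inside the quoted literal 'a;b' survives because it is masked
    # with the placeholder before the split and restored afterwards.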

mage_ai/data_preparation/models/block/sql/utils/shared.py
@@ -47,23 +47,33 @@ def interpolate_input(block, query, replace_func=None):
  for idx, upstream_block in enumerate(block.upstream_blocks):
  matcher1 = '{} df_{} {}'.format('{{', idx + 1, '}}')

- if BlockLanguage.SQL == upstream_block.type:
+ is_sql = BlockLanguage.SQL == upstream_block.language
+ if is_sql:
  configuration = upstream_block.configuration
  else:
  configuration = block.configuration
+ use_raw_sql = configuration.get('use_raw_sql')

  database = configuration.get('data_provider_database', '')
  schema = configuration.get('data_provider_schema', '')

+ replace_with = __replace_func(database, schema, upstream_block.table_name)
+ upstream_block_content = upstream_block.content
+ if is_sql and use_raw_sql and not has_create_or_insert_statement(upstream_block_content):
+ upstream_query = interpolate_input(upstream_block, upstream_block_content)
+ replace_with = f"""(
+ {upstream_query}
+ ) AS {upstream_block.table_name}"""
+
  query = re.sub(
  '{}[ ]*df_{}[ ]*{}'.format(r'\{\{', idx + 1, r'\}\}'),
- __replace_func(database, schema, upstream_block.table_name),
+ replace_with,
  query,
  )

  query = query.replace(
  f'{matcher1}',
- __replace_func(database, schema, upstream_block.table_name),
+ replace_with,
  )

  return query
@@ -170,3 +180,39 @@ def extract_and_replace_text_between_strings(
  new_text = text[0:max(start_idx - 1, 0)] + replace_string + text[end_idx + 1:]

  return extracted_text, new_text
+
+
+ def remove_comments(text: str) -> str:
+ lines = text.split('\n')
+ return '\n'.join(line for line in lines if not line.startswith('--'))
+
+
+ def extract_create_statement_table_name(text: str) -> str:
+ statement_partial, _ = extract_and_replace_text_between_strings(
+ remove_comments(text),
+ r'create table(?: if not exists)*',
+ r'\(',
+ )
+ if not statement_partial:
+ return None
+
+ parts = statement_partial[:len(statement_partial) - 1].strip().split(' ')
+ return parts[-1]
+
+
+ def extract_insert_statement_table_names(text: str) -> List[str]:
+ matches = re.findall(
+ r'insert(?: overwrite)*(?: into)*[\s]+([\w.]+)',
+ remove_comments(text),
+ re.IGNORECASE,
+ )
+ return matches
+
+
+ def has_create_or_insert_statement(text: str) -> bool:
+ table_name = extract_create_statement_table_name(text)
+ if table_name:
+ return True
+
+ matches = extract_insert_statement_table_names(text)
+ return len(matches) >= 1
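
These helpers back the new Block.full_table_name fallback shown earlier: a CREATE TABLE target wins when present, otherwise the last INSERT target is used. A hedged worked example against the regexes above (sample SQL, not from the package):

    sql = (
        "INSERT INTO analytics.stage_orders SELECT * FROM raw.orders; "
        "INSERT OVERWRITE analytics.orders SELECT * FROM analytics.stage_orders;"
    )
    # extract_create_statement_table_name(sql)  -> None (no CREATE TABLE statement)
    # extract_insert_statement_table_names(sql) -> ['analytics.stage_orders', 'analytics.orders']
    # has_create_or_insert_statement(sql)       -> True
    # full_table_name would therefore resolve to the last match, 'analytics.orders'.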

mage_ai/data_preparation/models/variable.py
@@ -175,6 +175,8 @@
  """
  if self.variable_type == VariableType.DATAFRAME:
  return self.__read_parquet(sample=sample, sample_count=sample_count)
+ elif self.variable_type == VariableType.SPARK_DATAFRAME:
+ return self.__read_spark_parquet(sample=sample, sample_count=sample_count, spark=spark)
  elif self.variable_type == VariableType.DATAFRAME_ANALYSIS:
  return await self.__read_dataframe_analysis_async(
  dataframe_analysis_keys=dataframe_analysis_keys,
@@ -367,7 +369,7 @@
  def __read_spark_parquet(self, sample: bool = False, sample_count: int = None, spark=None):
  if spark is None:
  return None
- return (
+ df = (
  spark.read
  .format('csv')
  .option('header', 'true')
@@ -375,6 +377,9 @@
  .option('delimiter', ',')
  .load(self.variable_path)
  )
+ if sample and sample_count:
+ df = df.limit(sample_count)
+ return df

  def __write_geo_dataframe(self, data) -> None:
  os.makedirs(self.variable_path, exist_ok=True)

mage_ai/data_preparation/repo_manager.py
@@ -114,12 +114,15 @@ def init_repo(repo_path: str) -> None:
  if os.path.exists(repo_path):
  raise FileExistsError(f'Repository {repo_path} already exists')

- os.makedirs(os.getenv(MAGE_DATA_DIR_ENV_VAR, DEFAULT_MAGE_DATA_DIR), exist_ok=True)
+ os.makedirs(
+ os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR,
+ exist_ok=True,
+ )
  copy_template_directory('repo', repo_path)


  def get_data_dir() -> str:
- return os.getenv(MAGE_DATA_DIR_ENV_VAR, DEFAULT_MAGE_DATA_DIR)
+ return os.getenv(MAGE_DATA_DIR_ENV_VAR) or DEFAULT_MAGE_DATA_DIR


  def get_repo_name() -> str:
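
The change from os.getenv(var, default) to os.getenv(var) or default matters in one extra case: an environment variable that is set but empty now also falls back to the default. A small illustration with a placeholder variable name and default value:

    import os

    os.environ['MAGE_DATA_DIR'] = ''  # set, but empty

    os.getenv('MAGE_DATA_DIR', '/tmp/default_dir')    # -> '' (two-argument default is not used)
    os.getenv('MAGE_DATA_DIR') or '/tmp/default_dir'  # -> '/tmp/default_dir'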

mage_ai/data_preparation/shared/secrets.py
@@ -66,6 +66,9 @@ def get_secret_value(name: str) -> str:
  from mage_ai.orchestration.db.models import Secret
  fernet = Fernet(get_encryption_key())

- secret = Secret.query.filter(Secret.name == name).one_or_none()
- if secret:
- return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+ try:
+ secret = Secret.query.filter(Secret.name == name).one_or_none()
+ if secret:
+ return fernet.decrypt(secret.value.encode('utf-8')).decode('utf-8')
+ except Exception:
+ print(f'WARNING: Could not find secret value for secret {name}')

mage_ai/data_preparation/templates/sensors/bigquery.py (new file)
@@ -0,0 +1,32 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.bigquery import BigQuery
+ from mage_ai.io.config import ConfigFileLoader
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_bigquery_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a BigQuery query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your BigQuery query'  # Specify your SQL query here
+
+ loader = BigQuery.with_config(ConfigFileLoader(config_path, config_profile))
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True

mage_ai/data_preparation/templates/sensors/mysql.py (new file)
@@ -0,0 +1,33 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.config import ConfigFileLoader
+ from mage_ai.io.mysql import MySQL
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_mysql_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a MySQL query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your MySQL query'  # Specify your SQL query here
+
+ with MySQL.with_config(
+ ConfigFileLoader(config_path, config_profile)) as loader:
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True

mage_ai/data_preparation/templates/sensors/postgres.py (new file)
@@ -0,0 +1,33 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.config import ConfigFileLoader
+ from mage_ai.io.postgres import Postgres
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_postgres_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a Postgres query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your Postgres query'  # Specify your SQL query here
+
+ with Postgres.with_config(
+ ConfigFileLoader(config_path, config_profile)) as loader:
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True

mage_ai/data_preparation/templates/sensors/redshift.py (new file)
@@ -0,0 +1,33 @@
+ from mage_ai.data_preparation.repo_manager import get_repo_path
+ from mage_ai.io.config import ConfigFileLoader
+ from mage_ai.io.redshift import Redshift
+ from os import path
+
+ if 'sensor' not in globals():
+ from mage_ai.data_preparation.decorators import sensor
+
+
+ @sensor
+ def query_redshift_and_check_condition(**kwargs) -> bool:
+ """
+ Template code for checking the results of a Redshift query.
+ Specify your configuration settings in 'io_config.yaml'.
+
+ Return: True if the sensor should complete, False if it should
+ keep waiting
+ """
+
+ config_path = path.join(get_repo_path(), 'io_config.yaml')
+ config_profile = 'default'
+
+ query = 'Your Redshift query'  # Specify your SQL query here
+
+ with Redshift.with_config(
+ ConfigFileLoader(config_path, config_profile)) as loader:
+ df = loader.load(query)
+
+ # Add your checks here
+ if df.empty:
+ return False
+
+ return True