mage-ai 0.8.80__py3-none-any.whl → 0.8.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mage-ai might be problematic. See the registry's advisory page for more details.

Files changed (111)
  1. mage_ai/api/presenters/SyncPresenter.py +1 -0
  2. mage_ai/api/resources/GitBranchResource.py +1 -1
  3. mage_ai/api/resources/SyncResource.py +83 -41
  4. mage_ai/data_preparation/executors/block_executor.py +9 -0
  5. mage_ai/data_preparation/models/block/__init__.py +40 -3
  6. mage_ai/data_preparation/models/pipelines/integration_pipeline.py +21 -9
  7. mage_ai/data_preparation/models/utils.py +5 -3
  8. mage_ai/data_preparation/models/variable.py +59 -26
  9. mage_ai/data_preparation/preferences.py +24 -16
  10. mage_ai/data_preparation/sync/__init__.py +23 -4
  11. mage_ai/data_preparation/sync/git_sync.py +7 -0
  12. mage_ai/data_preparation/templates/callbacks/base.jinja +2 -2
  13. mage_ai/data_preparation/templates/callbacks/orchestration/triggers/default.jinja +1 -1
  14. mage_ai/data_preparation/templates/repo/.gitignore +13 -0
  15. mage_ai/orchestration/db/models/oauth.py +16 -7
  16. mage_ai/orchestration/pipeline_scheduler.py +17 -1
  17. mage_ai/server/constants.py +1 -1
  18. mage_ai/server/frontend_dist/404.html +2 -2
  19. mage_ai/server/frontend_dist/404.html.html +2 -2
  20. mage_ai/server/frontend_dist/_next/static/QB08hs4sID1RqTCyVc19P/_buildManifest.js +1 -0
  21. mage_ai/server/frontend_dist/_next/static/chunks/1005-2ad5a1b201c2e742.js +1 -0
  22. mage_ai/server/frontend_dist/_next/static/chunks/{1424-fbc7f741303c0aa0.js → 1424-c6b0d89ffb4a10b9.js} +1 -1
  23. mage_ai/server/frontend_dist/_next/static/chunks/3077-5a56c22fbc5626c8.js +1 -0
  24. mage_ai/server/frontend_dist/_next/static/chunks/3714-d472531256ed5cf5.js +1 -0
  25. mage_ai/server/frontend_dist/_next/static/chunks/547-f3f2ccd9dcde9459.js +1 -0
  26. mage_ai/server/frontend_dist/_next/static/chunks/7400-e5e39ae07de554b2.js +1 -0
  27. mage_ai/server/frontend_dist/_next/static/chunks/{8180-325d712ecbed97ce.js → 8180-0c53312021451bb7.js} +1 -1
  28. mage_ai/server/frontend_dist/_next/static/chunks/8735-1a3d39946add4aea.js +1 -0
  29. mage_ai/server/frontend_dist/_next/static/chunks/894-6290def8665695eb.js +1 -0
  30. mage_ai/server/frontend_dist/_next/static/chunks/8957-6d3f5778166f4294.js +1 -0
  31. mage_ai/server/frontend_dist/_next/static/chunks/9270-e35a5eb3dae4c3ca.js +1 -0
  32. mage_ai/server/frontend_dist/_next/static/chunks/pages/{_app-b25383366d854e8e.js → _app-13fb8342c87983a1.js} +1 -1
  33. mage_ai/server/frontend_dist/_next/static/chunks/pages/files-8b81ef90680f3f72.js +1 -0
  34. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-9e96b956c26fbc92.js +1 -0
  35. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-3260a2dac8df672e.js +1 -0
  36. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/backfills/{[...slug]-9e519ad50f639e00.js → [...slug]-a4fbf12642923051.js} +1 -1
  37. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-fe65c89f1e86b4b8.js +1 -0
  38. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/{block-runs-302e1bd9fbf39994.js → block-runs-5c02b08404dbdd20.js} +1 -1
  39. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-25a125d43569c70d.js +1 -0
  40. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{monitors-5244d6c5d80af010.js → monitors-092f364100995ba6.js} +1 -1
  41. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/runs/{[run]-90170de1e6fe8f04.js → [run]-50e60f6b49eed3f0.js} +1 -1
  42. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{runs-88994d0bcf41cf9e.js → runs-ed9903520a27f4c8.js} +1 -1
  43. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-55748e7fcf738d91.js +1 -0
  44. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/triggers/{[...slug]-456d84426ffe5025.js → [...slug]-1725c0871027ae05.js} +1 -1
  45. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/{triggers-05f43f9eaf6528db.js → triggers-0d2c9f28ae21cde8.js} +1 -1
  46. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-2f4b987a8dba3484.js +1 -0
  47. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/account/profile-3ca183fdcb571bde.js +1 -0
  48. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-3b26db0dd0962783.js +1 -0
  49. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-bd5e22e49c1f7bc2.js +1 -0
  50. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-85073f9cc09fabf7.js +1 -0
  51. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-0d2260ad08dfe44a.js +1 -0
  52. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-d74d0f163157ba65.js +1 -0
  53. mage_ai/server/frontend_dist/files.html +25 -0
  54. mage_ai/server/frontend_dist/index.html +2 -2
  55. mage_ai/server/frontend_dist/manage.html +5 -5
  56. mage_ai/server/frontend_dist/pipeline-runs.html +5 -5
  57. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills/[...slug].html +5 -5
  58. mage_ai/server/frontend_dist/pipelines/[pipeline]/backfills.html +5 -5
  59. mage_ai/server/frontend_dist/pipelines/[pipeline]/edit.html +2 -2
  60. mage_ai/server/frontend_dist/pipelines/[pipeline]/logs.html +5 -5
  61. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runs.html +5 -5
  62. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors/block-runtime.html +5 -5
  63. mage_ai/server/frontend_dist/pipelines/[pipeline]/monitors.html +5 -5
  64. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs/[run].html +5 -5
  65. mage_ai/server/frontend_dist/pipelines/[pipeline]/runs.html +5 -5
  66. mage_ai/server/frontend_dist/pipelines/[pipeline]/settings.html +5 -5
  67. mage_ai/server/frontend_dist/pipelines/[pipeline]/syncs.html +5 -5
  68. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers/[...slug].html +5 -5
  69. mage_ai/server/frontend_dist/pipelines/[pipeline]/triggers.html +5 -5
  70. mage_ai/server/frontend_dist/pipelines/[pipeline].html +2 -2
  71. mage_ai/server/frontend_dist/pipelines.html +5 -5
  72. mage_ai/server/frontend_dist/settings/account/profile.html +5 -5
  73. mage_ai/server/frontend_dist/settings/workspace/preferences.html +5 -5
  74. mage_ai/server/frontend_dist/settings/workspace/sync-data.html +5 -5
  75. mage_ai/server/frontend_dist/settings/workspace/users.html +5 -5
  76. mage_ai/server/frontend_dist/settings.html +2 -2
  77. mage_ai/server/frontend_dist/sign-in.html +14 -14
  78. mage_ai/server/frontend_dist/terminal.html +5 -5
  79. mage_ai/server/frontend_dist/test.html +3 -3
  80. mage_ai/server/frontend_dist/triggers.html +5 -5
  81. mage_ai/tests/api/operations/test_syncs.py +97 -0
  82. mage_ai/tests/data_preparation/models/test_block.py +20 -14
  83. mage_ai/tests/orchestration/test_pipeline_scheduler.py +23 -2
  84. {mage_ai-0.8.80.dist-info → mage_ai-0.8.82.dist-info}/METADATA +2 -1
  85. {mage_ai-0.8.80.dist-info → mage_ai-0.8.82.dist-info}/RECORD +91 -85
  86. mage_ai/server/frontend_dist/_next/static/K62oaHK5x3k16vVxdvIWf/_buildManifest.js +0 -1
  87. mage_ai/server/frontend_dist/_next/static/chunks/1005-38fa861ca54679db.js +0 -1
  88. mage_ai/server/frontend_dist/_next/static/chunks/3077-187a4db6073d95df.js +0 -1
  89. mage_ai/server/frontend_dist/_next/static/chunks/547-833b70469e82cb2b.js +0 -1
  90. mage_ai/server/frontend_dist/_next/static/chunks/5540-1025c6b3e65558b3.js +0 -1
  91. mage_ai/server/frontend_dist/_next/static/chunks/6567-2488118bb39a9d99.js +0 -1
  92. mage_ai/server/frontend_dist/_next/static/chunks/8957-6edafc5a2521efdf.js +0 -1
  93. mage_ai/server/frontend_dist/_next/static/chunks/9129-8ae88897c3817909.js +0 -1
  94. mage_ai/server/frontend_dist/_next/static/chunks/pages/manage-eba67ac0edfdea1b.js +0 -1
  95. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipeline-runs-128dd171c6fff7ac.js +0 -1
  96. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/edit-d90d32812b2be89e.js +0 -1
  97. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/monitors/block-runtime-7635e7031335de0f.js +0 -1
  98. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines/[pipeline]/settings-3c42f05001416811.js +0 -1
  99. mage_ai/server/frontend_dist/_next/static/chunks/pages/pipelines-c73dab1d63317556.js +0 -1
  100. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/account/profile-f5626eb662337ad5.js +0 -1
  101. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/preferences-292e840e9386335e.js +0 -1
  102. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/sync-data-353dc1ef2fadf70b.js +0 -1
  103. mage_ai/server/frontend_dist/_next/static/chunks/pages/settings/workspace/users-cd838435ed1b8383.js +0 -1
  104. mage_ai/server/frontend_dist/_next/static/chunks/pages/terminal-d15ea617ef145b32.js +0 -1
  105. mage_ai/server/frontend_dist/_next/static/chunks/pages/triggers-df30a0006fd775b8.js +0 -1
  106. /mage_ai/server/frontend_dist/_next/static/{K62oaHK5x3k16vVxdvIWf → QB08hs4sID1RqTCyVc19P}/_middlewareManifest.js +0 -0
  107. /mage_ai/server/frontend_dist/_next/static/{K62oaHK5x3k16vVxdvIWf → QB08hs4sID1RqTCyVc19P}/_ssgManifest.js +0 -0
  108. {mage_ai-0.8.80.dist-info → mage_ai-0.8.82.dist-info}/LICENSE +0 -0
  109. {mage_ai-0.8.80.dist-info → mage_ai-0.8.82.dist-info}/WHEEL +0 -0
  110. {mage_ai-0.8.80.dist-info → mage_ai-0.8.82.dist-info}/entry_points.txt +0 -0
  111. {mage_ai-0.8.80.dist-info → mage_ai-0.8.82.dist-info}/top_level.txt +0 -0
@@ -14,4 +14,5 @@ class SyncPresenter(BasePresenter):
14
14
  'type',
15
15
  'access_token',
16
16
  'auth_type',
17
+ 'user_git_settings',
17
18
  ]
@@ -26,7 +26,7 @@ class GitBranchResource(GenericResource):
26
26
  async def member(self, pk, user, **kwargs):
27
27
  branch = None
28
28
  preferences = get_preferences(user=user)
29
- if preferences.has_valid_git_config():
29
+ if preferences.is_git_integration_enabled():
30
30
  git_manager = Git.get_manager(user=user)
31
31
  branch = git_manager.current_branch
32
32
  return self(dict(name=branch), user, **kwargs)
@@ -1,20 +1,21 @@
1
+ import os
2
+ from typing import Dict
3
+
1
4
  from mage_ai.api.resources.GenericResource import GenericResource
2
- from mage_ai.data_preparation.preferences import (
3
- get_preferences,
4
- Preferences,
5
- )
5
+ from mage_ai.data_preparation.preferences import get_preferences
6
+ from mage_ai.data_preparation.repo_manager import get_repo_path
6
7
  from mage_ai.data_preparation.shared.secrets import create_secret
7
8
  from mage_ai.data_preparation.sync import (
8
- GitConfig,
9
9
  GIT_ACCESS_TOKEN_SECRET_NAME,
10
10
  GIT_SSH_PRIVATE_KEY_SECRET_NAME,
11
11
  GIT_SSH_PUBLIC_KEY_SECRET_NAME,
12
+ GitConfig,
13
+ UserGitConfig,
12
14
  )
13
15
  from mage_ai.data_preparation.sync.git_sync import GitSync
14
16
  from mage_ai.orchestration.db import safe_db_query
15
17
  from mage_ai.orchestration.db.models.oauth import User
16
18
  from mage_ai.orchestration.db.models.secrets import Secret
17
- import os
18
19
 
19
20
 
20
21
  def get_ssh_public_key_secret_name(user: User = None) -> str:
@@ -35,8 +36,7 @@ def get_access_token_secret_name(user: User = None) -> str:
35
36
  class SyncResource(GenericResource):
36
37
  @classmethod
37
38
  def collection(self, query, meta, user, **kwargs):
38
- preferences = get_preferences(user=user)
39
- sync_config = preferences.sync_config
39
+ sync_config = self.get_project_sync_config(user)
40
40
  return self.build_result_set(
41
41
  [sync_config],
42
42
  user,
@@ -46,59 +46,101 @@ class SyncResource(GenericResource):
46
46
  @classmethod
47
47
  @safe_db_query
48
48
  def create(self, payload, user, **kwargs):
49
- ssh_public_key = payload.pop('ssh_public_key', None)
50
- ssh_private_key = payload.pop('ssh_private_key', None)
51
-
52
- if ssh_public_key:
53
- secret_name = get_ssh_public_key_secret_name(user=user)
54
- secret = Secret.query.filter(
55
- Secret.name == secret_name).one_or_none()
56
- if secret:
57
- secret.delete()
58
- create_secret(secret_name, ssh_public_key)
59
- payload['ssh_public_key_secret_name'] = secret_name
60
- if ssh_private_key:
61
- secret_name = get_ssh_private_key_secret_name(user=user)
62
- secret = Secret.query.filter(
63
- Secret.name == secret_name).one_or_none()
64
- if secret:
65
- secret.delete()
66
- create_secret(secret_name, ssh_private_key)
67
- payload['ssh_private_key_secret_name'] = secret_name
68
-
69
- access_token = payload.pop('access_token', None)
70
- if access_token:
71
- secret_name = get_access_token_secret_name(user=user)
72
- secret = Secret.query.filter(
73
- Secret.name == secret_name).one_or_none()
74
- if secret:
75
- secret.delete()
76
- create_secret(secret_name, access_token)
77
- payload['access_token_secret_name'] = secret_name
49
+ user_settings = payload.pop('user_git_settings', dict())
78
50
 
79
- preferences = Preferences(user=user) if user else get_preferences()
51
+ payload = self.update_user_settings(payload)
52
+ preferences = get_preferences()
80
53
  updated_config = dict(preferences.sync_config, **payload)
81
54
  # default repo_path to os.getcwd()
82
55
  if not updated_config.get('repo_path', None):
83
56
  updated_config['repo_path'] = os.getcwd()
84
- # Validate payload
57
+
58
+ # Validate payloads
59
+ user_payload = self.update_user_settings(user_settings, user=user)
60
+ UserGitConfig.load(config=user_payload)
85
61
  sync_config = GitConfig.load(config=updated_config)
86
62
 
63
+ # Update user git settings if they are included
64
+ if user:
65
+ repo_path = get_repo_path()
66
+ user_preferences = user.preferences or {}
67
+ user_git_settings = user.git_settings or {}
68
+ user_preferences[repo_path] = {
69
+ **user_preferences.get(repo_path, {}),
70
+ 'git_settings': {
71
+ **user_git_settings,
72
+ **user_payload,
73
+ }
74
+ }
75
+ user.refresh()
76
+ user.update(preferences=user_preferences)
77
+ else:
78
+ updated_config.update(user_payload)
79
+
87
80
  preferences.update_preferences(dict(sync_config=updated_config))
88
81
 
89
82
  GitSync(sync_config)
90
83
 
91
- return self(get_preferences(user=user).sync_config, user, **kwargs)
84
+ return self(get_preferences().sync_config, user, **kwargs)
92
85
 
93
86
  @classmethod
94
87
  def member(self, pk, user, **kwargs):
95
- return self(get_preferences(user=user).sync_config, user, **kwargs)
88
+ sync_config = self.get_project_sync_config(user)
89
+ return self(sync_config, user, **kwargs)
96
90
 
97
91
  def update(self, payload, **kwargs):
92
+ self.model.pop('user_git_settings')
98
93
  config = GitConfig.load(config=self.model)
99
94
  sync = GitSync(config)
100
95
  action_type = payload.get('action_type')
101
96
  if action_type == 'sync_data':
102
97
  sync.sync_data()
98
+ elif action_type == 'reset':
99
+ sync.reset()
103
100
 
104
101
  return self
102
+
103
+ @classmethod
104
+ def get_project_sync_config(self, user):
105
+ sync_config = get_preferences().sync_config
106
+ # Make it backwards compatible with storing all of the git settings in the user
107
+ # preferences field.
108
+ if user and user.git_settings:
109
+ sync_config['user_git_settings'] = user.git_settings
110
+ else:
111
+ sync_config['user_git_settings'] = UserGitConfig.from_dict(sync_config).to_dict()
112
+ return sync_config
113
+
114
+ @classmethod
115
+ def update_user_settings(self, payload, user=None) -> Dict:
116
+ user_payload = payload.copy()
117
+ ssh_public_key = user_payload.pop('ssh_public_key', None)
118
+ ssh_private_key = user_payload.pop('ssh_private_key', None)
119
+
120
+ if ssh_public_key:
121
+ secret_name = get_ssh_public_key_secret_name(user=user)
122
+ secret = Secret.query.filter(
123
+ Secret.name == secret_name).one_or_none()
124
+ if secret:
125
+ secret.delete()
126
+ create_secret(secret_name, ssh_public_key)
127
+ user_payload['ssh_public_key_secret_name'] = secret_name
128
+ if ssh_private_key:
129
+ secret_name = get_ssh_private_key_secret_name(user=user)
130
+ secret = Secret.query.filter(
131
+ Secret.name == secret_name).one_or_none()
132
+ if secret:
133
+ secret.delete()
134
+ create_secret(secret_name, ssh_private_key)
135
+ user_payload['ssh_private_key_secret_name'] = secret_name
136
+
137
+ access_token = user_payload.pop('access_token', None)
138
+ if access_token:
139
+ secret_name = get_access_token_secret_name(user=user)
140
+ secret = Secret.query.filter(
141
+ Secret.name == secret_name).one_or_none()
142
+ if secret:
143
+ secret.delete()
144
+ create_secret(secret_name, access_token)
145
+ user_payload['access_token_secret_name'] = secret_name
146
+ return user_payload
@@ -112,6 +112,8 @@ class BlockExecutor:
112
112
  )
113
113
  self._execute_callback(
114
114
  'on_failure',
115
+ dynamic_block_index=dynamic_block_index,
116
+ dynamic_upstream_block_uuids=dynamic_upstream_block_uuids,
115
117
  global_vars=global_vars,
116
118
  logging_tags=tags,
117
119
  pipeline_run=pipeline_run,
@@ -129,6 +131,8 @@ class BlockExecutor:
129
131
  )
130
132
  self._execute_callback(
131
133
  'on_success',
134
+ dynamic_block_index=dynamic_block_index,
135
+ dynamic_upstream_block_uuids=dynamic_upstream_block_uuids,
132
136
  global_vars=global_vars,
133
137
  logging_tags=tags,
134
138
  pipeline_run=pipeline_run,
@@ -194,6 +198,8 @@ class BlockExecutor:
194
198
  global_vars,
195
199
  logging_tags,
196
200
  pipeline_run,
201
+ dynamic_block_index: Union[int, None] = None,
202
+ dynamic_upstream_block_uuids: Union[List[str], None] = None,
197
203
  ):
198
204
  arr = []
199
205
  if self.block.callback_block:
@@ -206,6 +212,9 @@ class BlockExecutor:
206
212
  try:
207
213
  callback_block.execute_callback(
208
214
  callback,
215
+ dynamic_block_index=dynamic_block_index,
216
+ dynamic_upstream_block_uuids=dynamic_upstream_block_uuids,
217
+ execution_partition=self.execution_partition,
209
218
  global_vars=global_vars,
210
219
  logger=self.logger,
211
220
  logging_tags=logging_tags,
@@ -10,7 +10,7 @@ from datetime import datetime
10
10
  from inspect import Parameter, isfunction, signature
11
11
  from logging import Logger
12
12
  from queue import Queue
13
- from typing import Any, Callable, Dict, List, Set, Tuple
13
+ from typing import Any, Callable, Dict, List, Set, Tuple, Union
14
14
 
15
15
  import pandas as pd
16
16
  import simplejson
@@ -2132,6 +2132,9 @@ class CallbackBlock(Block):
2132
2132
  def execute_callback(
2133
2133
  self,
2134
2134
  callback: str,
2135
+ dynamic_block_index: Union[int, None] = None,
2136
+ dynamic_upstream_block_uuids: Union[List[str], None] = None,
2137
+ execution_partition: str = None,
2135
2138
  global_vars: Dict = None,
2136
2139
  logger: Logger = None,
2137
2140
  logging_tags: Dict = None,
@@ -2194,11 +2197,45 @@ class CallbackBlock(Block):
2194
2197
  callback_functions_legacy = success_functions
2195
2198
  callback_status = CallbackStatus.SUCCESS
2196
2199
 
2200
+ # Fetch input variables
2201
+ input_vars, kwargs_vars, upstream_block_uuids = self.fetch_input_variables(
2202
+ None,
2203
+ execution_partition,
2204
+ global_vars,
2205
+ dynamic_block_index=dynamic_block_index,
2206
+ dynamic_upstream_block_uuids=dynamic_upstream_block_uuids,
2207
+ )
2208
+
2209
+ # Copied logic from the method self.execute_block
2210
+ outputs_from_input_vars = {}
2211
+ upstream_block_uuids_length = len(upstream_block_uuids)
2212
+ for idx, input_var in enumerate(input_vars):
2213
+ if idx < upstream_block_uuids_length:
2214
+ upstream_block_uuid = upstream_block_uuids[idx]
2215
+ outputs_from_input_vars[upstream_block_uuid] = input_var
2216
+ outputs_from_input_vars[f'df_{idx + 1}'] = input_var
2217
+
2218
+ global_vars_copy = global_vars.copy()
2219
+ for kwargs_var in kwargs_vars:
2220
+ global_vars_copy.update(kwargs_var)
2221
+
2197
2222
  for callback_function in callback_functions_legacy:
2198
- callback_function(**global_vars)
2223
+ callback_function(**global_vars_copy)
2199
2224
 
2200
2225
  for callback_function in callback_functions:
2201
- callback_function(callback_status, **global_vars)
2226
+ try:
2227
+ # As of version 0.8.81, callback functions have access to the parent block’s
2228
+ # data output.
2229
+ callback_function(callback_status, *input_vars, **global_vars_copy)
2230
+ except TypeError:
2231
+ # This try except block will make the above code backwards compatible in case
2232
+ # a user has already written callback functions with only keyword arguments.
2233
+ callback_function(
2234
+ callback_status,
2235
+ **merge_dict(global_vars_copy, dict(
2236
+ __input=outputs_from_input_vars,
2237
+ )),
2238
+ )
2202
2239
 
2203
2240
  def update_content(self, content, widget=False):
2204
2241
  if not self.file.exists():
@@ -204,11 +204,14 @@ class IntegrationPipeline(Pipeline):
204
204
  stderr = e.stderr.decode('utf-8').split('\n')
205
205
 
206
206
  json_object = {}
207
+ error = ''
207
208
  for line in stderr:
208
209
  if line.startswith('ERROR'):
209
- json_object = next(extract_json_objects(line))
210
-
211
- error = dig(json_object, 'tags.error')
210
+ try:
211
+ json_object = next(extract_json_objects(line))
212
+ error = dig(json_object, 'tags.error')
213
+ except Exception:
214
+ error = line
212
215
  raise Exception(error)
213
216
 
214
217
  def preview_data(self, block_type: BlockType, streams: List[str] = None) -> List[str]:
@@ -274,11 +277,14 @@ class IntegrationPipeline(Pipeline):
274
277
  stderr = e.stderr.decode('utf-8').split('\n')
275
278
 
276
279
  json_object = {}
280
+ error = None
277
281
  for line in stderr:
278
282
  if line.startswith('ERROR'):
279
- json_object = next(extract_json_objects(line))
280
-
281
- error = dig(json_object, 'tags.error')
283
+ try:
284
+ json_object = next(extract_json_objects(line))
285
+ error = dig(json_object, 'tags.error')
286
+ except Exception:
287
+ error = line
282
288
  if not error:
283
289
  raise Exception('The sample data was not able to be loaded. Please check \
284
290
  if the stream still exists. If it does not, click the "View and \
@@ -366,17 +372,23 @@ class IntegrationPipeline(Pipeline):
366
372
  message = e.stderr.decode('utf-8')
367
373
  raise Exception(message)
368
374
 
369
- def streams(self, variables: Dict = {}) -> List[Dict]:
375
+ def streams(self, variables: Dict = None) -> List[Dict]:
376
+ if variables is None:
377
+ variables = {}
370
378
  return self.__catalog(variables)['streams']
371
379
 
372
- def __catalog(self, variables: Dict = {}) -> Dict:
380
+ def __catalog(self, variables: Dict = None) -> Dict:
381
+ if variables is None:
382
+ variables = {}
373
383
  return get_catalog(
374
384
  self.data_loader,
375
385
  self.__global_variables(variables),
376
386
  pipeline=self,
377
387
  )
378
388
 
379
- def __global_variables(self, variables: Dict = {}) -> Dict:
389
+ def __global_variables(self, variables: Dict = None) -> Dict:
390
+ if variables is None:
391
+ variables = {}
380
392
  d = get_global_variables(self.uuid) or dict()
381
393
  d.update(variables)
382
394
  return d
@@ -1,9 +1,11 @@
1
- from mage_ai.shared.parsers import encode_complex
1
+ import traceback
2
2
  from typing import Dict
3
+
3
4
  import dask.dataframe as dd
4
5
  import pandas as pd
5
6
  import simplejson
6
- import traceback
7
+
8
+ from mage_ai.shared.parsers import encode_complex
7
9
 
8
10
  MAX_PARTITION_BYTE_SIZE = 100 * 1024 * 1024
9
11
  JSON_SERIALIZABLE_COLUMN_TYPES = [
@@ -56,7 +58,7 @@ def deserialize_columns(row: pd.Series, column_types: Dict) -> pd.Series:
56
58
  continue
57
59
 
58
60
  val = row[column]
59
- if val is not None:
61
+ if val is not None and type(val) is str:
60
62
  row[column] = simplejson.loads(val)
61
63
 
62
64
  return row
@@ -1,17 +1,25 @@
1
+ import json
2
+ import os
3
+ import traceback
1
4
  from enum import Enum
2
- from mage_ai.data_cleaner.shared.utils import (
3
- is_geo_dataframe,
4
- is_spark_dataframe,
5
- )
5
+ from typing import Any, Dict, List
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import polars as pl
10
+ from pandas.api.types import is_object_dtype
11
+ from pandas.core.indexes.range import RangeIndex
12
+
13
+ from mage_ai.data_cleaner.shared.utils import is_geo_dataframe, is_spark_dataframe
6
14
  from mage_ai.data_preparation.models.constants import (
7
15
  DATAFRAME_ANALYSIS_KEYS,
8
16
  DATAFRAME_SAMPLE_COUNT,
9
17
  DATAFRAME_SAMPLE_MAX_COLUMNS,
10
18
  VARIABLE_DIR,
11
19
  )
12
- from mage_ai.data_preparation.models.utils import (
20
+ from mage_ai.data_preparation.models.utils import ( # dask_from_pandas,
21
+ STRING_SERIALIZABLE_COLUMN_TYPES,
13
22
  apply_transform_pandas,
14
- # dask_from_pandas,
15
23
  cast_column_types,
16
24
  deserialize_columns,
17
25
  serialize_columns,
@@ -20,14 +28,6 @@ from mage_ai.data_preparation.storage.base_storage import BaseStorage
20
28
  from mage_ai.data_preparation.storage.local_storage import LocalStorage
21
29
  from mage_ai.shared.parsers import sample_output
22
30
  from mage_ai.shared.utils import clean_name
23
- from pandas.api.types import is_object_dtype
24
- from typing import Any, Dict, List
25
- import json
26
- import numpy as np
27
- import os
28
- import pandas as pd
29
- import polars as pl
30
- import traceback
31
31
 
32
32
  DATAFRAME_COLUMN_TYPES_FILE = 'data_column_types.json'
33
33
  DATAFRAME_PARQUET_FILE = 'data.parquet'
@@ -54,11 +54,14 @@ class Variable:
54
54
  block_uuid: str,
55
55
  partition: str = None,
56
56
  spark=None,
57
- storage: BaseStorage = LocalStorage(),
57
+ storage: BaseStorage = None,
58
58
  variable_type: VariableType = None
59
59
  ) -> None:
60
60
  self.uuid = uuid
61
- self.storage = storage
61
+ if storage is None:
62
+ self.storage = LocalStorage()
63
+ else:
64
+ self.storage = storage
62
65
  # if not self.storage.path_exists(pipeline_path):
63
66
  # raise Exception(f'Pipeline path {pipeline_path} does not exist.')
64
67
  self.pipeline_path = pipeline_path
@@ -269,7 +272,9 @@ class Variable:
269
272
  self.storage.remove(file_path)
270
273
  self.storage.remove_dir(self.variable_path)
271
274
 
272
- def __read_json(self, default_value={}, sample: bool = False) -> Dict:
275
+ def __read_json(self, default_value: Dict = None, sample: bool = False) -> Dict:
276
+ if default_value is None:
277
+ default_value = {}
273
278
  # For backward compatibility
274
279
  old_file_path = os.path.join(self.variable_dir_path, f'{self.uuid}.json')
275
280
  file_path = os.path.join(self.variable_path, JSON_FILE)
@@ -291,7 +296,9 @@ class Variable:
291
296
  data = sample_output(data)[0]
292
297
  return data
293
298
 
294
- async def __read_json_async(self, default_value={}, sample: bool = False) -> Dict:
299
+ async def __read_json_async(self, default_value: Dict = None, sample: bool = False) -> Dict:
300
+ if default_value is None:
301
+ default_value = {}
295
302
  # For backward compatibility
296
303
  old_file_path = os.path.join(self.variable_dir_path, f'{self.uuid}.json')
297
304
  file_path = os.path.join(self.variable_path, JSON_FILE)
@@ -352,7 +359,12 @@ class Variable:
352
359
  df = df.iloc[:sample_count]
353
360
  return df
354
361
 
355
- def __read_parquet(self, sample: bool = False, sample_count: int = None) -> pd.DataFrame:
362
+ def __read_parquet(
363
+ self,
364
+ sample: bool = False,
365
+ sample_count: int = None,
366
+ raise_exception: bool = False,
367
+ ) -> pd.DataFrame:
356
368
  file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE)
357
369
  sample_file_path = os.path.join(self.variable_path, DATAFRAME_PARQUET_SAMPLE_FILE)
358
370
 
@@ -361,12 +373,16 @@ class Variable:
361
373
  try:
362
374
  df = self.storage.read_parquet(sample_file_path, engine='pyarrow')
363
375
  read_sample_success = True
364
- except Exception:
376
+ except Exception as e:
377
+ if raise_exception:
378
+ raise e
365
379
  pass
366
380
  if not read_sample_success:
367
381
  try:
368
382
  df = self.storage.read_parquet(file_path, engine='pyarrow')
369
- except Exception:
383
+ except Exception as e:
384
+ if raise_exception:
385
+ raise e
370
386
  df = pd.DataFrame()
371
387
  if sample:
372
388
  sample_count = sample_count or DATAFRAME_SAMPLE_COUNT
@@ -415,10 +431,14 @@ class Variable:
415
431
  series_non_null = df_output[c].dropna()
416
432
  if len(series_non_null) > 0:
417
433
  coltype = type(series_non_null.iloc[0])
418
-
419
434
  if is_object_dtype(series_non_null.dtype):
435
+ if coltype.__name__ in STRING_SERIALIZABLE_COLUMN_TYPES:
436
+ cast_coltype = str
437
+ else:
438
+ cast_coltype = coltype
420
439
  try:
421
- df_output[c] = series_non_null.astype(coltype)
440
+ df_output[c] = series_non_null.astype(cast_coltype)
441
+ coltype = str
422
442
  except Exception:
423
443
  # Fall back to convert to string
424
444
  # df_output[c] = series_non_null.astype(str)
@@ -432,6 +452,22 @@ class Variable:
432
452
  column_types[c] = type(series_non_null.iloc[0].item()).__name__
433
453
 
434
454
  self.storage.makedirs(self.variable_path, exist_ok=True)
455
+ with open(os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE), 'w') as f:
456
+ f.write(json.dumps(column_types))
457
+
458
+ # Try using Polars to write the dataframe to improve performance
459
+ if type(df_output.index) is RangeIndex and df_output.index.start == 0 \
460
+ and df_output.index.stop == df_output.shape[0] and df_output.index.step == 1:
461
+ # Polars ignores any index
462
+ try:
463
+ pl_df = pl.from_pandas(df_output)
464
+ self.__write_polars_dataframe(pl_df)
465
+ # Test read dataframe from parquet
466
+ self. __read_parquet(sample=True, raise_exception=True)
467
+
468
+ return
469
+ except Exception:
470
+ pass
435
471
 
436
472
  # ddf = dask_from_pandas(df_output)
437
473
  df_output_serialized = apply_transform_pandas(
@@ -444,9 +480,6 @@ class Variable:
444
480
  os.path.join(self.variable_path, DATAFRAME_PARQUET_FILE),
445
481
  )
446
482
 
447
- with open(os.path.join(self.variable_path, DATAFRAME_COLUMN_TYPES_FILE), 'w') as f:
448
- f.write(json.dumps(column_types))
449
-
450
483
  try:
451
484
  df_sample_output = df_output_serialized.iloc[
452
485
  :DATAFRAME_SAMPLE_COUNT,
@@ -1,10 +1,13 @@
1
+ import os
2
+ import traceback
1
3
  from typing import Dict
4
+
5
+ import yaml
6
+
2
7
  from mage_ai.data_preparation.models.constants import PREFERENCES_FILE
3
8
  from mage_ai.data_preparation.repo_manager import get_repo_path
4
9
  from mage_ai.orchestration.db.models.oauth import User
5
- import os
6
- import traceback
7
- import yaml
10
+ from mage_ai.shared.hash import merge_dict
8
11
 
9
12
  # Git environment variables
10
13
  GIT_REPO_LINK_VAR = 'GIT_REPO_LINK'
@@ -27,19 +30,20 @@ class Preferences:
27
30
  self.preferences_file_path = \
28
31
  os.path.join(self.repo_path, PREFERENCES_FILE)
29
32
  self.user = user
30
- preferences = dict()
33
+ project_preferences = dict()
31
34
  try:
32
- if user:
33
- preferences = user.preferences or {}
35
+ if user and user.preferences and user.git_settings is None:
36
+ project_preferences = user.preferences
34
37
  elif config_dict:
35
- preferences = config_dict
38
+ project_preferences = config_dict
36
39
  elif os.path.exists(self.preferences_file_path):
37
40
  with open(self.preferences_file_path) as f:
38
- preferences = yaml.full_load(f.read()) or {}
41
+ project_preferences = yaml.full_load(f.read()) or {}
39
42
  except Exception:
40
43
  traceback.print_exc()
41
44
  pass
42
45
 
46
+ # Git settings
43
47
  if os.getenv(GIT_REPO_LINK_VAR):
44
48
  self.sync_config = dict(
45
49
  remote_repo_link=os.getenv(GIT_REPO_LINK_VAR),
@@ -51,19 +55,23 @@ class Preferences:
51
55
  sync_on_pipeline_run=bool(int(os.getenv(GIT_SYNC_ON_PIPELINE_RUN_TYPE) or 0)),
52
56
  )
53
57
  else:
54
- self.sync_config = preferences.get('sync_config', dict())
58
+ project_sync_config = project_preferences.get('sync_config', dict())
59
+ if user:
60
+ user_git_settings = user.git_settings or {}
61
+ self.sync_config = merge_dict(project_sync_config, user_git_settings)
62
+ else:
63
+ self.sync_config = project_sync_config
55
64
 
56
- def has_valid_git_config(self) -> bool:
57
- return 'remote_repo_link' in self.sync_config and 'repo_path' in self.sync_config
65
+ def is_git_integration_enabled(self) -> bool:
66
+ return 'remote_repo_link' in self.sync_config and \
67
+ 'repo_path' in self.sync_config and \
68
+ self.sync_config.get('branch') is None
58
69
 
59
70
  def update_preferences(self, updates: Dict):
60
71
  preferences = self.to_dict()
61
72
  preferences.update(updates)
62
- if self.user:
63
- self.user.update(preferences=preferences)
64
- else:
65
- with open(self.preferences_file_path, 'w') as f:
66
- yaml.dump(preferences, f)
73
+ with open(self.preferences_file_path, 'w') as f:
74
+ yaml.dump(preferences, f)
67
75
 
68
76
  def to_dict(self) -> Dict:
69
77
  return dict(
@@ -1,7 +1,9 @@
1
+ import inspect
2
+ import os
1
3
  from dataclasses import dataclass
2
4
  from enum import Enum
5
+
3
6
  from mage_ai.shared.config import BaseConfig
4
- import os
5
7
 
6
8
  GIT_ACCESS_TOKEN_SECRET_NAME = 'mage_git_access_token'
7
9
  GIT_SSH_PRIVATE_KEY_SECRET_NAME = 'mage_git_ssh_private_key_b64'
@@ -17,13 +19,30 @@ class AuthType(str, Enum):
17
19
  class GitConfig(BaseConfig):
18
20
  remote_repo_link: str
19
21
  repo_path: str = os.getcwd()
20
- username: str = ''
21
- email: str = ''
22
22
  branch: str = 'main'
23
23
  sync_on_pipeline_run: bool = False
24
+ auth_type: AuthType = AuthType.SSH
25
+ # User settings moved to UserGitConfig, these will be used for Git syncs
26
+ username: str = ''
27
+ email: str = ''
24
28
  ssh_private_key_secret_name: str = GIT_SSH_PRIVATE_KEY_SECRET_NAME
25
29
  ssh_public_key_secret_name: str = GIT_SSH_PUBLIC_KEY_SECRET_NAME
26
- auth_type: AuthType = AuthType.SSH
27
30
  access_token_secret_name: str = GIT_ACCESS_TOKEN_SECRET_NAME
28
31
  # This is not necessary anymore, but leaving it for backwards compatibility
29
32
  type: str = 'git'
33
+
34
+
35
+ @dataclass
36
+ class UserGitConfig(BaseConfig):
37
+ username: str = ''
38
+ email: str = ''
39
+ ssh_private_key_secret_name: str = GIT_SSH_PRIVATE_KEY_SECRET_NAME
40
+ ssh_public_key_secret_name: str = GIT_SSH_PUBLIC_KEY_SECRET_NAME
41
+ access_token_secret_name: str = GIT_ACCESS_TOKEN_SECRET_NAME
42
+
43
+ @classmethod
44
+ def from_dict(cls, config):
45
+ return cls(**{
46
+ k: v for k, v in config.items()
47
+ if k in inspect.signature(cls).parameters
48
+ })
@@ -16,3 +16,10 @@ class GitSync(BaseSync):
16
16
  verbose=True,
17
17
  ):
18
18
  self.git_manager.reset(self.branch)
19
+
20
+ def reset(self):
21
+ with VerboseFunctionExec(
22
+ f'Attempting to clone from remote repo {self.remote_repo_link}',
23
+ verbose=True,
24
+ ):
25
+ self.git_manager.clone()