analytic-workspace-client 1.1.1__tar.gz → 1.29.0__tar.gz
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/PKG-INFO +3 -2
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/README.md +1 -1
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/setup.py +6 -5
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/analytic_workspace_client.egg-info/PKG-INFO +3 -2
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/analytic_workspace_client.egg-info/SOURCES.txt +17 -1
- analytic_workspace_client-1.29.0/src/analytic_workspace_client.egg-info/requires.txt +12 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/analytic_workspace_client.egg-info/top_level.txt +1 -0
- analytic_workspace_client-1.29.0/src/aw_client/core/bundle.py +39 -0
- analytic_workspace_client-1.29.0/src/aw_client/core/compiler.py +165 -0
- analytic_workspace_client-1.29.0/src/aw_client/core/model_vault.py +14 -0
- analytic_workspace_client-1.29.0/src/aw_client/core/spark.py +15 -0
- analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/__init__.py +4 -0
- analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/application.py +49 -0
- analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/dto.py +85 -0
- analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/runtime.py +202 -0
- analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/test_data.py +17 -0
- analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/tools.py +99 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/exceptions.py +5 -1
- analytic_workspace_client-1.29.0/src/aw_client/model_dev/__init__.py +0 -0
- analytic_workspace_client-1.29.0/src/aw_client/models/__init__.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/session.py +41 -3
- analytic_workspace_client-1.29.0/src/aw_client/tools.py +36 -0
- analytic_workspace_client-1.29.0/src/aw_etl/__init__.py +0 -0
- analytic_workspace_client-1.29.0/src/aw_etl/compiler.py +17 -0
- analytic_workspace_client-1.29.0/src/aw_etl/etl_blocks.py +39 -0
- analytic_workspace_client-1.29.0/src/aw_etl/models.py +22 -0
- analytic_workspace_client-1.1.1/src/analytic_workspace_client.egg-info/requires.txt +0 -8
- analytic_workspace_client-1.1.1/src/aw_client/tools.py +0 -16
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/pyproject.toml +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/setup.cfg +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/analytic_workspace_client.egg-info/dependency_links.txt +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/__init__.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/cache.py +0 -0
- {analytic_workspace_client-1.1.1/src/aw_client/model_dev → analytic_workspace_client-1.29.0/src/aw_client/core}/__init__.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/__init__.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/base.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/v0.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/v1.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/v2.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/domain.py +0 -0
- {analytic_workspace_client-1.1.1/src/aw_client/models/__init__.py → analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/services.py} +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/application.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/cache.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/runner.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/virtual_objects.py +0 -0
- {analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/models/model_schema.py +0 -0
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: analytic_workspace_client
-Version: 1.1.1
+Version: 1.29.0
 Summary: Library for connecting to Analytic Workspace
 Home-page: https://analyticworkspace.ru/
 Author: Analytic Workspace
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8,<4
 Description-Content-Type: text/markdown
 Provides-Extra: dev
+Provides-Extra: ml
 
 # Library for Analytic Workspace
 
@@ -34,7 +35,7 @@ session = Session(token=aw_token, aw_url='https://aw.example.ru')
 # If the access token is set in the AW_DATA_TOKEN environment variable, the session object can be created
 # without explicitly passing the token parameter: session = Session(aw_url='https://aw.example.ru')
 
-df = session.load()  # df: pandas.DataFrame
+df = session.load(model_id=123)  # df: pandas.DataFrame
 
 display(df)
 ```
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/README.md
RENAMED
@@ -20,7 +20,7 @@ session = Session(token=aw_token, aw_url='https://aw.example.ru')
 # If the access token is set in the AW_DATA_TOKEN environment variable, the session object can be created
 # without explicitly passing the token parameter: session = Session(aw_url='https://aw.example.ru')
 
-df = session.load()  # df: pandas.DataFrame
+df = session.load(model_id=123)  # df: pandas.DataFrame
 
 display(df)
 ```
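The only functional change to the documented example is that `load()` now takes an explicit `model_id`. A minimal end-to-end sketch of the documented usage; the URL, token value, and model id are placeholders, not values from the package:

```python
import os
from aw_client.session import Session

os.environ.setdefault('AW_DATA_TOKEN', '<your-token>')   # placeholder token
session = Session(aw_url='https://aw.example.ru')        # placeholder URL

df = session.load(model_id=123)  # df: pandas.DataFrame; model id is illustrative
print(df.head())
```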
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/setup.py
RENAMED
@@ -10,7 +10,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
 
 setup(
     name='analytic_workspace_client',
-    version='1.1.1',
+    version='1.29.0',
 
     description='Library for connecting to Analytic Workspace',
     long_description=long_description,
@@ -33,15 +33,16 @@ setup(
     packages=find_packages(where='src'),
 
     install_requires=[
-        'python-dotenv>=0
-        'httpx>=0.
-        'pandas
+        'python-dotenv>=1.0,<1.1',
+        'httpx>=0.25,<1.0',
+        'pandas',
         'pydantic>=1.10,<2.0',
         'colorama>=0.4,<0.5'
     ],
 
     extras_require={
-        'dev': 'pyspark==3.
+        'dev': ['pyspark==3.5.0', 'pytest>=8.2,<8.3'],
+        'ml': ['mlflow>=2.14,<2.15']
     },
 
     setup_requires=['wheel'],
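1.29.0 pins `python-dotenv` and `httpx`, turns the `dev` extra into a list (pyspark plus pytest), and adds a new `ml` extra for mlflow. A small sketch for checking which optional extras are importable in the current environment; the extra-to-module mapping below mirrors `extras_require` and is not an API of the package:

```python
import importlib.util

# pip install "analytic-workspace-client[dev]"  -> pyspark 3.5.0, pytest
# pip install "analytic-workspace-client[ml]"   -> mlflow
for extra, module in {'dev': 'pyspark', 'ml': 'mlflow'}.items():
    status = 'installed' if importlib.util.find_spec(module) else 'missing'
    print(f'[{extra}] {module}: {status}')
```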
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/analytic_workspace_client.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: analytic-workspace-client
-Version: 1.1.1
+Version: 1.29.0
 Summary: Library for connecting to Analytic Workspace
 Home-page: https://analyticworkspace.ru/
 Author: Analytic Workspace
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8,<4
 Description-Content-Type: text/markdown
 Provides-Extra: dev
+Provides-Extra: ml
 
 # Library for Analytic Workspace
 
@@ -34,7 +35,7 @@ session = Session(token=aw_token, aw_url='https://aw.example.ru')
 # If the access token is set in the AW_DATA_TOKEN environment variable, the session object can be created
 # without explicitly passing the token parameter: session = Session(aw_url='https://aw.example.ru')
 
-df = session.load()  # df: pandas.DataFrame
+df = session.load(model_id=123)  # df: pandas.DataFrame
 
 display(df)
 ```
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/analytic_workspace_client.egg-info/SOURCES.txt
RENAMED
@@ -12,15 +12,31 @@ src/aw_client/domain.py
 src/aw_client/exceptions.py
 src/aw_client/session.py
 src/aw_client/tools.py
+src/aw_client/core/__init__.py
+src/aw_client/core/bundle.py
+src/aw_client/core/compiler.py
+src/aw_client/core/model_vault.py
+src/aw_client/core/spark.py
 src/aw_client/data_master/__init__.py
 src/aw_client/data_master/base.py
 src/aw_client/data_master/v0.py
 src/aw_client/data_master/v1.py
 src/aw_client/data_master/v2.py
+src/aw_client/etl_blocks/__init__.py
+src/aw_client/etl_blocks/application.py
+src/aw_client/etl_blocks/dto.py
+src/aw_client/etl_blocks/runtime.py
+src/aw_client/etl_blocks/services.py
+src/aw_client/etl_blocks/test_data.py
+src/aw_client/etl_blocks/tools.py
 src/aw_client/model_dev/__init__.py
 src/aw_client/model_dev/application.py
 src/aw_client/model_dev/cache.py
 src/aw_client/model_dev/runner.py
 src/aw_client/model_dev/virtual_objects.py
 src/aw_client/models/__init__.py
-src/aw_client/models/model_schema.py
+src/aw_client/models/model_schema.py
+src/aw_etl/__init__.py
+src/aw_etl/compiler.py
+src/aw_etl/etl_blocks.py
+src/aw_etl/models.py
analytic_workspace_client-1.29.0/src/aw_client/core/bundle.py
ADDED
@@ -0,0 +1,39 @@
+from typing import List, Dict, Union, Optional, Any
+
+
+class NamedObjectsBundle:
+    """ Ordered collection of objects addressable by name or by position """
+    def __init__(self, objs: Dict[str, Any]):
+        self._obj_list: List[Any] = [df for _, df in objs.items()]
+        self._obj_named: Dict[str, Any] = objs
+
+    def first(self) -> Optional[Any]:
+        """ Returns the first object, or None if the bundle is empty """
+        return self._obj_list[0] if self._obj_list else None
+
+    def as_list(self) -> List[Any]:
+        return self._obj_list
+
+    def as_named(self) -> Dict[str, Any]:
+        return self._obj_named
+
+    def __getitem__(self, item):
+        if isinstance(item, int):
+            # an integer item is treated as a positional index
+            return self._obj_list[item]
+
+        if item not in self._obj_named:
+            raise Exception(f'Object named "{item}" not found')
+        return self._obj_named[item]
+
+    def __iter__(self):
+        return iter(self._obj_named.values())
+
+    def __bool__(self):
+        return len(self._obj_list) > 0
+
+    def __len__(self):
+        return len(self._obj_list)
+
+    def items(self):
+        return self._obj_named.items()
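`NamedObjectsBundle` wraps the upstream dataframes and schemas handed to ETL blocks so they can be fetched by name or by position. A quick behavioral sketch with plain lists standing in for dataframes:

```python
from aw_client.core.bundle import NamedObjectsBundle

bundle = NamedObjectsBundle({'orders': [1, 2], 'clients': [3]})

assert bundle.first() == [1, 2]     # first object in insertion order
assert bundle['orders'] == [1, 2]   # lookup by name
assert bundle[1] == [3]             # or by positional index
assert len(bundle) == 2 and bool(bundle)
for name, obj in bundle.items():    # dict-style iteration
    print(name, obj)
```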
analytic_workspace_client-1.29.0/src/aw_client/core/compiler.py
ADDED
@@ -0,0 +1,165 @@
+from typing import List, Tuple, Optional
+
+import builtins
+
+
+class CompiledModule:
+    """ Wrapper around the globals produced by executing a compiled script """
+    def __init__(self, compiled_globals: dict):
+        self.compiled_globals = compiled_globals
+
+    def __contains__(self, item):
+        return item in self.compiled_globals
+
+    def __getattr__(self, item):
+        return self.compiled_globals[item]
+
+    def __getitem__(self, item):
+        return self.compiled_globals[item]
+
+    @property
+    def is_empty(self) -> bool:
+        """ Returns True if the module contains no code (for example, only comments) """
+        return len(set(self.compiled_globals) - {'__builtins__'}) == 0
+
+
+class ScriptCompiler:
+    """ Compiles and executes user scripts in a restricted global namespace """
+
+    class CompilerException(Exception):
+        """ """
+
+    # Compiler exceptions
+    class Misconfigured(CompilerException):
+        """ """
+
+    class ForbiddenImport(CompilerException):
+        """ Raised when the code contains disallowed imports """
+
+    class CannotCompile(CompilerException):
+        """ Raised when the code cannot be compiled """
+
+    # Compiler operation modes
+    MODE_ETL = 1
+    MODE_VIRTUAL_OBJECT_SCHEMA = 2
+    MODE_ETL_BLOCK = 3
+
+    def __init__(self, mode: Optional[int] = None):
+        self.mode = mode
+
+    def compile(self, source_code: str, mode: int = None) -> CompiledModule:
+        """ """
+        try:
+            byte_code = compile(source_code, filename='<string>', mode='exec')
+        except SyntaxError as e:
+            raise ScriptCompiler.CannotCompile(f'Compilation error: {e.msg} (line {e.lineno}): {e.text}')
+
+        try:
+            compiled_globals = self._get_globals(mode=mode if mode is not None else self.mode)
+            exec(byte_code, compiled_globals)
+        except Exception as e:
+            raise ScriptCompiler.CompilerException(f'Compilation error: {e}')
+
+        return CompiledModule(compiled_globals=compiled_globals)
+
+    def _get_globals(self, mode: int) -> dict:
+        """ """
+        if mode == ScriptCompiler.MODE_ETL:
+            safe_names, safe_modules = self._safe_globals_for_model_etl_script()
+        elif mode == ScriptCompiler.MODE_ETL_BLOCK:
+            safe_names, safe_modules = self._safe_globals_for_etl_block()
+        elif mode == ScriptCompiler.MODE_VIRTUAL_OBJECT_SCHEMA:
+            safe_names, safe_modules = self._safe_globals_for_virtual_object_schema_script()
+        else:
+            raise ScriptCompiler.Misconfigured(f'Invalid compilation mode {mode}')
+
+        def safe_import(name, globals=None, locals=None, fromlist=(), level=0):
+            """ Allows importing only top-level modules from the allowlist """
+            modules = name.split('.')
+            if modules[0] not in safe_modules:
+                raise ScriptCompiler.ForbiddenImport(f'Import of module {modules[0]} is forbidden')
+
+            return __import__(name, globals, locals, fromlist, level)
+
+        safe_builtins = {}
+        for name in safe_names:
+            safe_builtins[name] = getattr(builtins, name)
+
+        safe_builtins['__import__'] = safe_import
+
+        return {'__builtins__': safe_builtins}
+
+    @staticmethod
+    def _safe_globals_for_virtual_object_schema_script() -> Tuple[List[str], List[str]]:
+        """ """
+        safe_names = [
+            'None', 'False', 'True', 'bool', 'bytes', 'chr', 'complex', 'float',
+            'hex', 'id', 'int', 'str', 'getattr', 'setattr', 'delattr',
+        ]
+
+        safe_modules = ['pyspark', 'aw_etl']
+
+        return safe_names, safe_modules
+
+    @staticmethod
+    def _safe_globals_full() -> Tuple[List[str], List[str]]:
+        """ Returns the full set of allowed builtin names and modules """
+        safe_names = [
+            'None', 'False', 'True',
+            'abs', 'all', 'any', 'ascii',
+            'bin', 'bool', 'bytes', 'bytearray',
+            'callable', 'chr', 'classmethod', 'complex',
+            'delattr', 'dict', 'divmod',
+            'enumerate',
+            'float', 'filter', 'format', 'frozenset',
+            'getattr',
+            'hasattr', 'hash', 'hex',
+            'id', 'int', 'isinstance', 'issubclass', 'iter',
+            'len', 'list',
+            'map', 'max', 'min',
+            'next',
+            'object', 'oct', 'ord',
+            'pow', 'print', 'property',
+            'range', 'repr', 'reversed', 'round',
+            'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
+            'type', 'tuple',
+            'vars',
+            'zip',
+            '__build_class__', '__name__',
+
+            'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'BufferError', 'BytesWarning',
+            'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError', 'FutureWarning',
+            'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
+            'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplementedError', 'OSError',
+            'OverflowError', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning',
+            'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError',
+            'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
+            'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning', 'ZeroDivisionError',
+        ]
+
+        safe_modules = [
+            'pyspark', 'requests', 'pandas', 'numpy', 'aw_etl', 'pyparsing', 'pydantic',
+
+            'mlflow', 'prophet', 'statsmodels', 'torch', 'sklearn', 'numpy', 'catboost',
+
+            'array', 'calendar', 'codecs', 'collections', 'copy', 'csv', 'dataclasses', 'datetime', '_strptime',
+            'decimal', 'enum', 'functools', 'hashlib', 'itertools', 'json', 'math', 'queue', 'random', 're',
+            'statistics', 'string', 'time', 'urllib', 'xml',
+            'zoneinfo', 'typing', 'uuid', 'logging',
+        ]
+
+        return safe_names, safe_modules
+
+    @staticmethod
+    def _safe_globals_for_model_etl_script() -> Tuple[List[str], List[str]]:
+        """ """
+        return ScriptCompiler._safe_globals_full()
+
+    @staticmethod
+    def _safe_globals_for_etl_block() -> Tuple[List[str], List[str]]:
+        """ """
+        safe_names, safe_modules = ScriptCompiler._safe_globals_full()
+        safe_modules.extend(['sqlglot', 'inspect'])
+
+        return safe_names, safe_modules
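A minimal sketch of the sandbox behavior: modules outside the allowlist are rejected by the substituted `__import__`, and the failure surfaces as a `CompilerException` because `compile()` wraps exec-time errors. The script text below is illustrative:

```python
from aw_client.core.compiler import ScriptCompiler

compiler = ScriptCompiler(mode=ScriptCompiler.MODE_ETL)

# json is on the allowlist, so this compiles and executes
module = compiler.compile("import json\ndef run():\n    return json.dumps({'ok': True})")
print(module['run']())   # compiled names are reachable by item or attribute

# os is not on the allowlist: the sandboxed __import__ raises
# ForbiddenImport during exec, re-raised as CompilerException
try:
    compiler.compile("import os")
except ScriptCompiler.CompilerException as e:
    print(e)
```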
analytic_workspace_client-1.29.0/src/aw_client/core/model_vault.py
ADDED
@@ -0,0 +1,14 @@
+from typing import Optional, Dict, Any
+
+
+class Vault:
+    """ Read-only store of named values available to a model """
+    def __init__(self, values: Optional[dict] = None):
+        self._values = values or {}
+
+    @property
+    def values(self) -> dict:
+        return self._values
+
+    def get(self, name: str) -> Optional[Any]:
+        return self._values.get(name, None)
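Usage is as simple as the class suggests; missing names return `None` rather than raising. The values shown are illustrative:

```python
from aw_client.core.model_vault import Vault

vault = Vault({'api_key': 'secret'})
assert vault.get('api_key') == 'secret'
assert vault.get('missing') is None
```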
analytic_workspace_client-1.29.0/src/aw_client/core/spark.py
ADDED
@@ -0,0 +1,15 @@
+
+try:
+    from pyspark.sql import SparkSession
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+def build_spark_session():
+    """ """
+    return SparkSession.builder \
+        .master('local[*]') \
+        .config('spark.driver.host', '127.0.0.1') \
+        .config('spark.ui.enabled', 'false') \
+        .getOrCreate()
+
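The helper builds a local, UI-less Spark session suitable for developing ETL blocks on a workstation. A small sketch, assuming the `[dev]` extra is installed:

```python
from aw_client.core.spark import build_spark_session

spark = build_spark_session()   # local[*], driver on 127.0.0.1, Spark UI disabled
df = spark.createDataFrame([(1, 'a')], ['id', 'name'])
df.show()
spark.stop()
```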
analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/application.py
ADDED
@@ -0,0 +1,49 @@
+from typing import Optional, Callable
+
+try:
+    from pyspark.sql import SparkSession
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+from aw_client.core.model_vault import Vault
+from aw_client.core.compiler import CompiledModule
+
+
+class ETLBlockApplication:
+    """ Runtime context passed to ETL block functions """
+    def __init__(self,
+                 spark_builder: Callable,
+                 run_mode: str,
+                 vault: Vault,
+                 model_module: Optional[CompiledModule] = None):
+        self._spark_builder = spark_builder
+        self._spark = None
+        self._run_mode = run_mode
+        self._model_module = model_module
+        self._vault = vault
+
+    @property
+    def spark(self) -> SparkSession:
+        # the Spark session is created lazily on first access
+        if self._spark is None:
+            self._spark = self._spark_builder()
+        return self._spark
+
+    @property
+    def is_spark_initialized(self) -> bool:
+        return self._spark is not None
+
+    @property
+    def model_module(self) -> Optional[CompiledModule]:
+        """ """
+        return self._model_module
+
+    @property
+    def vault(self) -> Vault:
+        """ """
+        return self._vault
+
+    @property
+    def run_mode(self) -> str:
+        """ """
+        return self._run_mode
+
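Note the lazy `spark` property: the builder is only invoked on first access, which `is_spark_initialized` makes observable. A quick sketch:

```python
from aw_client.core.model_vault import Vault
from aw_client.core.spark import build_spark_session
from aw_client.etl_blocks.application import ETLBlockApplication

app = ETLBlockApplication(spark_builder=build_spark_session,
                          run_mode='full', vault=Vault())
assert not app.is_spark_initialized   # nothing built yet
_ = app.spark                         # first access triggers spark_builder()
assert app.is_spark_initialized
```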
analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/dto.py
ADDED
@@ -0,0 +1,85 @@
+from typing import Optional, Any, List, Union
+
+import datetime
+from enum import Enum
+
+from pydantic import BaseModel
+
+
+class ETLBlockParamType(str, Enum):
+    """ """
+    STRING = 'string'
+    TEXT = 'text'
+    PASSWORD = 'password'
+    SQL_TEXT = 'sql_text'
+    NUMBER = 'number'
+    FLOAT = 'float'
+    BOOL = 'bool'
+    DATE = 'date'
+    DATETIME = 'datetime'
+    SELECT = 'select'
+    ACTION = 'action'
+
+
+class ETLBlockParamGroupType(str, Enum):
+    """ """
+    GROUP = 'group'
+
+
+class ETLBlockParamActionType(str, Enum):
+    """ """
+    ACTION = 'action'
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+# ETL block metadata
+# ----------------------------------------------------------------------------------------------------------------------
+class ETLBlockParam(BaseModel):
+    """ ETL block parameter """
+    code: str
+    name: str
+    type: ETLBlockParamType
+    description: Optional[str] = None
+
+    required: bool
+    mult: bool
+    domain: Optional[Any] = None
+    extra: Optional[Any] = None
+
+
+class ETLBlockParamGroup(BaseModel):
+    """ Group of ETL block parameters """
+    code: str
+    name: str
+
+    type: ETLBlockParamGroupType
+    description: Optional[str] = None
+    view_options: Optional[dict] = None
+    mult: bool
+    params: List[Union['ETLBlockParam', 'ETLBlockParamAction']] = []
+    extra: Optional[Any] = None
+
+
+class ETLBlockParamAction(BaseModel):
+    code: str
+    name: str
+    type: ETLBlockParamActionType
+    description: Optional[str] = None
+    action: str
+    extra: Optional[Any] = None
+
+
+class ETLBlockMeta(BaseModel):
+    """ ETL block metadata """
+    uid: str  # unique block identifier
+    name: str  # block name
+    version: str  # block version
+    description: str  # block description
+    author: str  # block author
+    updated_at: datetime.datetime  # date and time of the last update
+    params: List[Union[ETLBlockParam, ETLBlockParamAction, ETLBlockParamGroup]]
+    engine_requires: Optional[List[str]] = []
+
+    @property
+    def verbose_name(self):
+        return f'{self.name} v{self.version}'
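Since `ETLBlockMeta` is a pydantic (v1) model, a `block_meta.json` payload can be validated with `parse_raw`, matching the `pydantic>=1.10,<2.0` pin. The payload below is a hypothetical minimal example, not one shipped with the package:

```python
import json
from aw_client.etl_blocks.dto import ETLBlockMeta

meta_json = json.dumps({
    'uid': 'my-block', 'name': 'My Block', 'version': '1.0',
    'description': 'demo', 'author': 'me',
    'updated_at': '2024-01-01T00:00:00',
    'params': [{'code': 'limit', 'name': 'Limit', 'type': 'number',
                'required': True, 'mult': False}],
})
meta = ETLBlockMeta.parse_raw(meta_json)
print(meta.verbose_name)   # My Block v1.0
```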
analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/runtime.py
ADDED
@@ -0,0 +1,202 @@
+from typing import Dict, Optional, Any, Union, List, Literal
+from pathlib import Path
+from collections import OrderedDict
+import datetime
+import inspect
+
+from aw_client.core.compiler import ScriptCompiler
+from aw_client.core.model_vault import Vault
+from aw_client.core.spark import build_spark_session
+from aw_client.core.bundle import NamedObjectsBundle
+from aw_client.models.model_schema import ModelObject, ModelObjectField
+from .application import ETLBlockApplication
+from .test_data import ModelObjectTestData
+from .tools import build_dataframe, build_model_object, build_spark_schema
+from .dto import ETLBlockMeta
+
+
+try:
+    from pyspark.sql import SparkSession, DataFrame
+    from pyspark.sql.types import DataType, StringType, DoubleType, TimestampType, LongType, BooleanType, \
+        ByteType, ShortType, IntegerType, DecimalType, FloatType, DateType, StructType, StructField
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+def get_etl_block_meta(block_path: Path) -> ETLBlockMeta:
+    """ """
+    block_meta_path = block_path / 'block_meta.json' if block_path.is_dir() else block_path
+
+    if not block_meta_path.exists():
+        raise Exception(f'Block metadata file not found: {block_meta_path}')
+
+    with open(block_meta_path, 'rt') as f:
+        return ETLBlockMeta.parse_raw(f.read())
+
+
+def get_etl_block_schema(block_path: Path,
+                         test_data: Union[ModelObjectTestData, List[ModelObjectTestData]],
+                         params: Optional[Dict] = None,
+                         run_mode: Optional[str] = None,
+                         vault: Optional[Vault] = None,
+                         model_script_code: Optional[str] = None) -> StructType:
+    """
+    Args:
+
+    """
+    block_code_path = block_path / 'block_code.py' if block_path.is_dir() else block_path
+
+    if not block_code_path.exists():
+        raise Exception(f'Block source code file not found: {block_code_path}')
+
+    with open(block_code_path, 'rt') as f:
+        block_code = f.read()
+
+    # Compile the block code
+    try:
+        block_module = ScriptCompiler().compile(source_code=block_code, mode=ScriptCompiler.MODE_ETL_BLOCK)
+    except ScriptCompiler.CannotCompile as e:
+        raise Exception(f'Error compiling the block source code: {e}')
+
+    # Compile the model code
+    if model_script_code:
+        try:
+            model_module = ScriptCompiler().compile(source_code=model_script_code, mode=ScriptCompiler.MODE_ETL)
+        except ScriptCompiler.CannotCompile as e:
+            raise Exception(f'Error compiling the model script source code: {e}')
+    else:
+        model_module = None
+
+    spark = build_spark_session()
+
+    # Upstream dataframes
+    dataframes = OrderedDict()
+    for td in (test_data if isinstance(test_data, list) else [test_data]):
+        df = build_dataframe(spark, td)
+        if not dataframes:
+            dataframes['child'] = df
+        dataframes[td.model_name] = df
+
+    upstream_dataframes = NamedObjectsBundle(dataframes)
+
+    # Upstream schemas
+    schemas = OrderedDict()
+    for td in (test_data if isinstance(test_data, list) else [test_data]):
+        schema = build_spark_schema(td)
+        if not schemas:
+            schemas['child'] = schema
+        schemas[td.model_name] = schema
+
+    upstream_schemas = NamedObjectsBundle(schemas)
+
+    block_schema_parameters = inspect.signature(block_module['block_schema']).parameters
+
+    app = ETLBlockApplication(
+        spark_builder=build_spark_session,
+        run_mode=run_mode or 'full',
+        vault=vault or Vault(),
+        model_module=model_module
+    )
+
+    # Determine which arguments to pass
+    block_schema_kwargs = {}
+    if 'params' in block_schema_parameters:
+        block_schema_kwargs['params'] = params
+    if 'app' in block_schema_parameters:
+        block_schema_kwargs['app'] = app
+    if 'model_object' in block_schema_parameters:
+        block_schema_kwargs['model_object'] = build_model_object(test_data[0] if isinstance(test_data, list) else test_data)
+    if 'schema' in block_schema_parameters:
+        block_schema_kwargs['schema'] = upstream_schemas.first()
+    if 'schemas' in block_schema_parameters:
+        block_schema_kwargs['schemas'] = upstream_schemas
+    if 'upstream_schema' in block_schema_parameters:
+        block_schema_kwargs['upstream_schema'] = upstream_schemas.first()
+    if 'upstream_schemas' in block_schema_parameters:
+        block_schema_kwargs['upstream_schemas'] = upstream_schemas
+    if 'df' in block_schema_parameters:
+        block_schema_kwargs['df'] = upstream_dataframes.first()
+    if 'dfs' in block_schema_parameters:
+        block_schema_kwargs['dfs'] = upstream_dataframes
+    if 'upstream_dataframe' in block_schema_parameters:
+        block_schema_kwargs['upstream_dataframe'] = upstream_dataframes.first()
+    if 'upstream_dataframes' in block_schema_parameters:
+        block_schema_kwargs['upstream_dataframes'] = upstream_dataframes
+
+    return block_module['block_schema'](**block_schema_kwargs)
+
+
+def get_etl_block_data(block_path: Path,
+                       test_data: Union[ModelObjectTestData, List[ModelObjectTestData]],
+                       params: Optional[Dict] = None,
+                       run_mode: Optional[str] = None,
+                       vault: Optional[Vault] = None,
+                       model_script_code: Optional[str] = None) -> DataFrame:
+    """
+    Args:
+
+    """
+    block_code_path = block_path / 'block_code.py' if block_path.is_dir() else block_path
+
+    if not block_code_path.exists():
+        raise Exception(f'Block source code file not found: {block_code_path}')
+
+    with open(block_code_path, 'rt') as f:
+        block_code = f.read()
+
+    # Compile the block code
+    try:
+        block_module = ScriptCompiler().compile(source_code=block_code, mode=ScriptCompiler.MODE_ETL_BLOCK)
+    except ScriptCompiler.CannotCompile as e:
+        raise Exception(f'Error compiling the block source code: {e}')
+
+    # Compile the model code
+    if model_script_code:
+        try:
+            model_module = ScriptCompiler().compile(source_code=model_script_code, mode=ScriptCompiler.MODE_ETL)
+        except ScriptCompiler.CannotCompile as e:
+            raise Exception(f'Error compiling the model script source code: {e}')
+    else:
+        model_module = None
+
+    spark = build_spark_session()
+
+    # Upstream dataframes
+    dataframes = OrderedDict()
+    for td in (test_data if isinstance(test_data, list) else [test_data]):
+        df = build_dataframe(spark, td)
+        if not dataframes:
+            dataframes['child'] = df
+        dataframes[td.model_name] = df
+
+    upstream_dataframes = NamedObjectsBundle(dataframes)
+
+    block_data_parameters = inspect.signature(block_module['block_data']).parameters
+
+    app = ETLBlockApplication(
+        spark_builder=build_spark_session,
+        run_mode=run_mode or 'full',
+        vault=vault or Vault(),
+        model_module=model_module
+    )
+
+    # Determine which arguments to pass
+    block_schema_kwargs = {}
+    if 'params' in block_data_parameters:
+        block_schema_kwargs['params'] = params
+    if 'app' in block_data_parameters:
+        block_schema_kwargs['app'] = app
+    if 'model_object' in block_data_parameters:
+        block_schema_kwargs['model_object'] = build_model_object(test_data[0] if isinstance(test_data, list) else test_data)
+    if 'df' in block_data_parameters:
+        block_schema_kwargs['df'] = upstream_dataframes.first()
+    if 'dfs' in block_data_parameters:
+        block_schema_kwargs['dfs'] = upstream_dataframes
+    if 'upstream_dataframe' in block_data_parameters:
+        block_schema_kwargs['upstream_dataframe'] = upstream_dataframes.first()
+    if 'upstream_dataframes' in block_data_parameters:
+        block_schema_kwargs['upstream_dataframes'] = upstream_dataframes
+
+    return block_module['block_data'](**block_schema_kwargs)
+
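The runtime inspects each block function's signature and passes only the arguments it declares, so a block can ask for `df`, `schema`, `params`, `app`, and so on. A hedged end-to-end sketch with a hypothetical block file and test data, assuming the `[dev]` extra is installed:

```python
from pathlib import Path

from aw_client.etl_blocks.runtime import get_etl_block_data, get_etl_block_schema
from aw_client.etl_blocks.test_data import ModelObjectTestData

block_code = '''
def block_schema(schema):
    return schema                      # pass the upstream schema through

def block_data(df, params):
    return df.limit(params['limit'])   # truncate the upstream dataframe
'''
Path('block_code.py').write_text(block_code)

test_data = ModelObjectTestData(model_name='sales', rows=[{'id': 1, 'amount': 10.5}])
schema = get_etl_block_schema(Path('block_code.py'), test_data)
df = get_etl_block_data(Path('block_code.py'), test_data, params={'limit': 1})
df.show()
```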
analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/test_data.py
ADDED
@@ -0,0 +1,17 @@
+from typing import List, TypedDict, Optional
+
+from dataclasses import dataclass, field
+
+
+class ModelObjectSchemaField(TypedDict):
+    """ """
+    model_name: str
+    simple_type: str
+
+
+@dataclass
+class ModelObjectTestData:
+    """ """
+    model_name: str
+    rows: List[dict] = field(default_factory=list)
+    schema: Optional[List[ModelObjectSchemaField]] = None
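`ModelObjectTestData` feeds the runtime helpers above: field types are inferred from the first row unless an explicit schema pins them. Both forms, with illustrative values:

```python
from aw_client.etl_blocks.test_data import ModelObjectTestData

# types inferred from the first row
inferred = ModelObjectTestData(model_name='sales', rows=[{'id': 1, 'amount': 10.5}])

# types pinned explicitly
explicit = ModelObjectTestData(
    model_name='sales',
    rows=[{'id': 1, 'amount': 10.5}],
    schema=[{'model_name': 'id', 'simple_type': 'number'},
            {'model_name': 'amount', 'simple_type': 'float'}],
)
```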
analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/tools.py
ADDED
@@ -0,0 +1,99 @@
+from typing import Any
+import datetime
+
+
+from aw_client.models.model_schema import ModelObject, ModelObjectField
+from .test_data import ModelObjectTestData
+
+
+try:
+    from pyspark.sql import SparkSession, DataFrame
+    from pyspark.sql.types import DataType, StringType, DoubleType, TimestampType, LongType, BooleanType, \
+        ByteType, ShortType, IntegerType, DecimalType, FloatType, DateType, StructType, StructField
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+def build_spark_schema(test_data: ModelObjectTestData) -> StructType:
+    """ """
+    if test_data.schema is not None:
+        return StructType(fields=[
+            StructField(sf['model_name'], spark_type_for_simple_type(sf['simple_type']), True) for sf in test_data.schema
+        ])
+    return StructType(fields=[
+        StructField(n, spark_type_for_python_value(v), True) for n, v in test_data.rows[0].items()
+    ])
+
+
+def build_dataframe(spark: SparkSession, test_data: ModelObjectTestData) -> DataFrame:
+    """ """
+    return spark.createDataFrame(test_data.rows, schema=build_spark_schema(test_data))
+
+
+def build_model_object(test_data: ModelObjectTestData):
+    """ """
+    if test_data.schema is not None:
+        fields = [
+            ModelObjectField(name=f['model_name'], model_name=f['model_name'], simple_type=f['simple_type']) for f in test_data.schema
+        ]
+    else:
+        fields = [
+            ModelObjectField(name=n, model_name=n, simple_type=simple_type_for_python_value(v)) for n, v in test_data.rows[0].items()
+        ]
+    return ModelObject(
+        name=test_data.model_name,
+        model_name=test_data.model_name,
+        type='table',
+        sql_text=None,
+        fields=fields
+    )
+
+
+def spark_type_for_simple_type(simple_type: str) -> DataType:
+    """
+    """
+    if simple_type == 'number':
+        return LongType()
+    elif simple_type == 'float':
+        return DoubleType()
+    elif simple_type == 'date':
+        return TimestampType()
+    elif simple_type == 'bool':
+        return BooleanType()
+    else:
+        return StringType()
+
+
+def spark_type_for_python_value(value: Any) -> DataType:
+    """
+    Returns the Spark type for a Python value
+    """
+    if isinstance(value, bool):  # before int: bool is a subclass of int
+        return BooleanType()
+    if isinstance(value, datetime.datetime):  # before date: datetime is a subclass of date
+        return TimestampType()
+    if isinstance(value, datetime.date):
+        return DateType()
+    if isinstance(value, int):
+        return LongType()
+    if isinstance(value, float):
+        return DoubleType()
+    return StringType()
+
+
+def simple_type_for_python_value(value: Any) -> str:
+    """ """
+    if isinstance(value, bool):  # before int: bool is a subclass of int
+        return 'bool'
+    if isinstance(value, datetime.date):
+        return 'date'
+    if isinstance(value, datetime.datetime):
+        return 'date'
+    if isinstance(value, int):
+        return 'number'
+    if isinstance(value, float):
+        return 'float'
+    return 'string'
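The mapping helpers translate AW "simple types" and plain Python values into Spark types; note that `bool` must be tested before `int` and `datetime` before `date` because of Python's subclass relationships. A short sketch:

```python
import datetime
from aw_client.etl_blocks.tools import simple_type_for_python_value, spark_type_for_python_value

print(spark_type_for_python_value(42))                       # LongType
print(spark_type_for_python_value(True))                     # BooleanType, not LongType
print(spark_type_for_python_value(datetime.datetime.now()))  # TimestampType
print(simple_type_for_python_value('abc'))                   # string
```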
analytic_workspace_client-1.29.0/src/aw_client/model_dev/__init__.py
ADDED
File without changes
analytic_workspace_client-1.29.0/src/aw_client/models/__init__.py
ADDED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/session.py
RENAMED
@@ -6,10 +6,14 @@ from pathlib import Path
 import httpx
 import pandas
 from contextlib import contextmanager
+from urllib.parse import urljoin
 
 from aw_client.data_master import get_nearest_api
 from aw_client.domain import APIConfig
 from aw_client.models.model_schema import ModelSchema
+from aw_client.exceptions import AwClientMisconfigured
+
+from aw_client.tools import strip_column_name_prefix
 
 
 class Session:
@@ -22,7 +26,13 @@ class Session:
     def __init__(self, token: str = None, aw_url: str = None, version: int = None):
         """ """
         self.token = (token or os.getenv("AW_DATA_TOKEN") or '').strip()
-        self.aw_url = aw_url or os.getenv('AW_URL')
+        self.aw_url = (aw_url or os.getenv('AW_URL') or '').strip()
+        if not self.aw_url:
+            raise AwClientMisconfigured(
+                'Specify the AnalyticWorkspace URL: Session(aw_url=\'http://aw.mydomain.ru\') or set the '
+                'AW_URL environment variable')
+        if not self.aw_url.endswith('/'):
+            self.aw_url += '/'
         self.version = version if version is not None else None
 
     def get_aw_version(self) -> int:
@@ -41,7 +51,7 @@ class Session:
 
         return self.version
 
-    def load_model(self, model_id: int, **options) -> pandas.DataFrame:
+    def load_model(self, model_id: int, strip_prefix: bool = False, **options) -> pandas.DataFrame:
         """
         Loads the model data into a pandas dataframe
         """
@@ -60,7 +70,11 @@ class Session:
         if api is None:
             raise Session.Error(f'Failed to get an API for AW version {self.version}')
 
-
+        df = api.load_model(model_id=model_id, **options)
+        if strip_prefix:
+            df = strip_column_name_prefix(df, inplace=True)
+
+        return df
 
     def model_schema(self, model_id: int) -> ModelSchema:
         """ """
@@ -114,3 +128,27 @@ class Session:
             yield client
         finally:
             client.close()
+
+    @property
+    def mlflow(self):
+        """ """
+        try:
+            import mlflow
+        except ImportError:
+            raise AwClientMisconfigured(
+                'To use MLFlow, install the library with the ml extra: pip install analytic-workspace-client[ml]')
+
+        if not self.token:
+            data_master_url = urljoin(self.aw_url, 'data-master/get-token')
+            raise AwClientMisconfigured(
+                f'AnalyticWorkspace access token is not set. Go to {data_master_url} to obtain '
+                f'a token')
+
+        tracking_url = urljoin(self.aw_url, 'mlflow')
+
+        if mlflow.get_tracking_uri() != tracking_url:
+            mlflow.set_tracking_uri(tracking_url)
+
+        os.environ['MLFLOW_TRACKING_TOKEN'] = self.token
+
+        return mlflow
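Together these changes make `Session` validate and normalize `aw_url`, optionally strip the shared column prefix on load, and expose a preconfigured `mlflow` module (with the `[ml]` extra installed). A sketch with placeholder URL, token, and model id:

```python
import os
from aw_client.session import Session

os.environ.setdefault('AW_DATA_TOKEN', '<your-token>')   # placeholder
session = Session(aw_url='https://aw.example.ru')        # placeholder

# strip_prefix=True removes the shared "object__" prefix from column names
df = session.load_model(model_id=123, strip_prefix=True)

# session.mlflow points mlflow at <aw_url>/mlflow and sets MLFLOW_TRACKING_TOKEN
mlflow = session.mlflow
with mlflow.start_run():
    mlflow.log_metric('rows', len(df))
```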
analytic_workspace_client-1.29.0/src/aw_client/tools.py
ADDED
@@ -0,0 +1,36 @@
+import tempfile
+import shutil
+from pathlib import Path
+from contextlib import contextmanager
+
+import pandas as pd
+
+
+@contextmanager
+def get_temp_folder(remove_on_exit: bool = True) -> Path:
+    """ """
+    temp_folder = tempfile.mkdtemp()
+
+    try:
+        yield Path(temp_folder)
+    finally:
+        if remove_on_exit:
+            shutil.rmtree(temp_folder, ignore_errors=True)
+
+
+def strip_column_name_prefix(df: pd.DataFrame, inplace: bool = False) -> pd.DataFrame:
+    """
+    Strips the common prefix from the dataframe's column names
+    """
+    column_renames = {}
+    for column_name in df.columns:
+        if '__' in column_name:
+            _, column_renames[column_name] = column_name.rsplit('__', 1)
+
+    if column_renames:
+        if inplace:
+            df.rename(columns=column_renames, inplace=inplace)
+        else:
+            df = df.rename(columns=column_renames)
+
+    return df
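`strip_column_name_prefix` keeps everything after the last `__` in each column name. A quick sketch:

```python
import pandas as pd
from aw_client.tools import strip_column_name_prefix

df = pd.DataFrame({'orders__id': [1], 'orders__total': [9.99]})
clean = strip_column_name_prefix(df)          # renamed copy
print(list(clean.columns))                    # ['id', 'total']
strip_column_name_prefix(df, inplace=True)    # or rename in place
```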
analytic_workspace_client-1.29.0/src/aw_etl/__init__.py
ADDED
File without changes
analytic_workspace_client-1.29.0/src/aw_etl/compiler.py
ADDED
@@ -0,0 +1,17 @@
+from typing import Protocol
+
+
+class CompiledModule(Protocol):
+    """ """
+    def __contains__(self, item):
+        """ """
+
+    def __getattr__(self, item):
+        """ """
+
+    def __getitem__(self, item):
+        """ """
+
+    @property
+    def is_empty(self) -> bool:
+        """ """
analytic_workspace_client-1.29.0/src/aw_etl/etl_blocks.py
ADDED
@@ -0,0 +1,39 @@
+from typing import Protocol, Optional
+
+try:
+    from pyspark.sql import SparkSession
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+from aw_etl.models import Vault
+from aw_etl.compiler import CompiledModule
+
+
+class InvalidEtlBlock(Exception):
+    """ """
+
+
+class ETLBlockApplication(Protocol):
+    """ """
+    @property
+    def spark(self) -> SparkSession:
+        """ """
+
+    @property
+    def is_spark_initialized(self) -> bool:
+        """ """
+
+    @property
+    def model_module(self) -> Optional[CompiledModule]:
+        """ """
+
+    @property
+    def vault(self) -> Vault:
+        """ """
+
+    @property
+    def run_mode(self) -> str:
+        """ """
+
+
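The `aw_etl` package mirrors the runtime objects as `typing.Protocol` stubs, so sandboxed block code can type-hint against them without importing `aw_client` internals. A hypothetical block using the protocol:

```python
from aw_etl.etl_blocks import ETLBlockApplication

def block_data(df, app: ETLBlockApplication):
    # 'full' matches the default run mode used by the local runtime helpers
    if app.run_mode == 'full':
        return df
    return df.limit(100)
```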
analytic_workspace_client-1.29.0/src/aw_etl/models.py
ADDED
@@ -0,0 +1,22 @@
+from typing import Protocol, List, Any
+
+
+class ModelObjectField(Protocol):
+    """ """
+    @property
+    def simple_type(self) -> str:
+        """ """
+
+
+class ModelObject(Protocol):
+    """ """
+    @property
+    def fields(self) -> List[ModelObjectField]:
+        """ """
+
+
+class Vault(Protocol):
+    """ """
+    def get(self, name: str) -> Any:
+        """ """
+
analytic_workspace_client-1.1.1/src/aw_client/tools.py
DELETED
@@ -1,16 +0,0 @@
-import tempfile
-import shutil
-from pathlib import Path
-from contextlib import contextmanager
-
-
-@contextmanager
-def get_temp_folder(remove_on_exit: bool = True) -> Path:
-    """ """
-    temp_folder = tempfile.mkdtemp()
-
-    try:
-        yield Path(temp_folder)
-    finally:
-        if remove_on_exit:
-            shutil.rmtree(temp_folder, ignore_errors=True)
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/pyproject.toml
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/setup.cfg
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/analytic_workspace_client.egg-info/dependency_links.txt
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/__init__.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/cache.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1/src/aw_client/model_dev → analytic_workspace_client-1.29.0/src/aw_client/core}/__init__.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/__init__.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/base.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/v0.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/v1.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/data_master/v2.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/domain.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1/src/aw_client/models/__init__.py → analytic_workspace_client-1.29.0/src/aw_client/etl_blocks/services.py}
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/application.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/cache.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/runner.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/model_dev/virtual_objects.py
RENAMED
File without changes
{analytic_workspace_client-1.1.1 → analytic_workspace_client-1.29.0}/src/aw_client/models/model_schema.py
RENAMED
File without changes