analytic-workspace-client 1.2.0__tar.gz → 1.29.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analytic_workspace_client-1.29.0rc1/PKG-INFO +41 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/setup.py +3 -3
- analytic_workspace_client-1.29.0rc1/src/analytic_workspace_client.egg-info/PKG-INFO +41 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/analytic_workspace_client.egg-info/SOURCES.txt +17 -1
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/analytic_workspace_client.egg-info/requires.txt +5 -4
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/analytic_workspace_client.egg-info/top_level.txt +1 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/core/bundle.py +39 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/core/compiler.py +165 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/core/model_vault.py +14 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/core/spark.py +15 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/__init__.py +4 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/application.py +49 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/dto.py +85 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/runtime.py +202 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/test_data.py +17 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/tools.py +99 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/model_dev/__init__.py +0 -0
- analytic_workspace_client-1.29.0rc1/src/aw_client/models/__init__.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/session.py +1 -1
- analytic_workspace_client-1.29.0rc1/src/aw_etl/__init__.py +0 -0
- analytic_workspace_client-1.29.0rc1/src/aw_etl/compiler.py +17 -0
- analytic_workspace_client-1.29.0rc1/src/aw_etl/etl_blocks.py +39 -0
- analytic_workspace_client-1.29.0rc1/src/aw_etl/models.py +22 -0
- analytic_workspace_client-1.2.0/PKG-INFO +0 -42
- analytic_workspace_client-1.2.0/src/analytic_workspace_client.egg-info/PKG-INFO +0 -42
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/README.md +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/pyproject.toml +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/setup.cfg +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/analytic_workspace_client.egg-info/dependency_links.txt +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/__init__.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/cache.py +0 -0
- {analytic_workspace_client-1.2.0/src/aw_client/model_dev → analytic_workspace_client-1.29.0rc1/src/aw_client/core}/__init__.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/__init__.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/base.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/v0.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/v1.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/v2.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/domain.py +0 -0
- analytic_workspace_client-1.2.0/src/aw_client/models/__init__.py → analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/services.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/exceptions.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/application.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/cache.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/runner.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/virtual_objects.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/models/model_schema.py +0 -0
- {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/tools.py +0 -0
analytic_workspace_client-1.29.0rc1/PKG-INFO ADDED
@@ -0,0 +1,41 @@
+Metadata-Version: 2.1
+Name: analytic_workspace_client
+Version: 1.29.0rc1
+Summary: Library for connecting to Analytic Workspace
+Home-page: https://analyticworkspace.ru/
+Author: Analytic Workspace
+Author-email: aw_help@analyticworkspace.ru
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8,<4
+Description-Content-Type: text/markdown
+Provides-Extra: dev
+Provides-Extra: ml
+
+# Library for Analytic Workspace
+
+## Getting a token
+
+Open https://aw.example.ru/data-master/get-token (replace https://aw.example.ru/ with the address of your Analytic Workspace server).
+
+It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
+
+## Usage example
+
+```python
+from aw_client import Session
+
+
+with open('aw_token', 'rt') as f:
+    aw_token = f.read()
+
+session = Session(token=aw_token, aw_url='https://aw.example.ru')
+
+# If the access token is set in the AW_DATA_TOKEN environment variable, the session object
+# can be created without passing the token parameter: session = Session(aw_url='https://aw.example.ru')
+
+df = session.load()  # df: pandas.DataFrame
+
+display(df)
+```
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/setup.py RENAMED
@@ -10,7 +10,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
 
 setup(
     name='analytic_workspace_client',
-    version='1.2.0',
+    version='1.29.0rc1',
 
     description='Library for connecting to Analytic Workspace',
     long_description=long_description,
@@ -41,8 +41,8 @@ setup(
     ],
 
     extras_require={
-        'dev': ['pyspark==3.
-        'ml': ['mlflow>=2.
+        'dev': ['pyspark==3.5.0', 'pytest>=8.2,<8.3'],
+        'ml': ['mlflow>=2.14,<2.15']
     },
 
     setup_requires=['wheel'],
analytic_workspace_client-1.29.0rc1/src/analytic_workspace_client.egg-info/PKG-INFO ADDED
@@ -0,0 +1,41 @@
+Metadata-Version: 2.1
+Name: analytic-workspace-client
+Version: 1.29.0rc1
+Summary: Library for connecting to Analytic Workspace
+Home-page: https://analyticworkspace.ru/
+Author: Analytic Workspace
+Author-email: aw_help@analyticworkspace.ru
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8,<4
+Description-Content-Type: text/markdown
+Provides-Extra: dev
+Provides-Extra: ml
+
+# Library for Analytic Workspace
+
+## Getting a token
+
+Open https://aw.example.ru/data-master/get-token (replace https://aw.example.ru/ with the address of your Analytic Workspace server).
+
+It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
+
+## Usage example
+
+```python
+from aw_client import Session
+
+
+with open('aw_token', 'rt') as f:
+    aw_token = f.read()
+
+session = Session(token=aw_token, aw_url='https://aw.example.ru')
+
+# If the access token is set in the AW_DATA_TOKEN environment variable, the session object
+# can be created without passing the token parameter: session = Session(aw_url='https://aw.example.ru')
+
+df = session.load()  # df: pandas.DataFrame
+
+display(df)
+```
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/analytic_workspace_client.egg-info/SOURCES.txt RENAMED
@@ -12,15 +12,31 @@ src/aw_client/domain.py
 src/aw_client/exceptions.py
 src/aw_client/session.py
 src/aw_client/tools.py
+src/aw_client/core/__init__.py
+src/aw_client/core/bundle.py
+src/aw_client/core/compiler.py
+src/aw_client/core/model_vault.py
+src/aw_client/core/spark.py
 src/aw_client/data_master/__init__.py
 src/aw_client/data_master/base.py
 src/aw_client/data_master/v0.py
 src/aw_client/data_master/v1.py
 src/aw_client/data_master/v2.py
+src/aw_client/etl_blocks/__init__.py
+src/aw_client/etl_blocks/application.py
+src/aw_client/etl_blocks/dto.py
+src/aw_client/etl_blocks/runtime.py
+src/aw_client/etl_blocks/services.py
+src/aw_client/etl_blocks/test_data.py
+src/aw_client/etl_blocks/tools.py
 src/aw_client/model_dev/__init__.py
 src/aw_client/model_dev/application.py
 src/aw_client/model_dev/cache.py
 src/aw_client/model_dev/runner.py
 src/aw_client/model_dev/virtual_objects.py
 src/aw_client/models/__init__.py
-src/aw_client/models/model_schema.py
+src/aw_client/models/model_schema.py
+src/aw_etl/__init__.py
+src/aw_etl/compiler.py
+src/aw_etl/etl_blocks.py
+src/aw_etl/models.py
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/analytic_workspace_client.egg-info/requires.txt RENAMED
@@ -1,11 +1,12 @@
-
+python-dotenv<1.1,>=1.0
 httpx<1.0,>=0.25
 pandas
 pydantic<2.0,>=1.10
-
+colorama<0.5,>=0.4
 
 [dev]
-pyspark==3.
+pyspark==3.5.0
+pytest<8.3,>=8.2
 
 [ml]
-mlflow<2.
+mlflow<2.15,>=2.14
analytic_workspace_client-1.29.0rc1/src/aw_client/core/bundle.py ADDED
@@ -0,0 +1,39 @@
+from typing import List, Dict, Union, Optional, Any
+
+
+class NamedObjectsBundle:
+    """ """
+    def __init__(self, objs: Dict[str, Any]):
+        self._obj_list: List[Any] = [df for _, df in objs.items()]
+        self._obj_named: Dict[str, Any] = objs
+
+    def first(self) -> Optional[Any]:
+        """ """
+        return self._obj_list[0] if self._obj_list else None
+
+    def as_list(self) -> List[Any]:
+        return self._obj_list
+
+    def as_named(self) -> Dict[str, Any]:
+        return self._obj_named
+
+    def __getitem__(self, item):
+        if isinstance(item, int):
+            # if item is an integer, look the object up by position
+            return self._obj_list[item]
+
+        if item not in self._obj_named:
+            raise Exception(f'No object data named "{item}" was found')
+        return self._obj_named[item]
+
+    def __iter__(self):
+        return iter(self._obj_named.values())
+
+    def __bool__(self):
+        return len(self._obj_list) > 0
+
+    def __len__(self):
+        return len(self._obj_list)
+
+    def items(self):
+        return self._obj_named.items()
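This bundle is what the ETL-block runtime below uses to hand upstream DataFrames and schemas to a block. A minimal usage sketch, with plain values standing in for the Spark DataFrames the runtime normally stores here (names and values are illustrative):

```python
from aw_client.core.bundle import NamedObjectsBundle

bundle = NamedObjectsBundle({'orders': 1, 'customers': 2})

assert bundle.first() == 1        # first object in insertion order
assert bundle[0] == 1             # integer key: positional lookup
assert bundle['customers'] == 2   # string key: lookup by model name
assert len(bundle) == 2 and bool(bundle)
for obj in bundle:                # iterates over the underlying values
    print(obj)
```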
analytic_workspace_client-1.29.0rc1/src/aw_client/core/compiler.py ADDED
@@ -0,0 +1,165 @@
+from typing import List, Tuple, Optional
+
+import builtins
+
+
+class CompiledModule:
+    """ """
+    def __init__(self, compiled_globals: dict):
+        self.compiled_globals = compiled_globals
+
+    def __contains__(self, item):
+        return item in self.compiled_globals
+
+    def __getattr__(self, item):
+        return self.compiled_globals[item]
+
+    def __getitem__(self, item):
+        return self.compiled_globals[item]
+
+    @property
+    def is_empty(self) -> bool:
+        """ Returns True if the module contains no code (only comments, for example) """
+        return len(set(self.compiled_globals) - {'__builtins__'}) == 0
+
+
+class ScriptCompiler:
+    """ """
+
+    class CompilerException(Exception):
+        """ """
+
+    # Exception list
+    class Misconfigured(CompilerException):
+        """ """
+
+    class ForbiddenImport(CompilerException):
+        """ Exception raised when the code contains disallowed imports """
+
+    class CannotCompile(CompilerException):
+        """ Exception raised when the code cannot be compiled """
+
+    # Compiler operating modes
+    MODE_ETL = 1
+    MODE_VIRTUAL_OBJECT_SCHEMA = 2
+    MODE_ETL_BLOCK = 3
+
+    def __init__(self, mode: Optional[int] = None):
+        self.mode = mode
+
+    def compile(self, source_code: str, mode: Optional[int] = None) -> CompiledModule:
+        """ """
+        try:
+            byte_code = compile(source_code, filename='<string>', mode='exec')
+        except SyntaxError as e:
+            raise ScriptCompiler.CannotCompile(f'Compilation error: {e.msg} (line {e.lineno}): {e.text}')
+
+        try:
+            compiled_globals = self._get_globals(mode=mode if mode is not None else self.mode)
+            exec(byte_code, compiled_globals)
+        except Exception as e:
+            raise ScriptCompiler.CompilerException(f'Compilation error: {e}')
+
+        return CompiledModule(compiled_globals=compiled_globals)
+
+    def _get_globals(self, mode: int) -> dict:
+        """ """
+        if mode == ScriptCompiler.MODE_ETL:
+            safe_names, safe_modules = self._safe_globals_for_model_etl_script()
+        elif mode == ScriptCompiler.MODE_ETL_BLOCK:
+            safe_names, safe_modules = self._safe_globals_for_etl_block()
+        elif mode == ScriptCompiler.MODE_VIRTUAL_OBJECT_SCHEMA:
+            safe_names, safe_modules = self._safe_globals_for_virtual_object_schema_script()
+        else:
+            raise ScriptCompiler.Misconfigured(f'Invalid compilation mode {self.mode}')
+
+        def safe_import(name, globals=None, locals=None, fromlist=(), level=0):
+            """ """
+            modules = name.split('.')
+            if modules[0] not in safe_modules:
+                raise ScriptCompiler.ForbiddenImport(f'Import of module {modules[0]} is forbidden')
+
+            return __import__(name, globals, locals, fromlist, level)
+
+        safe_builtins = {}
+        for name in safe_names:
+            safe_builtins[name] = getattr(builtins, name)
+
+        safe_builtins['__import__'] = safe_import
+
+        return {'__builtins__': safe_builtins}
+
+    @staticmethod
+    def _safe_globals_for_virtual_object_schema_script() -> Tuple[List[str], List[str]]:
+        """ """
+        safe_names = [
+            'None', 'False', 'True', 'bool', 'bytes', 'chr', 'complex', 'float',
+            'hex', 'id', 'int', 'str', 'getattr', 'setattr', 'delattr',
+        ]
+
+        safe_modules = ['pyspark', 'aw_etl']
+
+        return safe_names, safe_modules
+
+    @staticmethod
+    def _safe_globals_full() -> Tuple[List[str], List[str]]:
+        """ Returns the full set of allowed names and modules """
+        safe_names = [
+            'None', 'False', 'True',
+            'abs', 'all', 'any', 'ascii',
+            'bin', 'bool', 'bytes', 'bytearray',
+            'callable', 'chr', 'classmethod', 'complex',
+            'delattr', 'dict', 'divmod',
+            'enumerate',
+            'float', 'filter', 'format', 'frozenset',
+            'getattr',
+            'hasattr', 'hash', 'hex',
+            'id', 'int', 'isinstance', 'issubclass', 'iter',
+            'len', 'list',
+            'map', 'max', 'min',
+            'next',
+            'object', 'oct', 'ord',
+            'pow', 'print', 'property',
+            'range', 'repr', 'reversed', 'round',
+            'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
+            'type', 'tuple',
+            'vars',
+            'zip',
+            '__build_class__', '__name__',
+
+            'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'BufferError', 'BytesWarning',
+            'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError', 'FutureWarning',
+            'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
+            'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplementedError', 'OSError',
+            'OverflowError', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning',
+            'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError',
+            'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
+            'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning', 'ZeroDivisionError',
+        ]
+
+        safe_modules = [
+            'pyspark', 'requests', 'pandas', 'numpy', 'aw_etl', 'pyparsing', 'pydantic',
+
+            'mlflow', 'prophet', 'statmodels', 'torch', 'sklearn', 'numpy', 'catboost',
+
+            'array', 'calendar', 'codecs', 'collections', 'copy', 'csv', 'dataclasses', 'datetime', '_strptime',
+            'decimal', 'enum', 'functools', 'hashlib', 'itertools', 'json', 'math', 'queue', 'random', 're',
+            'statistics', 'string', 'time', 'urllib', 'xml',
+            'zoneinfo', 'typing', 'uuid', 'logging',
+        ]
+
+        return safe_names, safe_modules
+
+    @staticmethod
+    def _safe_globals_for_model_etl_script() -> Tuple[List[str], List[str]]:
+        """ """
+        return ScriptCompiler._safe_globals_full()
+
+    @staticmethod
+    def _safe_globals_for_etl_block() -> Tuple[List[str], List[str]]:
+        """ """
+        safe_names, safe_modules = ScriptCompiler._safe_globals_full()
+        safe_modules.extend(['sqlglot', 'inspect'])
+
+        return safe_names, safe_modules
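The compiler executes user scripts against a restricted `__builtins__` and a whitelisting `__import__`. A short sketch of driving it, with an illustrative script source; note that the `ForbiddenImport` surfaces as a `CompilerException` because errors during `exec` are re-wrapped:

```python
from aw_client.core.compiler import ScriptCompiler

compiler = ScriptCompiler(mode=ScriptCompiler.MODE_ETL_BLOCK)

# json is on the safe_modules list, so this compiles and runs
module = compiler.compile('import json\nVALUE = json.dumps({"a": 1})\n')
print(module['VALUE'])   # names are read back from the module globals
print(module.is_empty)   # False: the script defined a name

try:
    compiler.compile('import os\n')   # os is not on the safe_modules list
except ScriptCompiler.CompilerException as e:
    print(e)   # Compilation error: Import of module os is forbidden
```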
analytic_workspace_client-1.29.0rc1/src/aw_client/core/model_vault.py ADDED
@@ -0,0 +1,14 @@
+from typing import Optional, Dict, Any
+
+
+class Vault:
+    """ """
+    def __init__(self, values: Optional[dict] = None):
+        self._values = values or {}
+
+    @property
+    def values(self) -> dict:
+        return self._values
+
+    def get(self, name: str) -> Optional[Any]:
+        return self._values.get(name, None)
analytic_workspace_client-1.29.0rc1/src/aw_client/core/spark.py ADDED
@@ -0,0 +1,15 @@
+
+try:
+    from pyspark.sql import SparkSession
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+def build_spark_session():
+    """ """
+    return SparkSession.builder \
+        .master('local[*]') \
+        .config('spark.driver.host', '127.0.0.1') \
+        .config('spark.ui.enabled', 'false') \
+        .getOrCreate()
+
analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/application.py ADDED
@@ -0,0 +1,49 @@
+from typing import Optional, Callable
+
+try:
+    from pyspark.sql import SparkSession
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+from aw_client.core.model_vault import Vault
+from aw_client.core.compiler import CompiledModule
+
+
+class ETLBlockApplication:
+    """ """
+    def __init__(self,
+                 spark_builder: Callable,
+                 run_mode: str,
+                 vault: Vault,
+                 model_module: Optional[CompiledModule] = None):
+        self._spark_builder = spark_builder
+        self._spark = None
+        self._run_mode = run_mode
+        self._model_module = model_module
+        self._vault = vault
+
+    @property
+    def spark(self) -> SparkSession:
+        if self._spark is None:
+            self._spark = self._spark_builder()
+        return self._spark
+
+    @property
+    def is_spark_initialized(self) -> bool:
+        return self._spark is not None
+
+    @property
+    def model_module(self) -> Optional[CompiledModule]:
+        """ """
+        return self._model_module
+
+    @property
+    def vault(self) -> Vault:
+        """ """
+        return self._vault
+
+    @property
+    def run_mode(self) -> str:
+        """ """
+        return self._run_mode
+
analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/dto.py ADDED
@@ -0,0 +1,85 @@
+from typing import Optional, Any, List, Union
+
+import datetime
+from enum import Enum
+
+from pydantic import BaseModel
+
+
+class ETLBlockParamType(str, Enum):
+    """ """
+    STRING = 'string'
+    TEXT = 'text'
+    PASSWORD = 'password'
+    SQL_TEXT = 'sql_text'
+    NUMBER = 'number'
+    FLOAT = 'float'
+    BOOL = 'bool'
+    DATE = 'date'
+    DATETIME = 'datetime'
+    SELECT = 'select'
+    ACTION = 'action'
+
+
+class ETLBlockParamGroupType(str, Enum):
+    """ """
+    GROUP = 'group'
+
+
+class ETLBlockParamActionType(str, Enum):
+    """ """
+    ACTION = 'action'
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+# ETL block metadata
+# ----------------------------------------------------------------------------------------------------------------------
+class ETLBlockParam(BaseModel):
+    """ ETL block parameter """
+    code: str
+    name: str
+    type: ETLBlockParamType
+    description: Optional[str] = None
+
+    required: bool
+    mult: bool
+    domain: Optional[Any] = None
+    extra: Optional[Any] = None
+
+
+class ETLBlockParamGroup(BaseModel):
+    """ Group of ETL block parameters """
+    code: str
+    name: str
+
+    type: ETLBlockParamGroupType
+    description: Optional[str] = None
+    view_options: Optional[dict] = None
+    mult: bool
+    params: List[Union['ETLBlockParam', 'ETLBlockParamAction']] = []
+    extra: Optional[Any] = None
+
+
+class ETLBlockParamAction(BaseModel):
+    code: str
+    name: str
+    type: ETLBlockParamActionType
+    description: Optional[str] = None
+    action: str
+    extra: Optional[Any] = None
+
+
+class ETLBlockMeta(BaseModel):
+    """ ETL block metadata """
+    uid: str  # unique block identifier
+    name: str  # block name
+    version: str  # block version
+    description: str  # block description
+    author: str  # block author
+    updated_at: datetime.datetime  # date and time of the last update
+    params: List[Union[ETLBlockParam, ETLBlockParamAction, ETLBlockParamGroup]]
+    engine_requires: Optional[List[str]] = []
+
+    @property
+    def verbose_name(self):
+        return f'{self.name} v{self.version}'
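`get_etl_block_meta` in the runtime below feeds a `block_meta.json` file into `ETLBlockMeta.parse_raw`. A hedged sketch of a payload these models would accept (pydantic v1 is assumed, per requires.txt; every field value here is invented for illustration):

```python
from aw_client.etl_blocks.dto import ETLBlockMeta

raw = '''
{
  "uid": "example-block",
  "name": "Example block",
  "version": "0.1",
  "description": "Demo block",
  "author": "demo",
  "updated_at": "2024-01-01T00:00:00",
  "params": [
    {"code": "limit", "name": "Limit", "type": "number", "required": false, "mult": false}
  ]
}
'''

meta = ETLBlockMeta.parse_raw(raw)   # pydantic v1 JSON parsing
print(meta.verbose_name)             # "Example block v0.1"
```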
analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/runtime.py ADDED
@@ -0,0 +1,202 @@
+from typing import Dict, Optional, Any, Union, List, Literal
+from pathlib import Path
+from collections import OrderedDict
+import datetime
+import inspect
+
+from aw_client.core.compiler import ScriptCompiler
+from aw_client.core.model_vault import Vault
+from aw_client.core.spark import build_spark_session
+from aw_client.core.bundle import NamedObjectsBundle
+from aw_client.models.model_schema import ModelObject, ModelObjectField
+from .application import ETLBlockApplication
+from .test_data import ModelObjectTestData
+from .tools import build_dataframe, build_model_object, build_spark_schema
+from .dto import ETLBlockMeta
+
+
+try:
+    from pyspark.sql import SparkSession, DataFrame
+    from pyspark.sql.types import DataType, StringType, DoubleType, TimestampType, LongType, BooleanType, \
+        ByteType, ShortType, IntegerType, DecimalType, FloatType, DateType, StructType, StructField
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+def get_etl_block_meta(block_path: Path) -> ETLBlockMeta:
+    """ """
+    block_meta_path = block_path / 'block_meta.json' if block_path.is_dir() else block_path
+
+    if not block_meta_path.exists():
+        raise Exception(f'Block metadata file not found: {block_meta_path}')
+
+    with open(block_meta_path, 'rt') as f:
+        return ETLBlockMeta.parse_raw(f.read())
+
+
+def get_etl_block_schema(block_path: Path,
+                         test_data: Union[ModelObjectTestData, List[ModelObjectTestData]],
+                         params: Optional[Dict] = None,
+                         run_mode: Optional[Literal['']] = None,
+                         vault: Optional[Vault] = None,
+                         model_script_code: Optional[str] = None) -> StructType:
+    """
+    Args:
+
+    """
+    block_code_path = block_path / 'block_code.py' if block_path.is_dir() else block_path
+
+    if not block_code_path.exists():
+        raise Exception(f'Block source code file not found: {block_code_path}')
+
+    with open(block_code_path, 'rt') as f:
+        block_code = f.read()
+
+    # Compile the block code
+    try:
+        block_module = ScriptCompiler().compile(source_code=block_code, mode=ScriptCompiler.MODE_ETL_BLOCK)
+    except ScriptCompiler.CannotCompile as e:
+        raise Exception(f'Error compiling the block source code: {e}')
+
+    # Compile the model code
+    if model_script_code:
+        try:
+            model_module = ScriptCompiler().compile(source_code=model_script_code, mode=ScriptCompiler.MODE_ETL)
+        except ScriptCompiler.CannotCompile as e:
+            raise Exception(f'Error compiling the model script source code: {e}')
+    else:
+        model_module = None
+
+    spark = build_spark_session()
+
+    # Child dataframes
+    dataframes = OrderedDict()
+    for td in (test_data if isinstance(test_data, list) else [test_data]):
+        df = build_dataframe(spark, td)
+        if not dataframes:
+            dataframes['child'] = df
+        dataframes[td.model_name] = df
+
+    upstream_dataframes = NamedObjectsBundle(dataframes)
+
+    # Child schemas
+    schemas = OrderedDict()
+    for td in (test_data if isinstance(test_data, list) else [test_data]):
+        schema = build_spark_schema(td)
+        if not schemas:
+            schemas['child'] = schema
+        schemas[td.model_name] = schema
+
+    upstream_schemas = NamedObjectsBundle(schemas)
+
+    block_schema_parameters = inspect.signature(block_module['block_schema']).parameters
+
+    app = ETLBlockApplication(
+        spark_builder=build_spark_session,
+        run_mode=run_mode or 'full',
+        vault=vault or Vault(),
+        model_module=model_module
+    )
+
+    # Determine which parameters to pass
+    block_schema_kwargs = {}
+    if 'params' in block_schema_parameters:
+        block_schema_kwargs['params'] = params
+    if 'app' in block_schema_parameters:
+        block_schema_kwargs['app'] = app
+    if 'model_object' in block_schema_parameters:
+        block_schema_kwargs['model_object'] = build_model_object(test_data)
+    if 'schema' in block_schema_parameters:
+        block_schema_kwargs['schema'] = upstream_schemas.first()
+    if 'schemas' in block_schema_parameters:
+        block_schema_kwargs['schemas'] = upstream_schemas
+    if 'upstream_schema' in block_schema_parameters:
+        block_schema_kwargs['upstream_schema'] = upstream_schemas.first()
+    if 'upstream_schemas' in block_schema_parameters:
+        block_schema_kwargs['upstream_schemas'] = upstream_schemas
+    if 'df' in block_schema_parameters:
+        block_schema_kwargs['df'] = upstream_dataframes.first()
+    if 'dfs' in block_schema_parameters:
+        block_schema_kwargs['dfs'] = upstream_dataframes
+    if 'upstream_dataframe' in block_schema_parameters:
+        block_schema_kwargs['upstream_dataframe'] = upstream_dataframes.first()
+    if 'upstream_dataframes' in block_schema_parameters:
+        block_schema_kwargs['upstream_dataframes'] = upstream_dataframes
+
+    return block_module['block_schema'](**block_schema_kwargs)
+
+
+def get_etl_block_data(block_path: Path,
+                       test_data: Union[ModelObjectTestData, List[ModelObjectTestData]],
+                       params: Optional[Dict] = None,
+                       run_mode: Optional[Literal['']] = None,
+                       vault: Optional[Vault] = None,
+                       model_script_code: Optional[str] = None) -> DataFrame:
+    """
+    Args:
+
+    """
+    block_code_path = block_path / 'block_code.py' if block_path.is_dir() else block_path
+
+    if not block_code_path.exists():
+        raise Exception(f'Block source code file not found: {block_code_path}')
+
+    with open(block_code_path, 'rt') as f:
+        block_code = f.read()
+
+    # Compile the block code
+    try:
+        block_module = ScriptCompiler().compile(source_code=block_code, mode=ScriptCompiler.MODE_ETL_BLOCK)
+    except ScriptCompiler.CannotCompile as e:
+        raise Exception(f'Error compiling the block source code: {e}')
+
+    # Compile the model code
+    if model_script_code:
+        try:
+            model_module = ScriptCompiler().compile(source_code=model_script_code, mode=ScriptCompiler.MODE_ETL)
+        except ScriptCompiler.CannotCompile as e:
+            raise Exception(f'Error compiling the model script source code: {e}')
+    else:
+        model_module = None
+
+    spark = build_spark_session()
+
+    # Child dataframes
+    dataframes = OrderedDict()
+    for td in (test_data if isinstance(test_data, list) else [test_data]):
+        df = build_dataframe(spark, td)
+        if not dataframes:
+            dataframes['child'] = df
+        dataframes[td.model_name] = df
+
+    upstream_dataframes = NamedObjectsBundle(dataframes)
+
+    block_data_parameters = inspect.signature(block_module['block_data']).parameters
+
+    app = ETLBlockApplication(
+        spark_builder=build_spark_session,
+        run_mode=run_mode or 'full',
+        vault=vault or Vault(),
+        model_module=model_module
+    )
+
+    # Determine which parameters to pass
+    block_schema_kwargs = {}
+    if 'params' in block_data_parameters:
+        block_schema_kwargs['params'] = params
+    if 'app' in block_data_parameters:
+        block_schema_kwargs['app'] = app
+    if 'model_object' in block_data_parameters:
+        block_schema_kwargs['model_object'] = build_model_object(test_data)
+    if 'df' in block_data_parameters:
+        block_schema_kwargs['df'] = upstream_dataframes.first()
+    if 'dfs' in block_data_parameters:
+        block_schema_kwargs['dfs'] = upstream_dataframes
+    if 'upstream_dataframe' in block_data_parameters:
+        block_schema_kwargs['upstream_dataframe'] = upstream_dataframes.first()
+    if 'upstream_dataframes' in block_data_parameters:
+        block_schema_kwargs['upstream_dataframes'] = upstream_dataframes
+
+    return block_module['block_data'](**block_schema_kwargs)
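Both entry points introspect the block's `block_schema`/`block_data` signatures and inject only the arguments the block names. A minimal sketch of a block and a driver, assuming pyspark is installed via the `[dev]` extra; the file name, test rows, and the `limit` transformation are all illustrative:

```python
# block_code.py -- a hypothetical minimal ETL block; parameter names follow the
# introspection above ('df' receives the first upstream DataFrame, 'params' the dict)
def block_schema(df, params):
    # output schema: this block keeps the upstream schema unchanged
    return df.schema

def block_data(df, params):
    # output data: keep at most params['limit'] rows of the upstream DataFrame
    return df.limit(int(params['limit']))

# driver (a separate script) -- exercising the block against in-memory test data
from pathlib import Path

from aw_client.etl_blocks.runtime import get_etl_block_data, get_etl_block_schema
from aw_client.etl_blocks.test_data import ModelObjectTestData

td = ModelObjectTestData(model_name='orders', rows=[{'id': 1, 'amount': 2.5}])

print(get_etl_block_schema(Path('block_code.py'), test_data=td, params={'limit': 1}))
get_etl_block_data(Path('block_code.py'), test_data=td, params={'limit': 1}).show()
```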
analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/test_data.py ADDED
@@ -0,0 +1,17 @@
+from typing import List, TypedDict, Optional
+
+from dataclasses import dataclass, field
+
+
+class ModelObjectSchemaField(TypedDict):
+    """ """
+    model_name: str
+    simple_type: str
+
+
+@dataclass
+class ModelObjectTestData:
+    """ """
+    model_name: str
+    rows: List[dict] = field(default_factory=list)
+    schema: Optional[List[ModelObjectSchemaField]] = None
analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/tools.py ADDED
@@ -0,0 +1,99 @@
+from typing import Any
+import datetime
+
+
+from aw_client.models.model_schema import ModelObject, ModelObjectField
+from .test_data import ModelObjectTestData
+
+
+try:
+    from pyspark.sql import SparkSession, DataFrame
+    from pyspark.sql.types import DataType, StringType, DoubleType, TimestampType, LongType, BooleanType, \
+        ByteType, ShortType, IntegerType, DecimalType, FloatType, DateType, StructType, StructField
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+def build_spark_schema(test_data: ModelObjectTestData) -> StructType:
+    """ """
+    if test_data.schema is not None:
+        return StructType(fields=[
+            StructField(sf['model_name'], spark_type_for_simple_type(sf['simple_type']), True) for sf in test_data.schema
+        ])
+    return StructType(fields=[
+        StructField(n, spark_type_for_python_value(v), True) for n, v in test_data.rows[0].items()
+    ])
+
+
+def build_dataframe(spark: SparkSession, test_data: ModelObjectTestData) -> DataFrame:
+    """ """
+    return spark.createDataFrame(test_data.rows, schema=build_spark_schema(test_data))
+
+
+def build_model_object(test_data: ModelObjectTestData):
+    """ """
+    if test_data.schema is not None:
+        fields = [
+            ModelObjectField(name=f['model_name'], model_name=f['model_name'], simple_type=f['simple_type']) for f in test_data.schema
+        ]
+    else:
+        fields = [
+            ModelObjectField(name=n, model_name=n, simple_type=simple_type_for_python_value(v)) for n, v in test_data.rows[0].items()
+        ]
+    return ModelObject(
+        name=test_data.model_name,
+        model_name=test_data.model_name,
+        type='table',
+        sql_text=None,
+        fields=fields
+    )
+
+
+def spark_type_for_simple_type(simple_type: str) -> DataType:
+    """
+    """
+    if simple_type == 'number':
+        return LongType()
+    elif simple_type == 'float':
+        return DoubleType()
+    elif simple_type == 'date':
+        return TimestampType()
+    elif simple_type == 'bool':
+        return BooleanType()
+    else:
+        return StringType()
+
+
+def spark_type_for_python_value(value: Any) -> DataType:
+    """
+    Returns Spark type
+    """
+    # Check datetime before date and bool before int: datetime is a subclass of date,
+    # and bool is a subclass of int, so the broader checks would otherwise shadow these.
+    if isinstance(value, datetime.datetime):
+        return TimestampType()
+    if isinstance(value, datetime.date):
+        return DateType()
+    if isinstance(value, bool):
+        return BooleanType()
+    if isinstance(value, int):
+        return LongType()
+    if isinstance(value, float):
+        return DoubleType()
+    return StringType()
+
+
+def simple_type_for_python_value(value: Any) -> str:
+    """ """
+    if isinstance(value, datetime.datetime):
+        return 'date'
+    if isinstance(value, datetime.date):
+        return 'date'
+    # bool is a subclass of int, so test it first
+    if isinstance(value, bool):
+        return 'bool'
+    if isinstance(value, int):
+        return 'number'
+    if isinstance(value, float):
+        return 'float'
+    return 'string'
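A small sketch of how these helpers infer types from test rows, assuming the `[dev]` extra is installed (sample values are illustrative):

```python
import datetime

from aw_client.etl_blocks.test_data import ModelObjectTestData
from aw_client.etl_blocks.tools import (
    build_spark_schema, simple_type_for_python_value, spark_type_for_python_value)

print(simple_type_for_python_value(3))                         # 'number'
print(simple_type_for_python_value(3.5))                       # 'float'
print(spark_type_for_python_value(datetime.date(2024, 1, 1)))  # DateType()

# Without an explicit schema, the first row's values determine the Spark schema
td = ModelObjectTestData(model_name='orders', rows=[{'id': 1, 'name': 'a'}])
print(build_spark_schema(td))  # StructType: id LongType, name StringType
```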
analytic_workspace_client-1.29.0rc1/src/aw_client/model_dev/__init__.py ADDED (empty file)
analytic_workspace_client-1.29.0rc1/src/aw_client/models/__init__.py ADDED (empty file)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/session.py RENAMED
@@ -136,7 +136,7 @@ class Session:
             import mlflow
         except ImportError:
             raise AwClientMisconfigured(
-                'To use MLFlow, install
+                'To use MLFlow, install the library with the ml extra: pip install analytic-workspace-client[ml]')
 
         if not self.token:
             data_master_url = urljoin(self.aw_url, 'data-master/get-token')

analytic_workspace_client-1.29.0rc1/src/aw_etl/__init__.py ADDED (empty file)
analytic_workspace_client-1.29.0rc1/src/aw_etl/compiler.py ADDED
@@ -0,0 +1,17 @@
+from typing import Protocol
+
+
+class CompiledModule(Protocol):
+    """ """
+    def __contains__(self, item):
+        """ """
+
+    def __getattr__(self, item):
+        """ """
+
+    def __getitem__(self, item):
+        """ """
+
+    @property
+    def is_empty(self) -> bool:
+        """ """
analytic_workspace_client-1.29.0rc1/src/aw_etl/etl_blocks.py ADDED
@@ -0,0 +1,39 @@
+from typing import Protocol, Optional
+
+try:
+    from pyspark.sql import SparkSession
+except ImportError:
+    raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+from aw_etl.models import Vault
+from aw_etl.compiler import CompiledModule
+
+
+class InvalidEtlBlock(Exception):
+    """ """
+
+
+class ETLBlockApplication(Protocol):
+    """ """
+    @property
+    def spark(self) -> SparkSession:
+        """ """
+
+    @property
+    def is_spark_initialized(self) -> bool:
+        """ """
+
+    @property
+    def model_module(self) -> Optional[CompiledModule]:
+        """ """
+
+    @property
+    def vault(self) -> Vault:
+        """ """
+
+    @property
+    def run_mode(self) -> str:
+        """ """
+
analytic_workspace_client-1.29.0rc1/src/aw_etl/models.py ADDED
@@ -0,0 +1,22 @@
+from typing import Protocol, List, Any
+
+
+class ModelObjectField(Protocol):
+    """ """
+    @property
+    def simple_type(self) -> str:
+        """ """
+
+
+class ModelObject(Protocol):
+    """ """
+    @property
+    def fields(self) -> List[ModelObjectField]:
+        """ """
+
+
+class Vault(Protocol):
+    """ """
+    def get(self, name: str) -> Any:
+        """ """
+
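These `Protocol` classes describe, for block code compiled under the `aw_etl` namespace, the shape of the objects the runtime supplies; the concrete `aw_client.core.model_vault.Vault` shown earlier matches the `Vault` protocol structurally. A tiny sketch (the key name is illustrative):

```python
from aw_client.core.model_vault import Vault  # concrete implementation from aw_client.core

vault = Vault({'api_key': 'secret'})
print(vault.get('api_key'))  # 'secret'
print(vault.get('missing'))  # None: absent names fall back to None
```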
analytic_workspace_client-1.2.0/PKG-INFO DELETED
@@ -1,42 +0,0 @@
-Metadata-Version: 2.1
-Name: analytic_workspace_client
-Version: 1.2.0
-Summary: Library for connecting to Analytic Workspace
-Home-page: https://analyticworkspace.ru/
-Author: Analytic Workspace
-Author-email: aw_help@analyticworkspace.ru
-License: UNKNOWN
-Description: # Library for Analytic Workspace
-
-## Getting a token
-
-Open https://aw.example.ru/data-master/get-token (replace https://aw.example.ru/ with the address of your Analytic Workspace server).
-
-It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
-
-## Usage example
-
-```python
-from aw_client import Session
-
-
-with open('aw_token', 'rt') as f:
-    aw_token = f.read()
-
-session = Session(token=aw_token, aw_url='https://aw.example.ru')
-
-# If the access token is set in the AW_DATA_TOKEN environment variable, the session object
-# can be created without passing the token parameter: session = Session(aw_url='https://aw.example.ru')
-
-df = session.load()  # df: pandas.DataFrame
-
-display(df)
-```
-Platform: UNKNOWN
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.8,<4
-Description-Content-Type: text/markdown
-Provides-Extra: dev
-Provides-Extra: ml
analytic_workspace_client-1.2.0/src/analytic_workspace_client.egg-info/PKG-INFO DELETED
@@ -1,42 +0,0 @@
-Metadata-Version: 2.1
-Name: analytic-workspace-client
-Version: 1.2.0
-Summary: Library for connecting to Analytic Workspace
-Home-page: https://analyticworkspace.ru/
-Author: Analytic Workspace
-Author-email: aw_help@analyticworkspace.ru
-License: UNKNOWN
-Description: # Library for Analytic Workspace
-
-## Getting a token
-
-Open https://aw.example.ru/data-master/get-token (replace https://aw.example.ru/ with the address of your Analytic Workspace server).
-
-It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
-
-## Usage example
-
-```python
-from aw_client import Session
-
-
-with open('aw_token', 'rt') as f:
-    aw_token = f.read()
-
-session = Session(token=aw_token, aw_url='https://aw.example.ru')
-
-# If the access token is set in the AW_DATA_TOKEN environment variable, the session object
-# can be created without passing the token parameter: session = Session(aw_url='https://aw.example.ru')
-
-df = session.load()  # df: pandas.DataFrame
-
-display(df)
-```
-Platform: UNKNOWN
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.8,<4
-Description-Content-Type: text/markdown
-Provides-Extra: dev
-Provides-Extra: ml
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/README.md RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/pyproject.toml RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/setup.cfg RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/analytic_workspace_client.egg-info/dependency_links.txt RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/__init__.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/cache.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0/src/aw_client/model_dev → analytic_workspace_client-1.29.0rc1/src/aw_client/core}/__init__.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/__init__.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/base.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/v0.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/v1.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/data_master/v2.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/domain.py RENAMED (file without changes)
analytic_workspace_client-1.2.0/src/aw_client/models/__init__.py → analytic_workspace_client-1.29.0rc1/src/aw_client/etl_blocks/services.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/exceptions.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/application.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/cache.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/runner.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/model_dev/virtual_objects.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/models/model_schema.py RENAMED (file without changes)
{analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc1}/src/aw_client/tools.py RENAMED (file without changes)