analytic-workspace-client 1.2.0__tar.gz → 1.29.0rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. analytic_workspace_client-1.29.0rc2/PKG-INFO +41 -0
  2. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/README.md +1 -1
  3. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/setup.py +3 -3
  4. analytic_workspace_client-1.29.0rc2/src/analytic_workspace_client.egg-info/PKG-INFO +41 -0
  5. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/analytic_workspace_client.egg-info/SOURCES.txt +17 -1
  6. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/analytic_workspace_client.egg-info/requires.txt +5 -4
  7. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/analytic_workspace_client.egg-info/top_level.txt +1 -0
  8. analytic_workspace_client-1.29.0rc2/src/aw_client/core/bundle.py +39 -0
  9. analytic_workspace_client-1.29.0rc2/src/aw_client/core/compiler.py +165 -0
  10. analytic_workspace_client-1.29.0rc2/src/aw_client/core/model_vault.py +14 -0
  11. analytic_workspace_client-1.29.0rc2/src/aw_client/core/spark.py +15 -0
  12. analytic_workspace_client-1.29.0rc2/src/aw_client/etl_blocks/__init__.py +4 -0
  13. analytic_workspace_client-1.29.0rc2/src/aw_client/etl_blocks/application.py +49 -0
  14. analytic_workspace_client-1.29.0rc2/src/aw_client/etl_blocks/dto.py +85 -0
  15. analytic_workspace_client-1.29.0rc2/src/aw_client/etl_blocks/runtime.py +202 -0
  16. analytic_workspace_client-1.29.0rc2/src/aw_client/etl_blocks/test_data.py +17 -0
  17. analytic_workspace_client-1.29.0rc2/src/aw_client/etl_blocks/tools.py +99 -0
  18. analytic_workspace_client-1.29.0rc2/src/aw_client/model_dev/__init__.py +0 -0
  19. analytic_workspace_client-1.29.0rc2/src/aw_client/models/__init__.py +0 -0
  20. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/session.py +1 -1
  21. analytic_workspace_client-1.29.0rc2/src/aw_etl/__init__.py +0 -0
  22. analytic_workspace_client-1.29.0rc2/src/aw_etl/compiler.py +17 -0
  23. analytic_workspace_client-1.29.0rc2/src/aw_etl/etl_blocks.py +39 -0
  24. analytic_workspace_client-1.29.0rc2/src/aw_etl/models.py +22 -0
  25. analytic_workspace_client-1.2.0/PKG-INFO +0 -42
  26. analytic_workspace_client-1.2.0/src/analytic_workspace_client.egg-info/PKG-INFO +0 -42
  27. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/pyproject.toml +0 -0
  28. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/setup.cfg +0 -0
  29. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/analytic_workspace_client.egg-info/dependency_links.txt +0 -0
  30. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/__init__.py +0 -0
  31. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/cache.py +0 -0
  32. {analytic_workspace_client-1.2.0/src/aw_client/model_dev → analytic_workspace_client-1.29.0rc2/src/aw_client/core}/__init__.py +0 -0
  33. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/data_master/__init__.py +0 -0
  34. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/data_master/base.py +0 -0
  35. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/data_master/v0.py +0 -0
  36. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/data_master/v1.py +0 -0
  37. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/data_master/v2.py +0 -0
  38. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/domain.py +0 -0
  39. /analytic_workspace_client-1.2.0/src/aw_client/models/__init__.py → /analytic_workspace_client-1.29.0rc2/src/aw_client/etl_blocks/services.py +0 -0
  40. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/exceptions.py +0 -0
  41. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/model_dev/application.py +0 -0
  42. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/model_dev/cache.py +0 -0
  43. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/model_dev/runner.py +0 -0
  44. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/model_dev/virtual_objects.py +0 -0
  45. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/models/model_schema.py +0 -0
  46. {analytic_workspace_client-1.2.0 → analytic_workspace_client-1.29.0rc2}/src/aw_client/tools.py +0 -0
analytic_workspace_client-1.29.0rc2/PKG-INFO
@@ -0,0 +1,41 @@
+ Metadata-Version: 2.1
+ Name: analytic_workspace_client
+ Version: 1.29.0rc2
+ Summary: Library for connecting to Analytic Workspace
+ Home-page: https://analyticworkspace.ru/
+ Author: Analytic Workspace
+ Author-email: aw_help@analyticworkspace.ru
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8,<4
+ Description-Content-Type: text/markdown
+ Provides-Extra: dev
+ Provides-Extra: ml
+
+ # Library for Analytic Workspace
+
+ ## Getting a token
+
+ Follow the link https://aw.example.ru/data-master/get-token (replacing https://aw.example.ru/ with the address of your Analytic Workspace server).
+
+ It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
+
+ ## Usage example
+
+ ```python
+ from aw_client import Session
+
+
+ with open('aw_token', 'rt') as f:
+     aw_token = f.read()
+
+ session = Session(token=aw_token, aw_url='https://aw.example.ru')
+
+ # If the access token is set in the AW_DATA_TOKEN environment variable, the session object
+ # can be created without passing the token parameter explicitly: session = Session(aw_url='https://aw.example.ru')
+
+ df = session.load(model_id=123)  # df: pandas.DataFrame
+
+ display(df)
+ ```
README.md
@@ -20,7 +20,7 @@ session = Session(token=aw_token, aw_url='https://aw.example.ru')
  # If the access token is set in the AW_DATA_TOKEN environment variable, the session object
  # can be created without passing the token parameter explicitly: session = Session(aw_url='https://aw.example.ru')

- df = session.load()  # df: pandas.DataFrame
+ df = session.load(model_id=123)  # df: pandas.DataFrame

  display(df)
  ```
setup.py
@@ -10,7 +10,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")

  setup(
      name='analytic_workspace_client',
-     version='1.2.0',
+     version='1.29.0rc2',

      description='Library for connecting to Analytic Workspace',
      long_description=long_description,
@@ -41,8 +41,8 @@ setup(
      ],

      extras_require={
-         'dev': ['pyspark==3.4.1'],
-         'ml': ['mlflow>=2.7,<2.8']
+         'dev': ['pyspark==3.5.0', 'pytest>=8.2,<8.3'],
+         'ml': ['mlflow>=2.14,<2.15']
      },

      setup_requires=['wheel'],
src/analytic_workspace_client.egg-info/PKG-INFO
@@ -0,0 +1,41 @@
+ Metadata-Version: 2.1
+ Name: analytic-workspace-client
+ Version: 1.29.0rc2
+ Summary: Library for connecting to Analytic Workspace
+ Home-page: https://analyticworkspace.ru/
+ Author: Analytic Workspace
+ Author-email: aw_help@analyticworkspace.ru
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8,<4
+ Description-Content-Type: text/markdown
+ Provides-Extra: dev
+ Provides-Extra: ml
+
+ # Library for Analytic Workspace
+
+ ## Getting a token
+
+ Follow the link https://aw.example.ru/data-master/get-token (replacing https://aw.example.ru/ with the address of your Analytic Workspace server).
+
+ It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
+
+ ## Usage example
+
+ ```python
+ from aw_client import Session
+
+
+ with open('aw_token', 'rt') as f:
+     aw_token = f.read()
+
+ session = Session(token=aw_token, aw_url='https://aw.example.ru')
+
+ # If the access token is set in the AW_DATA_TOKEN environment variable, the session object
+ # can be created without passing the token parameter explicitly: session = Session(aw_url='https://aw.example.ru')
+
+ df = session.load(model_id=123)  # df: pandas.DataFrame
+
+ display(df)
+ ```
src/analytic_workspace_client.egg-info/SOURCES.txt
@@ -12,15 +12,31 @@ src/aw_client/domain.py
  src/aw_client/exceptions.py
  src/aw_client/session.py
  src/aw_client/tools.py
+ src/aw_client/core/__init__.py
+ src/aw_client/core/bundle.py
+ src/aw_client/core/compiler.py
+ src/aw_client/core/model_vault.py
+ src/aw_client/core/spark.py
  src/aw_client/data_master/__init__.py
  src/aw_client/data_master/base.py
  src/aw_client/data_master/v0.py
  src/aw_client/data_master/v1.py
  src/aw_client/data_master/v2.py
+ src/aw_client/etl_blocks/__init__.py
+ src/aw_client/etl_blocks/application.py
+ src/aw_client/etl_blocks/dto.py
+ src/aw_client/etl_blocks/runtime.py
+ src/aw_client/etl_blocks/services.py
+ src/aw_client/etl_blocks/test_data.py
+ src/aw_client/etl_blocks/tools.py
  src/aw_client/model_dev/__init__.py
  src/aw_client/model_dev/application.py
  src/aw_client/model_dev/cache.py
  src/aw_client/model_dev/runner.py
  src/aw_client/model_dev/virtual_objects.py
  src/aw_client/models/__init__.py
- src/aw_client/models/model_schema.py
+ src/aw_client/models/model_schema.py
+ src/aw_etl/__init__.py
+ src/aw_etl/compiler.py
+ src/aw_etl/etl_blocks.py
+ src/aw_etl/models.py
src/analytic_workspace_client.egg-info/requires.txt
@@ -1,11 +1,12 @@
- colorama<0.5,>=0.4
+ python-dotenv<1.1,>=1.0
  httpx<1.0,>=0.25
  pandas
  pydantic<2.0,>=1.10
- python-dotenv<1.1,>=1.0
+ colorama<0.5,>=0.4

  [dev]
- pyspark==3.4.1
+ pyspark==3.5.0
+ pytest<8.3,>=8.2

  [ml]
- mlflow<2.8,>=2.7
+ mlflow<2.15,>=2.14
src/aw_client/core/bundle.py
@@ -0,0 +1,39 @@
+ from typing import List, Dict, Union, Optional, Any
+
+
+ class NamedObjectsBundle:
+     """ """
+     def __init__(self, objs: Dict[str, Any]):
+         self._obj_list: List[Any] = [df for _, df in objs.items()]
+         self._obj_named: Dict[str, Any] = objs
+
+     def first(self) -> Optional[Any]:
+         """ """
+         return self._obj_list[0] if self._obj_list else None
+
+     def as_list(self) -> List[Any]:
+         return self._obj_list
+
+     def as_named(self) -> Dict[str, Any]:
+         return self._obj_named
+
+     def __getitem__(self, item):
+         if isinstance(item, int):
+             # an integer key selects by position
+             return self._obj_list[item]
+
+         if item not in self._obj_named:
+             raise Exception(f'No object data found with the name "{item}"')
+         return self._obj_named[item]
+
+     def __iter__(self):
+         return iter(self._obj_named.values())
+
+     def __bool__(self):
+         return len(self._obj_list) > 0
+
+     def __len__(self):
+         return len(self._obj_list)
+
+     def items(self):
+         return self._obj_named.items()
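NamedObjectsBundle is the container the runtime hands to blocks for multi-input access. A minimal sketch of its behaviour, inferred from the code above (the payload dicts stand in for the Spark dataframes the runtime actually bundles, and the names are hypothetical):

```python
from aw_client.core.bundle import NamedObjectsBundle

# Hypothetical payload: any objects can be bundled; the runtime uses Spark dataframes.
bundle = NamedObjectsBundle({'child': {'rows': 3}, 'orders': {'rows': 7}})

assert bundle.first() == {'rows': 3}      # first object in insertion order
assert bundle[0] is bundle['child']       # int key -> by index, str key -> by name
assert len(bundle) == 2 and bool(bundle)  # truthy when non-empty

for name, obj in bundle.items():          # dict-style iteration
    print(name, obj)
```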
src/aw_client/core/compiler.py
@@ -0,0 +1,165 @@
+ from typing import List, Tuple, Optional
+
+ import builtins
+
+
+ class CompiledModule:
+     """ """
+     def __init__(self, compiled_globals: dict):
+         self.compiled_globals = compiled_globals
+
+     def __contains__(self, item):
+         return item in self.compiled_globals
+
+     def __getattr__(self, item):
+         return self.compiled_globals[item]
+
+     def __getitem__(self, item):
+         return self.compiled_globals[item]
+
+     @property
+     def is_empty(self) -> bool:
+         """ Returns True if the module contains no code (e.g. only comments) """
+         return len(set(self.compiled_globals) - {'__builtins__'}) == 0
+
+
+ class ScriptCompiler:
+     """ """
+
+     class CompilerException(Exception):
+         """ """
+
+     # Exception hierarchy
+     class Misconfigured(CompilerException):
+         """ """
+
+     class ForbiddenImport(CompilerException):
+         """ Raised when the code contains a forbidden import """
+
+     class CannotCompile(CompilerException):
+         """ Raised when the code cannot be compiled """
+
+     # Compiler modes
+     MODE_ETL = 1
+     MODE_VIRTUAL_OBJECT_SCHEMA = 2
+     MODE_ETL_BLOCK = 3
+
+     def __init__(self, mode: Optional[int] = None):
+         self.mode = mode
+
+     def compile(self, source_code: str, mode: Optional[int] = None) -> CompiledModule:
+         """ """
+         try:
+             byte_code = compile(source_code, filename='<string>', mode='exec')
+         except SyntaxError as e:
+             raise ScriptCompiler.CannotCompile(f'Compilation error: {e.msg} (line {e.lineno}): {e.text}')
+
+         try:
+             compiled_globals = self._get_globals(mode=mode if mode is not None else self.mode)
+             exec(byte_code, compiled_globals)
+         except Exception as e:
+             raise ScriptCompiler.CompilerException(f'Compilation error: {e}')
+
+         return CompiledModule(compiled_globals=compiled_globals)
+
+     def _get_globals(self, mode: int) -> dict:
+         """ """
+         if mode == ScriptCompiler.MODE_ETL:
+             safe_names, safe_modules = self._safe_globals_for_model_etl_script()
+         elif mode == ScriptCompiler.MODE_ETL_BLOCK:
+             safe_names, safe_modules = self._safe_globals_for_etl_block()
+         elif mode == ScriptCompiler.MODE_VIRTUAL_OBJECT_SCHEMA:
+             safe_names, safe_modules = self._safe_globals_for_virtual_object_schema_script()
+         else:
+             raise ScriptCompiler.Misconfigured(f'Invalid compiler mode {self.mode}')
+
+         def safe_import(name, globals=None, locals=None, fromlist=(), level=0):
+             """ """
+             modules = name.split('.')
+             if modules[0] not in safe_modules:
+                 raise ScriptCompiler.ForbiddenImport(f'Importing module {modules[0]} is forbidden')
+
+             return __import__(name, globals, locals, fromlist, level)
+
+         safe_builtins = {}
+         for name in safe_names:
+             safe_builtins[name] = getattr(builtins, name)
+
+         safe_builtins['__import__'] = safe_import
+
+         return {'__builtins__': safe_builtins}
+
+     @staticmethod
+     def _safe_globals_for_virtual_object_schema_script() -> Tuple[List[str], List[str]]:
+         """ """
+         safe_names = [
+             'None', 'False', 'True', 'bool', 'bytes', 'chr', 'complex', 'float',
+             'hex', 'id', 'int', 'str', 'getattr', 'setattr', 'delattr',
+         ]
+
+         safe_modules = ['pyspark', 'aw_etl']
+
+         return safe_names, safe_modules
+
+     @staticmethod
+     def _safe_globals_full() -> Tuple[List[str], List[str]]:
+         """ Returns the full set of allowed builtin names and importable modules """
+         safe_names = [
+             'None', 'False', 'True',
+             'abs', 'all', 'any', 'ascii',
+             'bin', 'bool', 'bytes', 'bytearray',
+             'callable', 'chr', 'classmethod', 'complex',
+             'delattr', 'dict', 'divmod',
+             'enumerate',
+             'float', 'filter', 'format', 'frozenset',
+             'getattr',
+             'hasattr', 'hash', 'hex',
+             'id', 'int', 'isinstance', 'issubclass', 'iter',
+             'len', 'list',
+             'map', 'max', 'min',
+             'next',
+             'object', 'oct', 'ord',
+             'pow', 'print', 'property',
+             'range', 'repr', 'reversed', 'round',
+             'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
+             'type', 'tuple',
+             'vars',
+             'zip',
+             '__build_class__', '__name__',
+
+             'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'BufferError', 'BytesWarning',
+             'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError', 'FutureWarning',
+             'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
+             'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplementedError', 'OSError',
+             'OverflowError', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning',
+             'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError',
+             'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
+             'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning', 'ZeroDivisionError',
+         ]
+
+         safe_modules = [
+             'pyspark', 'requests', 'pandas', 'numpy', 'aw_etl', 'pyparsing', 'pydantic',
+
+             'mlflow', 'prophet', 'statsmodels', 'torch', 'sklearn', 'catboost',
+
+             'array', 'calendar', 'codecs', 'collections', 'copy', 'csv', 'dataclasses', 'datetime', '_strptime',
+             'decimal', 'enum', 'functools', 'hashlib', 'itertools', 'json', 'math', 'queue', 'random', 're',
+             'statistics', 'string', 'time', 'urllib', 'xml',
+             'zoneinfo', 'typing', 'uuid', 'logging',
+         ]
+
+         return safe_names, safe_modules
+
+     @staticmethod
+     def _safe_globals_for_model_etl_script() -> Tuple[List[str], List[str]]:
+         """ """
+         return ScriptCompiler._safe_globals_full()
+
+     @staticmethod
+     def _safe_globals_for_etl_block() -> Tuple[List[str], List[str]]:
+         """ """
+         safe_names, safe_modules = ScriptCompiler._safe_globals_full()
+         safe_modules.extend(['sqlglot', 'inspect'])
+
+         return safe_names, safe_modules
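ScriptCompiler executes user scripts against a whitelist of builtins and importable modules. A short sketch of how it might be driven, assuming only the API shown above; note that a ForbiddenImport raised during exec is re-wrapped by compile() into a generic CompilerException:

```python
from aw_client.core.compiler import ScriptCompiler

compiler = ScriptCompiler(mode=ScriptCompiler.MODE_ETL_BLOCK)

# Allowed: json is in the safe module list.
module = compiler.compile("import json\ndef block_schema(schema):\n    return schema\n")
assert 'block_schema' in module and not module.is_empty

# Forbidden: os is not in safe_modules, so the import raises.
try:
    compiler.compile("import os\n")
except ScriptCompiler.CompilerException as e:
    print(e)  # the ForbiddenImport surfaces as a compilation error
```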
src/aw_client/core/model_vault.py
@@ -0,0 +1,14 @@
+ from typing import Optional, Dict, Any
+
+
+ class Vault:
+     """ """
+     def __init__(self, values: Optional[dict] = None):
+         self._values = values or {}
+
+     @property
+     def values(self) -> dict:
+         return self._values
+
+     def get(self, name: str) -> Optional[Any]:
+         return self._values.get(name, None)
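Vault is a read-only wrapper over a dict of secrets handed to a block; a brief illustration with hypothetical values:

```python
from aw_client.core.model_vault import Vault

vault = Vault({'api_key': 'secret-value'})  # hypothetical secret
assert vault.get('api_key') == 'secret-value'
assert vault.get('missing') is None  # absent names return None rather than raising
```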
src/aw_client/core/spark.py
@@ -0,0 +1,15 @@
+
+ try:
+     from pyspark.sql import SparkSession
+ except ImportError:
+     raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+ def build_spark_session():
+     """ """
+     return SparkSession.builder \
+         .master('local[*]') \
+         .config('spark.driver.host', '127.0.0.1') \
+         .config('spark.ui.enabled', 'false') \
+         .getOrCreate()
+
src/aw_client/etl_blocks/__init__.py
@@ -0,0 +1,4 @@
+ from .runtime import get_etl_block_schema, get_etl_block_data, get_etl_block_meta
+ from .test_data import ModelObjectTestData
+
+
src/aw_client/etl_blocks/application.py
@@ -0,0 +1,49 @@
+ from typing import Optional, Callable
+
+ try:
+     from pyspark.sql import SparkSession
+ except ImportError:
+     raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+ from aw_client.core.model_vault import Vault
+ from aw_client.core.compiler import CompiledModule
+
+
+ class ETLBlockApplication:
+     """ """
+     def __init__(self,
+                  spark_builder: Callable,
+                  run_mode: str,
+                  vault: Vault,
+                  model_module: Optional[CompiledModule] = None):
+         self._spark_builder = spark_builder
+         self._spark = None
+         self._run_mode = run_mode
+         self._model_module = model_module
+         self._vault = vault
+
+     @property
+     def spark(self) -> SparkSession:
+         if self._spark is None:
+             self._spark = self._spark_builder()
+         return self._spark
+
+     @property
+     def is_spark_initialized(self) -> bool:
+         return self._spark is not None
+
+     @property
+     def model_module(self) -> Optional[CompiledModule]:
+         """ """
+         return self._model_module
+
+     @property
+     def vault(self) -> Vault:
+         """ """
+         return self._vault
+
+     @property
+     def run_mode(self) -> str:
+         """ """
+         return self._run_mode
+
src/aw_client/etl_blocks/dto.py
@@ -0,0 +1,85 @@
+ from typing import Optional, Any, List, Union
+
+ import datetime
+ from enum import Enum
+
+ from pydantic import BaseModel
+
+
+ class ETLBlockParamType(str, Enum):
+     """ """
+     STRING = 'string'
+     TEXT = 'text'
+     PASSWORD = 'password'
+     SQL_TEXT = 'sql_text'
+     NUMBER = 'number'
+     FLOAT = 'float'
+     BOOL = 'bool'
+     DATE = 'date'
+     DATETIME = 'datetime'
+     SELECT = 'select'
+     ACTION = 'action'
+
+
+ class ETLBlockParamGroupType(str, Enum):
+     """ """
+     GROUP = 'group'
+
+
+ class ETLBlockParamActionType(str, Enum):
+     """ """
+     ACTION = 'action'
+
+
+ # ----------------------------------------------------------------------------------------------------------------------
+ # ETL block metadata
+ # ----------------------------------------------------------------------------------------------------------------------
+ class ETLBlockParam(BaseModel):
+     """ ETL block parameter """
+     code: str
+     name: str
+     type: ETLBlockParamType
+     description: Optional[str] = None
+
+     required: bool
+     mult: bool
+     domain: Optional[Any] = None
+     extra: Optional[Any] = None
+
+
+ class ETLBlockParamGroup(BaseModel):
+     """ ETL block parameter group """
+     code: str
+     name: str
+
+     type: ETLBlockParamGroupType
+     description: Optional[str] = None
+     view_options: Optional[dict] = None
+     mult: bool
+     params: List[Union['ETLBlockParam', 'ETLBlockParamAction', 'ETLBlockParamGroup']] = []
+     extra: Optional[Any] = None
+
+
+ class ETLBlockParamAction(BaseModel):
+     code: str
+     name: str
+     type: ETLBlockParamActionType
+     description: Optional[str] = None
+     action: str
+     extra: Optional[Any] = None
+
+
+ class ETLBlockMeta(BaseModel):
+     """ ETL block metadata """
+     uid: str  # unique block identifier
+     name: str  # block name
+     version: str  # block version
+     description: str  # block description
+     author: str  # block author
+     updated_at: datetime.datetime  # date and time of the last update
+     params: List[Union[ETLBlockParam, ETLBlockParamAction, ETLBlockParamGroup]]
+     engine_requires: Optional[List[str]] = []
+
+     @property
+     def verbose_name(self):
+         return f'{self.name} v{self.version}'
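For illustration, a guess at the block_meta.json shape that get_etl_block_meta (in runtime.py below) parses into ETLBlockMeta; every field value here is invented:

```python
from aw_client.etl_blocks.dto import ETLBlockMeta

# Hypothetical metadata for a block with a single required string parameter.
meta_json = '''
{
    "uid": "example.upper-case",
    "name": "Upper case",
    "version": "0.1",
    "description": "Converts a column to upper case",
    "author": "example@analyticworkspace.ru",
    "updated_at": "2024-01-01T00:00:00",
    "params": [
        {"code": "column", "name": "Column", "type": "string", "required": true, "mult": false}
    ]
}
'''

meta = ETLBlockMeta.parse_raw(meta_json)  # pydantic v1 API, matching the pinned dependency
print(meta.verbose_name)  # Upper case v0.1
```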
src/aw_client/etl_blocks/runtime.py
@@ -0,0 +1,202 @@
+ from typing import Dict, Optional, Any, Union, List, Literal
+ from pathlib import Path
+ from collections import OrderedDict
+ import datetime
+ import inspect
+
+ from aw_client.core.compiler import ScriptCompiler
+ from aw_client.core.model_vault import Vault
+ from aw_client.core.spark import build_spark_session
+ from aw_client.core.bundle import NamedObjectsBundle
+ from aw_client.models.model_schema import ModelObject, ModelObjectField
+ from .application import ETLBlockApplication
+ from .test_data import ModelObjectTestData
+ from .tools import build_dataframe, build_model_object, build_spark_schema
+ from .dto import ETLBlockMeta
+
+
+ try:
+     from pyspark.sql import SparkSession, DataFrame
+     from pyspark.sql.types import DataType, StringType, DoubleType, TimestampType, LongType, BooleanType, \
+         ByteType, ShortType, IntegerType, DecimalType, FloatType, DateType, StructType, StructField
+ except ImportError:
+     raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+
+ def get_etl_block_meta(block_path: Path) -> ETLBlockMeta:
+     """ """
+     block_meta_path = block_path / 'block_meta.json' if block_path.is_dir() else block_path
+
+     if not block_meta_path.exists():
+         raise Exception(f'Block metadata file not found: {block_meta_path}')
+
+     with open(block_meta_path, 'rt') as f:
+         return ETLBlockMeta.parse_raw(f.read())
+
+
+ def get_etl_block_schema(block_path: Path,
+                          test_data: Union[ModelObjectTestData, List[ModelObjectTestData]],
+                          params: Optional[Dict] = None,
+                          run_mode: Optional[Literal['']] = None,
+                          vault: Optional[Vault] = None,
+                          model_script_code: Optional[str] = None) -> StructType:
+     """
+     Args:
+
+     """
+     block_code_path = block_path / 'block_code.py' if block_path.is_dir() else block_path
+
+     if not block_code_path.exists():
+         raise Exception(f'Block source code file not found: {block_code_path}')
+
+     with open(block_code_path, 'rt') as f:
+         block_code = f.read()
+
+     # Compile the block code
+     try:
+         block_module = ScriptCompiler().compile(source_code=block_code, mode=ScriptCompiler.MODE_ETL_BLOCK)
+     except ScriptCompiler.CannotCompile as e:
+         raise Exception(f'Failed to compile the block source code: {e}')
+
+     # Compile the model code
+     if model_script_code:
+         try:
+             model_module = ScriptCompiler().compile(source_code=model_script_code, mode=ScriptCompiler.MODE_ETL)
+         except ScriptCompiler.CannotCompile as e:
+             raise Exception(f'Failed to compile the model script source code: {e}')
+     else:
+         model_module = None
+
+     spark = build_spark_session()
+
+     # Child dataframes
+     dataframes = OrderedDict()
+     for td in (test_data if isinstance(test_data, list) else [test_data]):
+         df = build_dataframe(spark, td)
+         if not dataframes:
+             dataframes['child'] = df
+         dataframes[td.model_name] = df
+
+     upstream_dataframes = NamedObjectsBundle(dataframes)
+
+     # Child schemas
+     schemas = OrderedDict()
+     for td in (test_data if isinstance(test_data, list) else [test_data]):
+         schema = build_spark_schema(td)
+         if not schemas:
+             schemas['child'] = schema
+         schemas[td.model_name] = schema
+
+     upstream_schemas = NamedObjectsBundle(schemas)
+
+     block_schema_parameters = inspect.signature(block_module['block_schema']).parameters
+
+     app = ETLBlockApplication(
+         spark_builder=build_spark_session,
+         run_mode=run_mode or 'full',
+         vault=vault or Vault(),
+         model_module=model_module
+     )
+
+     # Determine which parameters to pass
+     block_schema_kwargs = {}
+     if 'params' in block_schema_parameters:
+         block_schema_kwargs['params'] = params
+     if 'app' in block_schema_parameters:
+         block_schema_kwargs['app'] = app
+     if 'model_object' in block_schema_parameters:
+         block_schema_kwargs['model_object'] = build_model_object(test_data[0] if isinstance(test_data, list) else test_data)
+     if 'schema' in block_schema_parameters:
+         block_schema_kwargs['schema'] = upstream_schemas.first()
+     if 'schemas' in block_schema_parameters:
+         block_schema_kwargs['schemas'] = upstream_schemas
+     if 'upstream_schema' in block_schema_parameters:
+         block_schema_kwargs['upstream_schema'] = upstream_schemas.first()
+     if 'upstream_schemas' in block_schema_parameters:
+         block_schema_kwargs['upstream_schemas'] = upstream_schemas
+     if 'df' in block_schema_parameters:
+         block_schema_kwargs['df'] = upstream_dataframes.first()
+     if 'dfs' in block_schema_parameters:
+         block_schema_kwargs['dfs'] = upstream_dataframes
+     if 'upstream_dataframe' in block_schema_parameters:
+         block_schema_kwargs['upstream_dataframe'] = upstream_dataframes.first()
+     if 'upstream_dataframes' in block_schema_parameters:
+         block_schema_kwargs['upstream_dataframes'] = upstream_dataframes
+
+
+     return block_module['block_schema'](**block_schema_kwargs)
+
+
+ def get_etl_block_data(block_path: Path,
+                        test_data: Union[ModelObjectTestData, List[ModelObjectTestData]],
+                        params: Optional[Dict] = None,
+                        run_mode: Optional[Literal['']] = None,
+                        vault: Optional[Vault] = None,
+                        model_script_code: Optional[str] = None) -> DataFrame:
+     """
+     Args:
+
+     """
+     block_code_path = block_path / 'block_code.py' if block_path.is_dir() else block_path
+
+     if not block_code_path.exists():
+         raise Exception(f'Block source code file not found: {block_code_path}')
+
+     with open(block_code_path, 'rt') as f:
+         block_code = f.read()
+
+     # Compile the block code
+     try:
+         block_module = ScriptCompiler().compile(source_code=block_code, mode=ScriptCompiler.MODE_ETL_BLOCK)
+     except ScriptCompiler.CannotCompile as e:
+         raise Exception(f'Failed to compile the block source code: {e}')
+
+     # Compile the model code
+     if model_script_code:
+         try:
+             model_module = ScriptCompiler().compile(source_code=model_script_code, mode=ScriptCompiler.MODE_ETL)
+         except ScriptCompiler.CannotCompile as e:
+             raise Exception(f'Failed to compile the model script source code: {e}')
+     else:
+         model_module = None
+
+     spark = build_spark_session()
+
+     # Child dataframes
+     dataframes = OrderedDict()
+     for td in (test_data if isinstance(test_data, list) else [test_data]):
+         df = build_dataframe(spark, td)
+         if not dataframes:
+             dataframes['child'] = df
+         dataframes[td.model_name] = df
+
+     upstream_dataframes = NamedObjectsBundle(dataframes)
+
+     block_data_parameters = inspect.signature(block_module['block_data']).parameters
+
+     app = ETLBlockApplication(
+         spark_builder=build_spark_session,
+         run_mode=run_mode or 'full',
+         vault=vault or Vault(),
+         model_module=model_module
+     )
+
+     # Determine which parameters to pass
+     block_schema_kwargs = {}
+     if 'params' in block_data_parameters:
+         block_schema_kwargs['params'] = params
+     if 'app' in block_data_parameters:
+         block_schema_kwargs['app'] = app
+     if 'model_object' in block_data_parameters:
+         block_schema_kwargs['model_object'] = build_model_object(test_data[0] if isinstance(test_data, list) else test_data)
+     if 'df' in block_data_parameters:
+         block_schema_kwargs['df'] = upstream_dataframes.first()
+     if 'dfs' in block_data_parameters:
+         block_schema_kwargs['dfs'] = upstream_dataframes
+     if 'upstream_dataframe' in block_data_parameters:
+         block_schema_kwargs['upstream_dataframe'] = upstream_dataframes.first()
+     if 'upstream_dataframes' in block_data_parameters:
+         block_schema_kwargs['upstream_dataframes'] = upstream_dataframes
+
+     return block_module['block_data'](**block_schema_kwargs)
+
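A hedged end-to-end sketch of exercising an ETL block locally with the helpers above (requires the [dev] extra for pyspark); the block directory, parameter name, and columns are hypothetical, and build_spark_session() starts a local Spark session under the hood:

```python
from pathlib import Path

from aw_client.etl_blocks import ModelObjectTestData, get_etl_block_schema, get_etl_block_data

# Hypothetical test input: one upstream object with two typed columns.
test_data = ModelObjectTestData(
    model_name='sales',
    rows=[{'city': 'Moscow', 'amount': 100}, {'city': 'Kazan', 'amount': 42}],
)

block_path = Path('blocks/upper_case')  # hypothetical directory with block_code.py and block_meta.json

schema = get_etl_block_schema(block_path, test_data, params={'column': 'city'})
df = get_etl_block_data(block_path, test_data, params={'column': 'city'})
df.show()
```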
src/aw_client/etl_blocks/test_data.py
@@ -0,0 +1,17 @@
+ from typing import List, TypedDict, Optional
+
+ from dataclasses import dataclass, field
+
+
+ class ModelObjectSchemaField(TypedDict):
+     """ """
+     model_name: str
+     simple_type: str
+
+
+ @dataclass
+ class ModelObjectTestData:
+     """ """
+     model_name: str
+     rows: List[dict] = field(default_factory=list)
+     schema: Optional[List[ModelObjectSchemaField]] = None
src/aw_client/etl_blocks/tools.py
@@ -0,0 +1,99 @@
+ from typing import Any
+ import datetime
+
+
+ from aw_client.models.model_schema import ModelObject, ModelObjectField
+ from .test_data import ModelObjectTestData
+
+
+
+ try:
+     from pyspark.sql import SparkSession, DataFrame
+     from pyspark.sql.types import DataType, StringType, DoubleType, TimestampType, LongType, BooleanType, \
+         ByteType, ShortType, IntegerType, DecimalType, FloatType, DateType, StructType, StructField
+ except ImportError:
+     raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+ def build_spark_schema(test_data: ModelObjectTestData) -> StructType:
+     """ """
+     if test_data.schema is not None:
+         return StructType(fields=[
+             StructField(sf['model_name'], spark_type_for_simple_type(sf['simple_type']), True) for sf in test_data.schema
+         ])
+     return StructType(fields=[
+         StructField(n, spark_type_for_python_value(v), True) for n, v in test_data.rows[0].items()
+     ])
+
+
+ def build_dataframe(spark: SparkSession, test_data: ModelObjectTestData) -> DataFrame:
+     """ """
+     return spark.createDataFrame(test_data.rows, schema=build_spark_schema(test_data))
+
+
+ def build_model_object(test_data: ModelObjectTestData):
+     """ """
+     if test_data.schema is not None:
+         fields = [
+             ModelObjectField(name=f['model_name'], model_name=f['model_name'], simple_type=f['simple_type']) for f in test_data.schema
+         ]
+     else:
+         fields = [
+             ModelObjectField(name=n, model_name=n, simple_type=simple_type_for_python_value(v)) for n, v in test_data.rows[0].items()
+         ]
+     return ModelObject(
+         name=test_data.model_name,
+         model_name=test_data.model_name,
+         type='table',
+         sql_text=None,
+         fields=fields
+     )
+
+
+
+
+ def spark_type_for_simple_type(simple_type: str) -> DataType:
+     """
+     """
+     if simple_type == 'number':
+         return LongType()
+     elif simple_type == 'float':
+         return DoubleType()
+     elif simple_type == 'date':
+         return TimestampType()
+     elif simple_type == 'bool':
+         return BooleanType()
+     else:
+         return StringType()
+
+
+ def spark_type_for_python_value(value: Any) -> DataType:
+     """
+     Returns the Spark type for a sample Python value
+     """
+     if isinstance(value, bool):  # checked before int: bool is a subclass of int
+         return BooleanType()
+     if isinstance(value, datetime.datetime):  # checked before date: datetime is a subclass of date
+         return TimestampType()
+     if isinstance(value, datetime.date):
+         return DateType()
+     if isinstance(value, int):
+         return LongType()
+     if isinstance(value, float):
+         return DoubleType()
+     return StringType()
+
+
+ def simple_type_for_python_value(value: Any) -> str:
+     """ """
+     if isinstance(value, bool):  # checked before int: bool is a subclass of int
+         return 'bool'
+     if isinstance(value, datetime.datetime):
+         return 'date'
+     if isinstance(value, datetime.date):
+         return 'date'
+     if isinstance(value, int):
+         return 'number'
+     if isinstance(value, float):
+         return 'float'
+     return 'string'
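A quick illustration of the type-mapping helpers, assuming the isinstance ordering shown above (bool before int, datetime before date):

```python
import datetime

from aw_client.etl_blocks.tools import spark_type_for_python_value, simple_type_for_python_value

# Sample Python values map to AW simple types and Spark types.
assert simple_type_for_python_value(True) == 'bool'       # not 'number': bool is checked first
assert simple_type_for_python_value(7) == 'number'
assert simple_type_for_python_value(1.5) == 'float'
assert simple_type_for_python_value(datetime.datetime.now()) == 'date'
print(spark_type_for_python_value('text'))  # StringType()
```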
src/aw_client/session.py
@@ -136,7 +136,7 @@ class Session:
              import mlflow
          except ImportError:
              raise AwClientMisconfigured(
-                 'To use MLFlow, install with the ml dependency: pip install aw_client[ml]')
+                 'To use MLFlow, install the library with the ml extra: pip install analytic-workspace-client[ml]')

          if not self.token:
              data_master_url = urljoin(self.aw_url, 'data-master/get-token')
src/aw_etl/compiler.py
@@ -0,0 +1,17 @@
+ from typing import Protocol
+
+
+ class CompiledModule(Protocol):
+     """ """
+     def __contains__(self, item):
+         """ """
+
+     def __getattr__(self, item):
+         """ """
+
+     def __getitem__(self, item):
+         """ """
+
+     @property
+     def is_empty(self) -> bool:
+         """ """
src/aw_etl/etl_blocks.py
@@ -0,0 +1,39 @@
+ from typing import Protocol, Optional
+
+ try:
+     from pyspark.sql import SparkSession
+ except ImportError:
+     raise Exception('To use Spark, install the library with the [dev] extra: `pip install analytic-workspace-client[dev]`')
+
+
+ from aw_etl.models import Vault
+ from aw_etl.compiler import CompiledModule
+
+
+ class InvalidEtlBlock(Exception):
+     """ """
+
+
+ class ETLBlockApplication(Protocol):
+     """ """
+     @property
+     def spark(self) -> SparkSession:
+         """ """
+
+     @property
+     def is_spark_initialized(self) -> bool:
+         """ """
+
+     @property
+     def model_module(self) -> Optional[CompiledModule]:
+         """ """
+
+     @property
+     def vault(self) -> Vault:
+         """ """
+
+     @property
+     def run_mode(self) -> str:
+         """ """
+
+
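To tie the protocols to the runtime: a guess at a minimal block_code.py. get_etl_block_schema and get_etl_block_data (runtime.py above) inspect the signatures of block_schema and block_data and pass only the parameters each one declares, so the signatures below are one valid shape among several; the block itself (column upper-casing) is invented for illustration, not a documented contract:

```python
# block_code.py (hypothetical): upper-cases one column of the upstream dataframe.
from pyspark.sql import functions as F


def block_schema(schema, params):
    # Values change but the column set does not, so the upstream schema passes through.
    return schema


def block_data(df, params, app):
    column = params['column']
    if app.run_mode != 'full':
        df = df.limit(10)  # hypothetical shortcut for non-full runs
    return df.withColumn(column, F.upper(F.col(column)))
```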
src/aw_etl/models.py
@@ -0,0 +1,22 @@
+ from typing import Protocol, List, Any
+
+
+ class ModelObjectField(Protocol):
+     """ """
+     @property
+     def simple_type(self) -> str:
+         """ """
+
+
+ class ModelObject(Protocol):
+     """ """
+     @property
+     def fields(self) -> List[ModelObjectField]:
+         """ """
+
+
+ class Vault(Protocol):
+     """ """
+     def get(self, name: str) -> Any:
+         """ """
+
analytic_workspace_client-1.2.0/PKG-INFO
@@ -1,42 +0,0 @@
- Metadata-Version: 2.1
- Name: analytic_workspace_client
- Version: 1.2.0
- Summary: Library for connecting to Analytic Workspace
- Home-page: https://analyticworkspace.ru/
- Author: Analytic Workspace
- Author-email: aw_help@analyticworkspace.ru
- License: UNKNOWN
- Description: # Library for Analytic Workspace
-
-         ## Getting a token
-
-         Follow the link https://aw.example.ru/data-master/get-token (replacing https://aw.example.ru/ with the address of your Analytic Workspace server).
-
-         It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
-
-         ## Usage example
-
-         ```python
-         from aw_client import Session
-
-
-         with open('aw_token', 'rt') as f:
-             aw_token = f.read()
-
-         session = Session(token=aw_token, aw_url='https://aw.example.ru')
-
-         # If the access token is set in the AW_DATA_TOKEN environment variable, the session object
-         # can be created without passing the token parameter explicitly: session = Session(aw_url='https://aw.example.ru')
-
-         df = session.load()  # df: pandas.DataFrame
-
-         display(df)
-         ```
- Platform: UNKNOWN
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8,<4
- Description-Content-Type: text/markdown
- Provides-Extra: dev
- Provides-Extra: ml
analytic_workspace_client-1.2.0/src/analytic_workspace_client.egg-info/PKG-INFO
@@ -1,42 +0,0 @@
- Metadata-Version: 2.1
- Name: analytic-workspace-client
- Version: 1.2.0
- Summary: Library for connecting to Analytic Workspace
- Home-page: https://analyticworkspace.ru/
- Author: Analytic Workspace
- Author-email: aw_help@analyticworkspace.ru
- License: UNKNOWN
- Description: # Library for Analytic Workspace
-
-         ## Getting a token
-
-         Follow the link https://aw.example.ru/data-master/get-token (replacing https://aw.example.ru/ with the address of your Analytic Workspace server).
-
-         It is best to save the token value in a separate file or put it in the `AW_DATA_TOKEN` environment variable.
-
-         ## Usage example
-
-         ```python
-         from aw_client import Session
-
-
-         with open('aw_token', 'rt') as f:
-             aw_token = f.read()
-
-         session = Session(token=aw_token, aw_url='https://aw.example.ru')
-
-         # If the access token is set in the AW_DATA_TOKEN environment variable, the session object
-         # can be created without passing the token parameter explicitly: session = Session(aw_url='https://aw.example.ru')
-
-         df = session.load()  # df: pandas.DataFrame
-
-         display(df)
-         ```
- Platform: UNKNOWN
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8,<4
- Description-Content-Type: text/markdown
- Provides-Extra: dev
- Provides-Extra: ml