daedaluspy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- daedaluspy/__init__.py +6 -0
- daedaluspy/__main__.py +10 -0
- daedaluspy/cli.py +172 -0
- daedaluspy/data_lib/__init__.py +38 -0
- daedaluspy/data_lib/data/__init__.py +0 -0
- daedaluspy/data_lib/data/base_template.py +370 -0
- daedaluspy/data_lib/data/cloud_aws/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_aws/template/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_aws/template/container_files/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_aws/template/container_files/entity_template.py +20 -0
- daedaluspy/data_lib/data/cloud_azure/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_azure/template/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_azure/template/container_files/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_azure/template/container_files/entity_template.py +21 -0
- daedaluspy/data_lib/data/cloud_google/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_google/template/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_google/template/container_files/__init__.py +0 -0
- daedaluspy/data_lib/data/cloud_google/template/container_files/entity_template.py +20 -0
- daedaluspy/data_lib/generator/__init__.py +14 -0
- daedaluspy/data_lib/generator/data_entity_oop.py +108 -0
- daedaluspy/data_lib/generator/library_generator.py +190 -0
- daedaluspy/data_lib/generator/service_generator_oop.py +64 -0
- daedaluspy/data_lib/generator/setup_file.py +168 -0
- daedaluspy/data_lib/generator/templates/__init__.py +0 -0
- daedaluspy/data_lib/generator/templates/config_templates.py +181 -0
- daedaluspy/data_lib/generator/templates/data_quality_templates.py +347 -0
- daedaluspy/data_lib/generator/templates/logger_templates.py +730 -0
- daedaluspy/data_lib/generator/templates/setup_template.py +0 -0
- daedaluspy/data_lib/services/__init__.py +0 -0
- daedaluspy/data_lib/services/generator/corporate_service_generator.py +0 -0
- daedaluspy/data_lib/services/template/__init__.py +0 -0
- daedaluspy/data_lib/services/template/apis/__init__.py +0 -0
- daedaluspy/data_lib/services/template/apis/auth_template.py +32 -0
- daedaluspy/data_lib/services/template/apis/models_template.py +14 -0
- daedaluspy/data_lib/services/template/apis/service_template.py +43 -0
- daedaluspy/data_lib/services/template/database/__init__.py +0 -0
- daedaluspy/data_lib/services/template/database/service_template.py +135 -0
- daedaluspy/data_pipeline/__init__.py +0 -0
- daedaluspy/data_pipeline/generator/__init__.py +8 -0
- daedaluspy/data_pipeline/generator/pipeline_generator_oop.py +114 -0
- daedaluspy/data_pipeline/template/__init__.py +35 -0
- daedaluspy/data_pipeline/template/complete_templates.py +87 -0
- daedaluspy/data_pipeline/template/templates/base_template.py +67 -0
- daedaluspy/data_pipeline/template/templates/config_template.py +64 -0
- daedaluspy/data_pipeline/template/templates/flowbuilder_template.py +112 -0
- daedaluspy/data_pipeline/template/templates/gitignore_template.py +163 -0
- daedaluspy/data_pipeline/template/templates/init_template.py +33 -0
- daedaluspy/data_pipeline/template/templates/main_template.py +43 -0
- daedaluspy/data_pipeline/template/templates/readme_template.py +275 -0
- daedaluspy/data_pipeline/template/templates/requirements_template.py +13 -0
- daedaluspy/data_pipeline/template/templates/steps_template.py +187 -0
- daedaluspy-1.0.0.dist-info/METADATA +179 -0
- daedaluspy-1.0.0.dist-info/RECORD +57 -0
- daedaluspy-1.0.0.dist-info/WHEEL +5 -0
- daedaluspy-1.0.0.dist-info/entry_points.txt +2 -0
- daedaluspy-1.0.0.dist-info/licenses/LICENSE +21 -0
- daedaluspy-1.0.0.dist-info/top_level.txt +1 -0
daedaluspy/__init__.py
ADDED
daedaluspy/__main__.py
ADDED
daedaluspy/cli.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from daedaluspy.data_pipeline.template.complete_templates import get_template_set
|
|
2
|
+
from daedaluspy.data_pipeline.generator.pipeline_generator_oop import PipelineGenerator, PipelineTier, CloudProvider as PipelineCloudProvider
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from daedaluspy.data_lib.generator.library_generator import LibraryGenerator
|
|
7
|
+
from daedaluspy.data_lib.generator.data_entity_oop import DataEntityGenerator, CloudProvider
|
|
8
|
+
from daedaluspy.data_lib.generator.service_generator_oop import ServiceGenerator
|
|
9
|
+
|
|
10
|
+
class Tier(Enum):
    """Data layers offered as choices for the ``create-data`` ``--tier`` option."""

    RAW = "raw"
    CLEAR = "clear"
    MODEL = "model"
|
|
14
|
+
|
|
15
|
+
class ServiceType(Enum):
    """Service kinds offered as choices for the ``create-service`` ``--type`` option."""

    API = "api"
    DATABASE = "database"
|
|
18
|
+
|
|
19
|
+
class Command:
    """Base interface for CLI sub-commands; subclasses override ``execute``."""

    def execute(self, args):
        """Run the command with the parsed CLI arguments.

        The base implementation is a placeholder and always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError()
|
|
22
|
+
|
|
23
|
+
class CreateLibCommand(Command):
    """Sub-command that builds a library skeleton through ``LibraryGenerator``."""

    def execute(self, args):
        """Generate the library described by ``args`` and print a confirmation.

        Reads ``args.name``, ``args.dataname`` and ``args.systemname``.
        """
        generator_options = {
            "name": args.name,
            "data_name": args.dataname,
            "system_name": args.systemname,
        }
        LibraryGenerator(**generator_options).generate()
        print(f"Estrutura da biblioteca '{args.name}' criada com sucesso!")
|
|
32
|
+
|
|
33
|
+
class CreateDataCommand(Command):
    """Sub-command that generates a data entity through ``DataEntityGenerator``."""

    def execute(self, args):
        """Create the entity described by ``args`` and print a confirmation.

        Reads ``args.tier``, ``args.name``, ``args.cloud``, ``args.columns``
        and ``args.output``. Prints an error and exits with status 1 when no
        tier is given.
        """
        if not args.tier:
            print("Erro: --tier é obrigatório (raw, clear, model)")
            sys.exit(1)

        tier = Tier(args.tier)
        provider = CloudProvider(args.cloud)
        columns = args.columns or []
        # Without an explicit --output, fall back to ./<lowercased name>/<tier>.
        destination = args.output or f"./{args.name.lower()}/{tier.value}"

        DataEntityGenerator(
            classname=args.name,
            tier=tier,
            cloud_provider=provider,
            columns=columns,
            output_path=destination,
        ).generate()
        print(f"Entidade '{args.name}' criada com sucesso na camada {tier.value}.")
|
|
48
|
+
|
|
49
|
+
class CreateServiceCommand(Command):
    """Sub-command that generates a service through ``ServiceGenerator``."""

    @staticmethod
    def _parse_models(raw_models):
        """Convert the ``--models`` values into parsed objects.

        Items that are already dicts are kept as-is; every other item is
        decoded with ``json.loads``. Items that cannot be decoded are still
        skipped, but a warning is written to stderr so the omission is visible
        instead of silent.

        Args:
            raw_models: Iterable of dicts and/or JSON strings, or ``None``.

        Returns:
            List with the kept and decoded items, in input order.
        """
        import json  # kept as a local import, as in the original implementation

        models = []
        for raw in raw_models or []:
            if isinstance(raw, dict):
                models.append(raw)
                continue
            try:
                models.append(json.loads(raw))
            except (TypeError, ValueError) as err:
                # json.JSONDecodeError is a ValueError subclass; TypeError
                # covers items that are neither str nor bytes.
                print(
                    f"Aviso: modelo ignorado (JSON inválido): {raw!r} ({err})",
                    file=sys.stderr,
                )
        return models

    def execute(self, args):
        """Create the service described by ``args`` and print a confirmation.

        Reads ``args.type``, ``args.name``, ``args.models`` and, when present
        and non-empty, ``args.output``.

        Raises:
            ValueError: If ``args.type`` is not a ``ServiceType`` value.
        """
        service_type = ServiceType(args.type)

        # output_path is forwarded only when --output was supplied; presumably
        # ServiceGenerator then applies its own default (verify against it).
        extra_kwargs = {}
        output = getattr(args, "output", None)
        if output:
            extra_kwargs["output_path"] = output

        generator = ServiceGenerator(
            service_name=args.name,
            service_type=service_type,
            models=self._parse_models(args.models),
            **extra_kwargs,
        )
        generator.generate()
        print(f"Serviço '{args.name}' ({service_type.value}) criado com sucesso.")
|
|
76
|
+
|
|
77
|
+
class CreatePipelineCommand(Command):
    """Sub-command that generates a pipeline through ``PipelineGenerator``."""

    def execute(self, args):
        """Create the pipeline described by ``args`` and print a confirmation.

        Reads ``args.tier``, ``args.system_name``, ``args.dataname`` and
        ``args.output``. The remaining options are read with ``getattr`` and
        fall back to defaults when the attribute is absent.

        Raises:
            ValueError: If the tier or the cloud provider is not recognised.
        """
        tier = PipelineTier(args.tier)

        def enum_from_value(enum_cls, value):
            """Return the member of ``enum_cls`` matching ``value``.

            A member matches when ``value`` equals its value or its name, or
            when ``value.upper()`` equals its name.
            """
            candidates = (
                member
                for member in enum_cls
                if value == member.value
                or value == member.name
                or value.upper() == member.name
            )
            found = next(candidates, None)
            if found is None:
                raise ValueError(f"{value} não é válido para {enum_cls.__name__}")
            return found

        # Accept the same optional parameters as create-pipeline-project.
        provider_name = getattr(args, "cloud_provider", "cloud_azure")
        options = dict(
            system_name=args.system_name,
            dataname=args.dataname,
            tier=tier,
            output_path=args.output or ".",
            template_type=getattr(args, "template_type", "all"),
            lib_name=getattr(args, "lib_name", "atlaspy"),
            cloud_provider=enum_from_value(PipelineCloudProvider, provider_name),
            entity_target=getattr(args, "entity_target", None),
            entity_target_class=getattr(args, "entity_target_class", None),
        )
        PipelineGenerator(**options).generate()
        print(f"Pipeline '{args.system_name}/{args.dataname}/{tier.value}' criado com sucesso.")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def main():
    """Parse the command line and dispatch to the matching sub-command.

    Sub-commands: ``create-pipeline-project``, ``create-lib``, ``create-data``,
    ``create-service`` and ``create-pipeline``. When no sub-command is given,
    the help text is printed.
    """
    parser = argparse.ArgumentParser(description="DaedalusPy CLI")
    subparsers = parser.add_subparsers(dest="command")

    # create pipeline project (complete)
    parser_pipeline_project = subparsers.add_parser("create-pipeline-project")
    parser_pipeline_project.add_argument("system_name")
    parser_pipeline_project.add_argument("dataname")
    parser_pipeline_project.add_argument("tier", choices=["raw", "clear", "model"])
    parser_pipeline_project.add_argument("--output", "-o", default=".")
    parser_pipeline_project.add_argument("--template-type", "-t", choices=["essential", "dev", "all"], default="all")
    parser_pipeline_project.add_argument("--lib-name", default="atlaspy")
    parser_pipeline_project.add_argument("--cloud-provider", default="cloud_azure")
    parser_pipeline_project.add_argument("--entity-target", default="azure_sql_database")
    parser_pipeline_project.add_argument("--entity-target-class", default="AzureSQLDatabase")

    # create lib
    parser_lib = subparsers.add_parser("create-lib")
    parser_lib.add_argument("name")
    parser_lib.add_argument("--dataname")
    parser_lib.add_argument("--systemname")

    # create data
    parser_data = subparsers.add_parser("create-data")
    parser_data.add_argument("name")
    parser_data.add_argument("--tier", required=True, choices=[t.value for t in Tier])
    parser_data.add_argument("--cloud", required=True)
    parser_data.add_argument("--extension")
    parser_data.add_argument("--columns", nargs="*")
    parser_data.add_argument("--imports")
    parser_data.add_argument("--read_code")
    parser_data.add_argument("--write_code")
    parser_data.add_argument("--output")
    parser_data.add_argument("--lib")

    # create service
    parser_service = subparsers.add_parser("create-service")
    parser_service.add_argument("name")
    parser_service.add_argument("--type", required=True, choices=[t.value for t in ServiceType])
    parser_service.add_argument("--models", nargs="*")
    parser_service.add_argument("--output")

    # create pipeline
    parser_pipeline = subparsers.add_parser("create-pipeline")
    parser_pipeline.add_argument("system_name")
    parser_pipeline.add_argument("dataname")
    parser_pipeline.add_argument("--tier", required=True, choices=[t.value for t in PipelineTier])
    parser_pipeline.add_argument("--output", default=".")
    parser_pipeline.add_argument("--template_type", default="all")
    parser_pipeline.add_argument("--lib_name", default="atlaspy")
    parser_pipeline.add_argument("--cloud_provider", default="cloud_azure")
    parser_pipeline.add_argument("--entity_target", default="azure_sql_database")
    parser_pipeline.add_argument("--entity_target_class", default="AzureSQLDatabase")

    args = parser.parse_args()

    pipeline_command = CreatePipelineCommand()
    commands = {
        "create-lib": CreateLibCommand(),
        "create-data": CreateDataCommand(),
        "create-service": CreateServiceCommand(),
        "create-pipeline": pipeline_command,
        # This sub-command was parsed but had no handler, so it only printed
        # the help text. Its namespace carries the attributes that
        # CreatePipelineCommand reads (argparse turns the dashes in option
        # names into underscores).
        # NOTE(review): assumes PipelineTier accepts "raw"/"clear"/"model".
        "create-pipeline-project": pipeline_command,
    }

    command = commands.get(args.command)
    if command is None:
        # No sub-command was given on the command line.
        parser.print_help()
    else:
        command.execute(args)
|
|
170
|
+
|
|
171
|
+
if __name__ == "__main__":
|
|
172
|
+
main()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
DaedalusPy - Multi-cloud data engineering framework

This module provides tools to automate and standardize the development
of data engineering solutions across multiple cloud platforms.

Main modules:
- data: Library of templates and data structures
- generator: Library for generating entities and integrations
- services: Library of service templates

Usage example:
    from data_lib.generator.data_entity import generate_entity_file

    generate_entity_file(
        cloud_provider="azure",
        classname="MinhaEntidade",
        file_extension="parquet",
        columns=['col1', 'col2'],
        imports="import pandas as pd",
        read_code="return pd.read_parquet(buffer)",
        write_code="data.to_parquet(buffer, index=False)",
        output_path="./output"
    )

NOTE(review): the package CLI imports ``DataEntityGenerator`` from
``daedaluspy.data_lib.generator.data_entity_oop``; confirm that the
``data_entity`` module and ``generate_entity_file`` used above still exist.

Author: Golden Valley Consulting LTDA
License: MIT
"""
|
|
29
|
+
|
|
30
|
+
__version__ = "1.0.0"
|
|
31
|
+
__author__ = "Golden Valley Consulting LTDA"
|
|
32
|
+
|
|
33
|
+
# Importações principais
|
|
34
|
+
from . import data
|
|
35
|
+
from . import generator
|
|
36
|
+
from . import services
|
|
37
|
+
|
|
38
|
+
__all__ = ['data', 'generator', 'services']
|
|
File without changes
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Template para base.py - classes base para entidades de dados
|
|
3
|
+
"""
|
|
4
|
+
BASE_DATA_TEMPLATE = '''"""
|
|
5
|
+
Base classes para entidades de dados
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Dict, Optional, Type
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from io import BytesIO
|
|
13
|
+
from enum import StrEnum
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import json
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FileExtensionHandler(ABC):
|
|
19
|
+
"""Classe abstrata para handlers de extensões de arquivo"""
|
|
20
|
+
|
|
21
|
+
@property
|
|
22
|
+
@abstractmethod
|
|
23
|
+
def extension(self) -> str:
|
|
24
|
+
"""Retorna a extensão do arquivo"""
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
@abstractmethod
|
|
28
|
+
def read_data(self, buffer: BytesIO) -> Any:
|
|
29
|
+
"""Lê dados do buffer"""
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
@abstractmethod
|
|
33
|
+
def write_data(self, data: Any, buffer: BytesIO) -> None:
|
|
34
|
+
"""Escreve dados no buffer"""
|
|
35
|
+
pass
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class StorageProvider(ABC):
|
|
39
|
+
"""Classe abstrata para providers de storage"""
|
|
40
|
+
|
|
41
|
+
@property
|
|
42
|
+
@abstractmethod
|
|
43
|
+
def provider_name(self) -> str:
|
|
44
|
+
"""Retorna o nome do provider"""
|
|
45
|
+
pass
|
|
46
|
+
|
|
47
|
+
@abstractmethod
|
|
48
|
+
def get_data_buffer(self, path: str, storage_client) -> BytesIO:
|
|
49
|
+
"""Obtém buffer de dados do storage pelo caminho"""
|
|
50
|
+
pass
|
|
51
|
+
|
|
52
|
+
@abstractmethod
|
|
53
|
+
def save_data_buffer(self, buffer: BytesIO, path: str, storage_client) -> bool:
|
|
54
|
+
"""Salva buffer de dados no storage pelo caminho"""
|
|
55
|
+
pass
|
|
56
|
+
|
|
57
|
+
@abstractmethod
|
|
58
|
+
def validate_client(self, storage_client) -> bool:
|
|
59
|
+
"""Valida se o cliente de storage é válido para este provider"""
|
|
60
|
+
pass
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class CSVHandler(FileExtensionHandler):
|
|
64
|
+
"""Handler para arquivos CSV"""
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def extension(self) -> str:
|
|
68
|
+
return "csv"
|
|
69
|
+
|
|
70
|
+
def read_data(self, buffer: BytesIO) -> pd.DataFrame:
|
|
71
|
+
return pd.read_csv(buffer)
|
|
72
|
+
|
|
73
|
+
def write_data(self, data: pd.DataFrame, buffer: BytesIO) -> None:
|
|
74
|
+
data.to_csv(buffer, index=False)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class JSONHandler(FileExtensionHandler):
    """Handler for JSON files."""

    @property
    def extension(self) -> str:
        """Return the file extension handled by this class."""
        return "json"

    def read_data(self, buffer: BytesIO) -> Dict[str, Any]:
        """Parse and return the JSON document held in ``buffer``."""
        return json.load(buffer)

    def write_data(self, data: Dict[str, Any], buffer: BytesIO) -> None:
        """Serialize ``data`` into ``buffer`` as UTF-8 encoded JSON."""
        # json.dump writes str, which a BytesIO rejects with TypeError, so the
        # document is rendered to text first and encoded explicitly.
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        buffer.write(payload.encode("utf-8"))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ParquetHandler(FileExtensionHandler):
|
|
92
|
+
"""Handler para arquivos Parquet"""
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def extension(self) -> str:
|
|
96
|
+
return "parquet"
|
|
97
|
+
|
|
98
|
+
def read_data(self, buffer: BytesIO) -> pd.DataFrame:
|
|
99
|
+
return pd.read_parquet(buffer)
|
|
100
|
+
|
|
101
|
+
def write_data(self, data: pd.DataFrame, buffer: BytesIO) -> None:
|
|
102
|
+
data.to_parquet(buffer, index=False)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ExcelHandler(FileExtensionHandler):
|
|
106
|
+
"""Handler para arquivos Excel"""
|
|
107
|
+
|
|
108
|
+
@property
|
|
109
|
+
def extension(self) -> str:
|
|
110
|
+
return "xlsx"
|
|
111
|
+
|
|
112
|
+
def read_data(self, buffer: BytesIO) -> pd.DataFrame:
|
|
113
|
+
return pd.read_excel(buffer)
|
|
114
|
+
|
|
115
|
+
def write_data(self, data: pd.DataFrame, buffer: BytesIO) -> None:
|
|
116
|
+
data.to_excel(buffer, index=False)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class AzureStorageProvider(StorageProvider):
|
|
120
|
+
"""Provider para Azure Blob Storage"""
|
|
121
|
+
|
|
122
|
+
@property
|
|
123
|
+
def provider_name(self) -> str:
|
|
124
|
+
return "azure"
|
|
125
|
+
|
|
126
|
+
def get_data_buffer(self, path: str, storage_client) -> BytesIO:
|
|
127
|
+
"""Obtém dados do Azure Blob Storage"""
|
|
128
|
+
blob = storage_client.get_blob_client(blob=path)
|
|
129
|
+
return BytesIO(blob.download_blob().readall())
|
|
130
|
+
|
|
131
|
+
def save_data_buffer(self, buffer: BytesIO, path: str, storage_client) -> bool:
|
|
132
|
+
"""Salva dados no Azure Blob Storage"""
|
|
133
|
+
try:
|
|
134
|
+
blob = storage_client.get_blob_client(blob=path)
|
|
135
|
+
buffer.seek(0)
|
|
136
|
+
blob.upload_blob(buffer.read(), overwrite=True)
|
|
137
|
+
return True
|
|
138
|
+
except Exception as e:
|
|
139
|
+
print(f"Erro ao salvar no Azure: {e}")
|
|
140
|
+
return False
|
|
141
|
+
|
|
142
|
+
def validate_client(self, storage_client) -> bool:
|
|
143
|
+
"""Valida se é um cliente Azure válido"""
|
|
144
|
+
return hasattr(storage_client, 'get_blob_client')
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class AWSStorageProvider(StorageProvider):
|
|
148
|
+
"""Provider para AWS S3"""
|
|
149
|
+
|
|
150
|
+
@property
|
|
151
|
+
def provider_name(self) -> str:
|
|
152
|
+
return "aws"
|
|
153
|
+
|
|
154
|
+
def get_data_buffer(self, path: str, storage_client) -> BytesIO:
|
|
155
|
+
"""Obtém dados do AWS S3"""
|
|
156
|
+
bucket, key = self._parse_s3_path(path)
|
|
157
|
+
response = storage_client.get_object(Bucket=bucket, Key=key)
|
|
158
|
+
return BytesIO(response['Body'].read())
|
|
159
|
+
|
|
160
|
+
def save_data_buffer(self, buffer: BytesIO, path: str, storage_client) -> bool:
|
|
161
|
+
"""Salva dados no AWS S3"""
|
|
162
|
+
try:
|
|
163
|
+
bucket, key = self._parse_s3_path(path)
|
|
164
|
+
buffer.seek(0)
|
|
165
|
+
storage_client.put_object(Bucket=bucket, Key=key, Body=buffer.read())
|
|
166
|
+
return True
|
|
167
|
+
except Exception as e:
|
|
168
|
+
print(f"Erro ao salvar no AWS: {e}")
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
def validate_client(self, storage_client) -> bool:
|
|
172
|
+
"""Valida se é um cliente AWS S3 válido"""
|
|
173
|
+
return hasattr(storage_client, 'get_object') and hasattr(storage_client, 'put_object')
|
|
174
|
+
|
|
175
|
+
def _parse_s3_path(self, path: str) -> tuple:
|
|
176
|
+
"""Parse do caminho S3 para bucket e key"""
|
|
177
|
+
# Implementar lógica de parsing do path S3
|
|
178
|
+
# Por exemplo: s3://bucket/path/file.csv -> (bucket, path/file.csv)
|
|
179
|
+
if path.startswith('s3://'):
|
|
180
|
+
path = path[5:]
|
|
181
|
+
parts = path.split('/', 1)
|
|
182
|
+
return parts[0], parts[1] if len(parts) > 1 else ''
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class GoogleStorageProvider(StorageProvider):
|
|
186
|
+
"""Provider para Google Cloud Storage"""
|
|
187
|
+
|
|
188
|
+
@property
|
|
189
|
+
def provider_name(self) -> str:
|
|
190
|
+
return "google"
|
|
191
|
+
|
|
192
|
+
def get_data_buffer(self, path: str, storage_client) -> BytesIO:
|
|
193
|
+
"""Obtém dados do Google Cloud Storage"""
|
|
194
|
+
bucket_name, blob_name = self._parse_gcs_path(path)
|
|
195
|
+
bucket = storage_client.bucket(bucket_name)
|
|
196
|
+
blob = bucket.blob(blob_name)
|
|
197
|
+
return BytesIO(blob.download_as_bytes())
|
|
198
|
+
|
|
199
|
+
def save_data_buffer(self, buffer: BytesIO, path: str, storage_client) -> bool:
|
|
200
|
+
"""Salva dados no Google Cloud Storage"""
|
|
201
|
+
try:
|
|
202
|
+
bucket_name, blob_name = self._parse_gcs_path(path)
|
|
203
|
+
bucket = storage_client.bucket(bucket_name)
|
|
204
|
+
blob = bucket.blob(blob_name)
|
|
205
|
+
buffer.seek(0)
|
|
206
|
+
blob.upload_from_file(buffer)
|
|
207
|
+
return True
|
|
208
|
+
except Exception as e:
|
|
209
|
+
print(f"Erro ao salvar no Google: {e}")
|
|
210
|
+
return False
|
|
211
|
+
|
|
212
|
+
def validate_client(self, storage_client) -> bool:
|
|
213
|
+
"""Valida se é um cliente Google Cloud válido"""
|
|
214
|
+
return hasattr(storage_client, 'bucket')
|
|
215
|
+
|
|
216
|
+
def _parse_gcs_path(self, path: str) -> tuple:
|
|
217
|
+
"""Parse do caminho GCS para bucket e blob"""
|
|
218
|
+
# Implementar lógica de parsing do path GCS
|
|
219
|
+
# Por exemplo: gs://bucket/path/file.csv -> (bucket, path/file.csv)
|
|
220
|
+
if path.startswith('gs://'):
|
|
221
|
+
path = path[5:]
|
|
222
|
+
parts = path.split('/', 1)
|
|
223
|
+
return parts[0], parts[1] if len(parts) > 1 else ''
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class DataLayerOptions(StrEnum):
|
|
227
|
+
"""Opções de camada de dados"""
|
|
228
|
+
RAW = "raw"
|
|
229
|
+
CLEAR = "clear"
|
|
230
|
+
MODEL = "model"
|
|
231
|
+
|
|
232
|
+
@dataclass
|
|
233
|
+
class BaseEntity(ABC):
|
|
234
|
+
"""Classe base para todas as entidades de dados"""
|
|
235
|
+
|
|
236
|
+
# Propriedades base que devem ser definidas nas classes filhas
|
|
237
|
+
data_layer: DataLayerOptions
|
|
238
|
+
path_prefix: str
|
|
239
|
+
filename: str
|
|
240
|
+
file_handler: FileExtensionHandler
|
|
241
|
+
storage_provider: StorageProvider
|
|
242
|
+
date_reference: Optional[datetime] = None # data de referência
|
|
243
|
+
path_sufix: Optional[str] = None # sufixo opcional para o caminho
|
|
244
|
+
|
|
245
|
+
class Columns:
|
|
246
|
+
"""Classe para definir colunas de entidades"""
|
|
247
|
+
pass
|
|
248
|
+
|
|
249
|
+
def _build_path_by_date(self) -> str:
|
|
250
|
+
"""Constrói caminho baseado na data de referência"""
|
|
251
|
+
return "/".join([
|
|
252
|
+
self.data_layer.value,
|
|
253
|
+
self.path_prefix,
|
|
254
|
+
f"{self.date_reference.year:04d}",
|
|
255
|
+
f"{self.date_reference.month:02d}",
|
|
256
|
+
f"{self.date_reference.year:04d}.{self.date_reference.month:02d}.{self.date_reference.day:02d}",
|
|
257
|
+
f"{self.filename}.{self.file_handler.extension}"
|
|
258
|
+
])
|
|
259
|
+
|
|
260
|
+
def _build_path_by_sufix(self) -> str:
|
|
261
|
+
"""Constrói caminho baseado no sufixo"""
|
|
262
|
+
return "/".join([
|
|
263
|
+
self.data_layer.value,
|
|
264
|
+
self.path_prefix,
|
|
265
|
+
self.path_sufix,
|
|
266
|
+
f"{self.filename}.{self.file_handler.extension}"
|
|
267
|
+
])
|
|
268
|
+
|
|
269
|
+
def read_data_by_date(self, storage_client=None) -> Any:
|
|
270
|
+
"""Lê dados baseado na data de referência"""
|
|
271
|
+
if not self.date_reference:
|
|
272
|
+
raise ValueError("date_reference deve ser definida para leitura por data")
|
|
273
|
+
|
|
274
|
+
if not self.storage_provider.validate_client(storage_client):
|
|
275
|
+
raise ValueError(f"Cliente de storage inválido para provider {self.storage_provider.provider_name}")
|
|
276
|
+
|
|
277
|
+
# Constrói o caminho e obtém os dados do storage
|
|
278
|
+
path = self._build_path_by_date()
|
|
279
|
+
buffer = self.storage_provider.get_data_buffer(path, storage_client)
|
|
280
|
+
|
|
281
|
+
# Usa o file handler para processar os dados
|
|
282
|
+
return self.file_handler.read_data(buffer)
|
|
283
|
+
|
|
284
|
+
def write_data_by_date(self, data: Any, storage_client=None) -> bool:
|
|
285
|
+
"""Escreve dados baseado na data de referência"""
|
|
286
|
+
if not self.date_reference:
|
|
287
|
+
raise ValueError("date_reference deve ser definida para escrita por data")
|
|
288
|
+
|
|
289
|
+
if not self.storage_provider.validate_client(storage_client):
|
|
290
|
+
raise ValueError(f"Cliente de storage inválido para provider {self.storage_provider.provider_name}")
|
|
291
|
+
|
|
292
|
+
try:
|
|
293
|
+
# Cria buffer e usa o file handler para escrever os dados
|
|
294
|
+
buffer = BytesIO()
|
|
295
|
+
self.file_handler.write_data(data, buffer)
|
|
296
|
+
|
|
297
|
+
# Constrói o caminho e salva no storage
|
|
298
|
+
path = self._build_path_by_date()
|
|
299
|
+
return self.storage_provider.save_data_buffer(buffer, path, storage_client)
|
|
300
|
+
except Exception as e:
|
|
301
|
+
print(f"Erro ao escrever dados por data: {e}")
|
|
302
|
+
return False
|
|
303
|
+
|
|
304
|
+
def read_data_by_sufix(self, storage_client=None) -> Any:
|
|
305
|
+
"""Lê dados baseado no sufixo"""
|
|
306
|
+
if not self.path_sufix:
|
|
307
|
+
raise ValueError("path_sufix deve ser definido para leitura por sufixo")
|
|
308
|
+
|
|
309
|
+
if not self.storage_provider.validate_client(storage_client):
|
|
310
|
+
raise ValueError(f"Cliente de storage inválido para provider {self.storage_provider.provider_name}")
|
|
311
|
+
|
|
312
|
+
# Constrói o caminho e obtém os dados do storage
|
|
313
|
+
path = self._build_path_by_sufix()
|
|
314
|
+
buffer = self.storage_provider.get_data_buffer(path, storage_client)
|
|
315
|
+
|
|
316
|
+
# Usa o file handler para processar os dados
|
|
317
|
+
return self.file_handler.read_data(buffer)
|
|
318
|
+
|
|
319
|
+
def write_data_by_sufix(self, data: Any, storage_client=None) -> bool:
|
|
320
|
+
"""Escreve dados baseado no sufixo"""
|
|
321
|
+
if not self.path_sufix:
|
|
322
|
+
raise ValueError("path_sufix deve ser definido para escrita por sufixo")
|
|
323
|
+
|
|
324
|
+
if not self.storage_provider.validate_client(storage_client):
|
|
325
|
+
raise ValueError(f"Cliente de storage inválido para provider {self.storage_provider.provider_name}")
|
|
326
|
+
|
|
327
|
+
try:
|
|
328
|
+
# Cria buffer e usa o file handler para escrever os dados
|
|
329
|
+
buffer = BytesIO()
|
|
330
|
+
self.file_handler.write_data(data, buffer)
|
|
331
|
+
|
|
332
|
+
# Constrói o caminho e salva no storage
|
|
333
|
+
path = self._build_path_by_sufix()
|
|
334
|
+
return self.storage_provider.save_data_buffer(buffer, path, storage_client)
|
|
335
|
+
except Exception as e:
|
|
336
|
+
print(f"Erro ao escrever dados por sufixo: {e}")
|
|
337
|
+
return False
|
|
338
|
+
|
|
339
|
+
class HandlerFactory:
|
|
340
|
+
"""Factory para criar handlers e storage providers"""
|
|
341
|
+
|
|
342
|
+
_file_handlers = {
|
|
343
|
+
"csv": CSVHandler,
|
|
344
|
+
"json": JSONHandler,
|
|
345
|
+
"parquet": ParquetHandler,
|
|
346
|
+
"xlsx": ExcelHandler
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
_storage_providers = {
|
|
350
|
+
"aws": AWSStorageProvider,
|
|
351
|
+
"azure": AzureStorageProvider,
|
|
352
|
+
"google": GoogleStorageProvider
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
@classmethod
|
|
356
|
+
def get_file_handler(cls, extension: str) -> FileExtensionHandler:
|
|
357
|
+
"""Cria handler baseado na extensão"""
|
|
358
|
+
handler_class = cls._file_handlers.get(extension.lower())
|
|
359
|
+
if not handler_class:
|
|
360
|
+
raise ValueError(f"Handler não encontrado para extensão: {extension}")
|
|
361
|
+
return handler_class()
|
|
362
|
+
|
|
363
|
+
@classmethod
|
|
364
|
+
def get_storage_provider(cls, provider_name: str) -> StorageProvider:
|
|
365
|
+
"""Cria storage provider baseado no nome"""
|
|
366
|
+
provider_class = cls._storage_providers.get(provider_name.lower())
|
|
367
|
+
if not provider_class:
|
|
368
|
+
raise ValueError(f"Storage provider não encontrado: {provider_name}")
|
|
369
|
+
return provider_class()
|
|
370
|
+
'''
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
TEMPLATE_ENTITY_AWS = '''from datetime import datetime
|
|
2
|
+
from {lib_name}.data.base import BaseEntity, DataLayerOptions, HandlerFactory, AWSStorageProvider
|
|
3
|
+
{imports}
|
|
4
|
+
|
|
5
|
+
class {classname}(BaseEntity):
|
|
6
|
+
"""Entidade de dados para {classname}"""
|
|
7
|
+
|
|
8
|
+
class Columns:
|
|
9
|
+
{columns}
|
|
10
|
+
|
|
11
|
+
def __init__(self):
|
|
12
|
+
"""Configurações da entidade"""
|
|
13
|
+
self.data_layer = DataLayerOptions.{data_layer_upper}
|
|
14
|
+
self.path_prefix = "{path_prefix}"
|
|
15
|
+
self.filename = "{filename}"
|
|
16
|
+
self.file_handler = HandlerFactory.get_file_handler("{file_extension}")
|
|
17
|
+
self.storage_provider = AWSStorageProvider()
|
|
18
|
+
self.date_reference = datetime.now()
|
|
19
|
+
self.path_sufix = None
|
|
20
|
+
'''
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
TEMPLATE_ENTITY_AZURE = '''from datetime import datetime
|
|
2
|
+
from azure.storage.blob import BlobClient
|
|
3
|
+
from {lib_name}.data.base import BaseEntity, DataLayerOptions, HandlerFactory, AzureStorageProvider
|
|
4
|
+
{imports}
|
|
5
|
+
|
|
6
|
+
class {classname}(BaseEntity):
|
|
7
|
+
"""Entidade de dados para {classname}"""
|
|
8
|
+
|
|
9
|
+
class Columns:
|
|
10
|
+
{columns}
|
|
11
|
+
|
|
12
|
+
def __init__(self):
|
|
13
|
+
"""Configurações da entidade"""
|
|
14
|
+
self.data_layer = DataLayerOptions.{data_layer_upper}
|
|
15
|
+
self.path_prefix = "{path_prefix}"
|
|
16
|
+
self.filename = "{filename}"
|
|
17
|
+
self.file_handler = HandlerFactory.get_file_handler("{file_extension}")
|
|
18
|
+
self.storage_provider = AzureStorageProvider()
|
|
19
|
+
self.date_reference = datetime.now()
|
|
20
|
+
self.path_sufix = None
|
|
21
|
+
'''
|
|
File without changes
|
|
File without changes
|
|
File without changes
|