csc-cia-stne 0.1.27__py3-none-any.whl → 0.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csc_cia_stne/__init__.py +2 -0
- csc_cia_stne/gcp_document_ai.py +100 -0
- csc_cia_stne/google_drive.py +1 -1
- {csc_cia_stne-0.1.27.dist-info → csc_cia_stne-0.1.29.dist-info}/METADATA +3 -1
- {csc_cia_stne-0.1.27.dist-info → csc_cia_stne-0.1.29.dist-info}/RECORD +8 -7
- {csc_cia_stne-0.1.27.dist-info → csc_cia_stne-0.1.29.dist-info}/WHEEL +0 -0
- {csc_cia_stne-0.1.27.dist-info → csc_cia_stne-0.1.29.dist-info}/licenses/LICENCE +0 -0
- {csc_cia_stne-0.1.27.dist-info → csc_cia_stne-0.1.29.dist-info}/top_level.txt +0 -0
csc_cia_stne/__init__.py
CHANGED
@@ -16,6 +16,7 @@ from .web import web_screen
|
|
16
16
|
from .wacess import Waccess
|
17
17
|
from .gcp_bucket import GCPBucket
|
18
18
|
from .ftp import FTP
|
19
|
+
from .gcp_document_ai import GCPDocumentAIClient
|
19
20
|
|
20
21
|
# Define os itens disponíveis para importação
|
21
22
|
__all__ = [
|
@@ -30,6 +31,7 @@ __all__ = [
|
|
30
31
|
"Provio",
|
31
32
|
"Email",
|
32
33
|
"GoogleDrive",
|
34
|
+
"GCPDocumentAIClient"
|
33
35
|
"Slack",
|
34
36
|
"web_screen",
|
35
37
|
"Waccess",
|
@@ -0,0 +1,100 @@
|
|
1
|
+
from typing import Optional, Dict, Union
|
2
|
+
from google.cloud import documentai_v1beta3 as documentai
|
3
|
+
from google.oauth2 import service_account
|
4
|
+
|
5
|
+
class GCPDocumentAIClient:
    """Small wrapper around the Google Cloud Document AI processor client.

    The constructor never raises: any failure while building the underlying
    client is recorded in ``is_connected`` / ``error`` so callers can inspect
    the state before calling :meth:`ler_documento`.
    """

    def __init__(self, credential_json: Optional[dict] = None, processor_id: Optional[str] = None) -> None:
        """Initialise the Document AI client.

        Args:
            credential_json: Service-account credentials as a dictionary. The
                ``project_id`` key is read from it.
            processor_id: ID of the Document AI processor to use.

        Attributes:
            credential_json: The credentials exactly as received.
            project_id: Value of ``credential_json["project_id"]``, or ``None``
                when it is missing or the credentials could not be read.
            location: Processor location (fixed to ``"us"``).
            processor_id: The processor ID exactly as received.
            client: The authenticated client, or ``None`` if initialisation
                failed.
            is_connected: ``True`` only when the client was created.
            error: Error message when initialisation failed, otherwise ``None``.
        """
        self.credential_json: Optional[dict] = credential_json
        self.project_id: Optional[str] = None
        self.location: str = "us"
        self.processor_id: Optional[str] = processor_id

        # Start from a well-defined "not connected" state so every attribute
        # exists even when the setup below fails.
        self.client = None
        self.is_connected: bool = False
        self.error: Optional[str] = None

        try:
            # The parameter defaults to None; reading from it must happen
            # inside the try so a missing/invalid value is reported through
            # `error` instead of escaping as an AttributeError.
            if not credential_json:
                raise ValueError("credential_json não informado")
            self.project_id = credential_json.get("project_id")

            self.client = self._get_document_ai_client(credential_json)
            self.is_connected = True

        except Exception as e:
            self.client = None
            self.is_connected = False
            self.error = f"Erro ao inicializar o cliente do Document AI: {e}"

    def _get_document_ai_client(self, credential_json: dict) -> "documentai.DocumentProcessorServiceClient":
        """Create and return the authenticated Document AI client.

        Args:
            credential_json: Service-account credentials as a dictionary.

        Returns:
            A ``DocumentProcessorServiceClient`` built with those credentials.

        Raises:
            Exception: If the credentials cannot be created (the original
                error is chained as the cause), or if the client constructor
                itself fails.
        """
        try:
            credential = service_account.Credentials.from_service_account_info(credential_json)
        except Exception as e:
            error_msg = f"Erro ao criar credenciais do Document AI: {e}"
            # Keep the original exception as __cause__ for debugging.
            raise Exception(error_msg) from e

        return documentai.DocumentProcessorServiceClient(credentials=credential)

    def ler_documento(self, file_bytes: bytes, mime_type: str) -> "Dict[str, Union[bool, str, documentai.Document, None]]":
        """Send a document to the configured processor and return the result.

        Args:
            file_bytes: Raw bytes of the document to process.
            mime_type: MIME type of the document (e.g. ``"application/pdf"``).

        Returns:
            A dictionary with three keys:

            - ``success`` (bool): ``True`` when processing succeeded.
            - ``error`` (str | None): Error message on failure.
            - ``data`` (documentai.Document | None): The processed document.

        Example:
            >>> client = GCPDocumentAIClient(creds, processor_id)
            >>> with open("documento.pdf", "rb") as f:
            ...     resultado = client.ler_documento(f.read(), "application/pdf")
            >>> if resultado["success"]:
            ...     texto = resultado["data"].text

        Note:
            - Uses the processor configured in ``__init__``.
            - The whole content of ``file_bytes`` is sent in one request.
            - The original documentation suggests a helper named
              ``extrair_x_paginas_pdf`` to limit pages beforehand; it is not
              defined in this module.
        """
        prefix = "Erro ao processar o documento com o Document AI"

        # Fail with a clear message when __init__ could not build the client,
        # rather than surfacing an AttributeError / NoneType error text.
        if not self.is_connected or self.client is None:
            detail = self.error or "cliente não inicializado"
            return {"success": False, "error": f"{prefix}: {detail}", "data": None}

        # Without a processor ID the resource path would be built with "None".
        if not self.processor_id:
            return {"success": False, "error": f"{prefix}: processor_id não informado", "data": None}

        try:
            name = self.client.processor_path(self.project_id, self.location, self.processor_id)
            request = documentai.ProcessRequest(
                name=name,
                raw_document=documentai.RawDocument(content=file_bytes, mime_type=mime_type),
            )

            result = self.client.process_document(request=request)
            return {"success": True, "error": None, "data": result.document}

        except Exception as e:
            error_msg = f"{prefix}: {str(e)}"
            return {"success": False, "error": error_msg, "data": None}
|
100
|
+
|
csc_cia_stne/google_drive.py
CHANGED
@@ -677,7 +677,7 @@ class GoogleDrive:
|
|
677
677
|
request = self.service.files().export_media(
|
678
678
|
fileId=file.get("id"), mimeType=mimeType
|
679
679
|
)
|
680
|
-
file_path = f"{path}{file[
|
680
|
+
file_path = f"{path}{file['name']}"
|
681
681
|
with open(file_path, "wb") as f:
|
682
682
|
f.write(request.execute())
|
683
683
|
return {
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: csc_cia_stne
|
3
|
-
Version: 0.1.27
|
3
|
+
Version: 0.1.29
|
4
4
|
Summary: Biblioteca do time CSC-CIA utilizada no desenvolvimento de RPAs
|
5
5
|
License: MIT
|
6
6
|
Keywords: karavela,csc,cia,stone,rpa,botcity,stne
|
@@ -17,9 +17,11 @@ Requires-Dist: zeep
|
|
17
17
|
Requires-Dist: google-cloud-bigquery
|
18
18
|
Requires-Dist: google-cloud-storage
|
19
19
|
Requires-Dist: google-cloud-bigquery-storage
|
20
|
+
Requires-Dist: google-auth
|
20
21
|
Requires-Dist: google-auth-oauthlib
|
21
22
|
Requires-Dist: google-auth-httplib2
|
22
23
|
Requires-Dist: google-api-python-client
|
24
|
+
Requires-Dist: google-cloud-documentai
|
23
25
|
Requires-Dist: pyjwt
|
24
26
|
Requires-Dist: PyYAML
|
25
27
|
Requires-Dist: python-dotenv
|
@@ -1,11 +1,12 @@
|
|
1
|
-
csc_cia_stne/__init__.py,sha256=
|
1
|
+
csc_cia_stne/__init__.py,sha256=LITCLPqafF-VUV85wUvJ047ozLnfd82vw5AuuKXYi2s,2713
|
2
2
|
csc_cia_stne/bc_correios.py,sha256=s2XjJ0iokMlcUv0mAy9saU3pc_G-6X8wltb_lFHIL6o,24717
|
3
3
|
csc_cia_stne/bc_sta.py,sha256=S4EtkSEHP-wTMWRjmmSBH9XY5SnVQ1TwwZFSOE6tI2Q,29551
|
4
4
|
csc_cia_stne/email.py,sha256=y4xyPAe6_Mga5Wf6qAsDzYgn0f-zf2KshfItlWe58z8,8481
|
5
5
|
csc_cia_stne/ftp.py,sha256=eNkOUEXdw-9NYfuZjNo6Oh7EduD54g8N0cfD0LuOiTU,11516
|
6
6
|
csc_cia_stne/gcp_bigquery.py,sha256=foq8azvvv_f7uikMDslX9RcUIrx7RAS-Sn0AGW0QFQc,7231
|
7
7
|
csc_cia_stne/gcp_bucket.py,sha256=vMALWiW7IoBCuJAR8bUCpOV6BuBzI9AhRRk3b72OdMk,11515
|
8
|
-
csc_cia_stne/
|
8
|
+
csc_cia_stne/gcp_document_ai.py,sha256=Dzlk7YR3M_LxE0sHn-Lxz-PA1NsUZN2hgY5PyUfs0IQ,4506
|
9
|
+
csc_cia_stne/google_drive.py,sha256=7qwx4_RPEoSJgeVI02aLYNXA7o69_Z3qONvX5bfA4V0,44500
|
9
10
|
csc_cia_stne/karavela.py,sha256=jJCYX43D49gGuzmwwK6bN9XVnv2dXdp9iHnnV5H1LMQ,4794
|
10
11
|
csc_cia_stne/logger_json.py,sha256=CXxSCOFGMymDi8XE9SKnPKjW4D0wJLqDLnxqePS26i8,3187
|
11
12
|
csc_cia_stne/logger_rich.py,sha256=fklgkBb4rblKQd7YZ3q-eWfhGg9eflO2k2-z4pGh_yo,5201
|
@@ -38,8 +39,8 @@ csc_cia_stne/utilitarios/web_screen/__init__.py,sha256=5QcOPXKd95SvP2DoZiHS0gaU6
|
|
38
39
|
csc_cia_stne/utilitarios/web_screen/web_screen_abstract.py,sha256=PjL8Vgfj_JdKidia7RFyCkro3avYLQu4RZRos41sh3w,3241
|
39
40
|
csc_cia_stne/utilitarios/web_screen/web_screen_botcity.py,sha256=Xi5YJjl2pcxlX3OimqcBWRNXZEpAE7asyUjDJ4Oho5U,12297
|
40
41
|
csc_cia_stne/utilitarios/web_screen/web_screen_selenium.py,sha256=JLIcPJE9ZX3Pd6zG6oTRMqqUAY063UzLY3ReRlxmiSM,15581
|
41
|
-
csc_cia_stne-0.1.
|
42
|
-
csc_cia_stne-0.1.
|
43
|
-
csc_cia_stne-0.1.
|
44
|
-
csc_cia_stne-0.1.
|
45
|
-
csc_cia_stne-0.1.
|
42
|
+
csc_cia_stne-0.1.29.dist-info/licenses/LICENCE,sha256=LPGMtgKki2C3KEZP7hDhA1HBrlq5JCHkIeStUCLEMx4,1073
|
43
|
+
csc_cia_stne-0.1.29.dist-info/METADATA,sha256=sN19T2lC0fc6kk15iwjcobbX_1v_w9jhHWpbpHIvFCA,1530
|
44
|
+
csc_cia_stne-0.1.29.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
45
|
+
csc_cia_stne-0.1.29.dist-info/top_level.txt,sha256=ldo7GVv3tQx5KJvwBzdZzzQmjPys2NDVVn1rv0BOF2Q,13
|
46
|
+
csc_cia_stne-0.1.29.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|