embedding-flow 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- embedding_flow/__init__.py +4 -0
- {contracts → embedding_flow/contracts}/contracts.py +2 -1
- {load → embedding_flow/load}/load.py +2 -2
- embedding_flow/main.py +22 -0
- {transform → embedding_flow/transform}/transform.py +2 -2
- {embedding_flow-0.1.0.dist-info → embedding_flow-0.1.1.dist-info}/METADATA +40 -1
- embedding_flow-0.1.1.dist-info/RECORD +13 -0
- embedding_flow-0.1.1.dist-info/top_level.txt +1 -0
- embedding_flow-0.1.0.dist-info/RECORD +0 -11
- embedding_flow-0.1.0.dist-info/top_level.txt +0 -3
- {contracts → embedding_flow/contracts}/__init__.py +0 -0
- {load → embedding_flow/load}/__init__.py +0 -0
- {transform → embedding_flow/transform}/__init__.py +0 -0
- {embedding_flow-0.1.0.dist-info → embedding_flow-0.1.1.dist-info}/WHEEL +0 -0
- {embedding_flow-0.1.0.dist-info → embedding_flow-0.1.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
from contracts.contracts import load_data
|
1
|
+
from embedding_flow.contracts.contracts import load_data
|
2
2
|
from qdrant_client import QdrantClient
|
3
3
|
from qdrant_client.models import Distance, VectorParams, PointStruct
|
4
4
|
import pandas as pd
|
@@ -98,4 +98,4 @@ class load_embedding(load_data):
|
|
98
98
|
|
99
99
|
except Exception as e:
|
100
100
|
logger.error(f"❌ Error al cargar embeddings a Qdrant desde {parquet_path}: {e}", exc_info=True)
|
101
|
-
return False
|
101
|
+
return False
|
embedding_flow/main.py
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
from embedding_flow.transform.transform import transform_embedding
|
2
|
+
from embedding_flow.load.load import load_embedding
|
3
|
+
import logging
|
4
|
+
|
5
|
+
# Configure the root logger when this module is imported: INFO and above are
# written to the relative file 'appMain.log' with a timestamped format.
logging.basicConfig(
    level=logging.INFO,  # Minimum log level to emit
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='appMain.log',  # Optional: write the logs to a file
    filemode='a'  # 'a' append, 'w' overwrite
)
|
11
|
+
|
12
|
+
|
13
|
+
def embedding_flow(parquet_path: str) -> str | None:
    """Run the transform step and then the load step for one parquet file.

    Args:
        parquet_path: Value passed straight to ``transform_embedding``.

    Returns:
        The object produced by ``load_embedding`` when it is not ``None``;
        otherwise ``None`` (after logging a failure).
    """
    # NOTE(review): both callees appear to be declared as classes in this
    # package, so calling them may just construct instances and the
    # ``is None`` test below might never fire -- confirm against their code.
    # NOTE(review): the load module's visible error path returns False, not
    # None; a False result would be reported here as success -- confirm.
    outcome = load_embedding(transform_embedding(parquet_path))

    if outcome is not None:
        logging.info("✅ Pipeline completed successfully")
        return outcome

    logging.error("❌ Pipeline failed")
    return None
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from contracts.contracts import transform_data
|
1
|
+
from embedding_flow.contracts.contracts import transform_data
|
2
2
|
from pathlib import Path
|
3
3
|
import pandas as pd
|
4
4
|
from sentence_transformers import SentenceTransformer
|
@@ -39,4 +39,4 @@ class transform_embedding(transform_data):
|
|
39
39
|
|
40
40
|
except Exception as e:
|
41
41
|
logger.error(f"❌ Error al transformar en embeddings {parquet_path}: {e}", exc_info=True)
|
42
|
-
return None
|
42
|
+
return None
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: embedding-flow
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: Pipeline to transform text chunks into embeddings and load to Qdrant
|
5
5
|
Author: facuvega
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
11
11
|
Classifier: Operating System :: OS Independent
|
12
12
|
Requires-Python: >=3.10
|
13
|
+
Description-Content-Type: text/markdown
|
13
14
|
License-File: LICENSE
|
14
15
|
Requires-Dist: pandas>=2.0.0
|
15
16
|
Requires-Dist: pyarrow>=12.0.0
|
@@ -20,3 +21,41 @@ Provides-Extra: dev
|
|
20
21
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
21
22
|
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
22
23
|
Dynamic: license-file
|
24
|
+
|
25
|
+
# embedding-flow
|
26
|
+
|
27
|
+
Biblioteca para transformar chunks de texto en embeddings de 768 dimensiones y cargarlos en Qdrant.
|
28
|
+
|
29
|
+
## Instalación
|
30
|
+
|
31
|
+
```bash
|
32
|
+
pip install embedding-flow
|
33
|
+
```
|
34
|
+
|
35
|
+
## Uso
|
36
|
+
|
37
|
+
```python
|
38
|
+
from embedding_flow import embedding_flow
|
39
|
+
|
40
|
+
# Recibe el path del parquet con chunks y carga embeddings a Qdrant
|
41
|
+
embedding_flow("/path/to/chunks.parquet")
|
42
|
+
```
|
43
|
+
|
44
|
+
## Variables de entorno
|
45
|
+
|
46
|
+
```bash
|
47
|
+
QDRANT_URL=http://localhost:6333
|
48
|
+
QDRANT_COLLECTION=embeddings_collection
|
49
|
+
VECTOR_SIZE=768
|
50
|
+
```
|
51
|
+
|
52
|
+
## Flujo
|
53
|
+
|
54
|
+
1. Lee chunks desde parquet
|
55
|
+
2. Genera embeddings (768 dim) con `all-mpnet-base-v2`
|
56
|
+
3. Carga embeddings a Qdrant (Docker local)
|
57
|
+
|
58
|
+
## Licencia
|
59
|
+
|
60
|
+
MIT
|
61
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
embedding_flow/__init__.py,sha256=2M-FOuekraoC7yxnhc86gX1iwEhfWuejbqnf_AZSsag,78
|
2
|
+
embedding_flow/main.py,sha256=e0VxDPrnAqxvcygWg5qTeUoQzm_Yt9QWBO5NbFDAjxg,752
|
3
|
+
embedding_flow/contracts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
embedding_flow/contracts/contracts.py,sha256=Sg1rsuFxMPx4ROHsLqIxsvhYhr371-QL5v1LiZrijXI,443
|
5
|
+
embedding_flow/load/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
embedding_flow/load/load.py,sha256=mNkl5o6EOiTEq3qzAipGv-IPopgRnKnk1CoRTaQnIys,3722
|
7
|
+
embedding_flow/transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
embedding_flow/transform/transform.py,sha256=BPLZnMaf9txbJwkw6oG-FeGI3wfuaxsxxVxoiV3dOPQ,1601
|
9
|
+
embedding_flow-0.1.1.dist-info/licenses/LICENSE,sha256=Vi3cItkblr6fZwGbNlp_HnBaMFwXSWYPkrVQLXX3LCs,1057
|
10
|
+
embedding_flow-0.1.1.dist-info/METADATA,sha256=-79fyZKjE1N68N5QRSm8s_G_rKpQQwaAvOVzM2rNy5w,1478
|
11
|
+
embedding_flow-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
+
embedding_flow-0.1.1.dist-info/top_level.txt,sha256=ryROgL4aUmHN8Ez_pskO2-kRHYNg-wBDMTp48633S7s,15
|
13
|
+
embedding_flow-0.1.1.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
embedding_flow
|
@@ -1,11 +0,0 @@
|
|
1
|
-
contracts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
contracts/contracts.py,sha256=im3wzTHMxZFVIVyjVVN1u4eZ5J_HxYGrcu3oQ8BXoOk,441
|
3
|
-
embedding_flow-0.1.0.dist-info/licenses/LICENSE,sha256=Vi3cItkblr6fZwGbNlp_HnBaMFwXSWYPkrVQLXX3LCs,1057
|
4
|
-
load/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
load/load.py,sha256=Y0JbC1x7SfG_EzsSDacDK8fuIcv7K1guQ8AD0birheE,3706
|
6
|
-
transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
transform/transform.py,sha256=GPeea6D2EbdNwmxdCDtDGgTGIPVLs_Z4VePYQJfzROc,1585
|
8
|
-
embedding_flow-0.1.0.dist-info/METADATA,sha256=uFM06vDN-pagU7V_GoZ1wNU6ZPsX8MzdlxNMjV2JoaA,802
|
9
|
-
embedding_flow-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
10
|
-
embedding_flow-0.1.0.dist-info/top_level.txt,sha256=VfSCJdxLtTjvyUSe1z1kQgIf0j2zFt3--FivJqWMxxA,25
|
11
|
-
embedding_flow-0.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|