embedding-flow 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from embedding_flow.main import embedding_flow
2
+
3
+ __all__ = ['embedding_flow']
4
+
@@ -11,4 +11,5 @@ class load_data(ABC):
11
11
  @abstractmethod
12
12
  def load_data(self, url: str) -> bool:
13
13
  """Carga datos y retorna True si fue exitoso, False si falló"""
14
- pass
14
+ pass
15
+
@@ -1,4 +1,4 @@
1
- from contracts.contracts import load_data
1
+ from embedding_flow.contracts.contracts import load_data
2
2
  from qdrant_client import QdrantClient
3
3
  from qdrant_client.models import Distance, VectorParams, PointStruct
4
4
  import pandas as pd
@@ -98,4 +98,4 @@ class load_embedding(load_data):
98
98
 
99
99
  except Exception as e:
100
100
  logger.error(f"❌ Error al cargar embeddings a Qdrant desde {parquet_path}: {e}", exc_info=True)
101
- return False
101
+ return False
embedding_flow/main.py ADDED
@@ -0,0 +1,22 @@
1
+ from embedding_flow.transform.transform import transform_embedding
2
+ from embedding_flow.load.load import load_embedding
3
+ import logging
4
+
5
# Root logging configuration, executed when this module is imported.
logging.basicConfig(
    filename="appMain.log",  # optional: send log records to this file
    filemode="a",  # "a" appends to the file, "w" would overwrite it
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,  # minimum level of log records to emit
)
11
+
12
+
13
def embedding_flow(parquet_path: str) -> str | None:
    """Run the transform step followed by the load step for one parquet path.

    Args:
        parquet_path: Value passed straight to ``transform_embedding``.

    Returns:
        The value produced by ``load_embedding`` when it is not ``None``;
        otherwise ``None``. The outcome is logged in both cases.
    """
    # NOTE(review): elsewhere in this package ``transform_embedding`` and
    # ``load_embedding`` appear to be declared as classes. If so, these calls
    # construct instances and the ``None`` outcome below can never occur --
    # confirm against transform/transform.py and load/load.py.
    transformed = transform_embedding(parquet_path)
    outcome = load_embedding(transformed)

    # Success path first; anything that is not None is handed back unchanged.
    if outcome is not None:
        logging.info("✅ Pipeline completed successfully")
        return outcome

    logging.error("❌ Pipeline failed")
    return None
@@ -1,4 +1,4 @@
1
- from contracts.contracts import transform_data
1
+ from embedding_flow.contracts.contracts import transform_data
2
2
  from pathlib import Path
3
3
  import pandas as pd
4
4
  from sentence_transformers import SentenceTransformer
@@ -39,4 +39,4 @@ class transform_embedding(transform_data):
39
39
 
40
40
  except Exception as e:
41
41
  logger.error(f"❌ Error al transformar en embeddings {parquet_path}: {e}", exc_info=True)
42
- return None
42
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: embedding-flow
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Pipeline to transform text chunks into embeddings and load to Qdrant
5
5
  Author: facuvega
6
6
  Classifier: Programming Language :: Python :: 3
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.12
10
10
  Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
13
14
  License-File: LICENSE
14
15
  Requires-Dist: pandas>=2.0.0
15
16
  Requires-Dist: pyarrow>=12.0.0
@@ -20,3 +21,41 @@ Provides-Extra: dev
20
21
  Requires-Dist: pytest>=7.0.0; extra == "dev"
21
22
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
22
23
  Dynamic: license-file
24
+
25
+ # embedding-flow
26
+
27
+ Biblioteca para transformar chunks de texto en embeddings de 768 dimensiones y cargarlos en Qdrant.
28
+
29
+ ## Instalación
30
+
31
+ ```bash
32
+ pip install embedding-flow
33
+ ```
34
+
35
+ ## Uso
36
+
37
+ ```python
38
+ from embedding_flow import embedding_flow
39
+
40
+ # Recibe el path del parquet con chunks y carga embeddings a Qdrant
41
+ embedding_flow("/path/to/chunks.parquet")
42
+ ```
43
+
44
+ ## Variables de entorno
45
+
46
+ ```bash
47
+ QDRANT_URL=http://localhost:6333
48
+ QDRANT_COLLECTION=embeddings_collection
49
+ VECTOR_SIZE=768
50
+ ```
51
+
52
+ ## Flujo
53
+
54
+ 1. Lee chunks desde parquet
55
+ 2. Genera embeddings (768 dim) con `all-mpnet-base-v2`
56
+ 3. Carga embeddings a Qdrant (Docker local)
57
+
58
+ ## Licencia
59
+
60
+ MIT
61
+
@@ -0,0 +1,13 @@
1
+ embedding_flow/__init__.py,sha256=2M-FOuekraoC7yxnhc86gX1iwEhfWuejbqnf_AZSsag,78
2
+ embedding_flow/main.py,sha256=e0VxDPrnAqxvcygWg5qTeUoQzm_Yt9QWBO5NbFDAjxg,752
3
+ embedding_flow/contracts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ embedding_flow/contracts/contracts.py,sha256=Sg1rsuFxMPx4ROHsLqIxsvhYhr371-QL5v1LiZrijXI,443
5
+ embedding_flow/load/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ embedding_flow/load/load.py,sha256=mNkl5o6EOiTEq3qzAipGv-IPopgRnKnk1CoRTaQnIys,3722
7
+ embedding_flow/transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ embedding_flow/transform/transform.py,sha256=BPLZnMaf9txbJwkw6oG-FeGI3wfuaxsxxVxoiV3dOPQ,1601
9
+ embedding_flow-0.1.1.dist-info/licenses/LICENSE,sha256=Vi3cItkblr6fZwGbNlp_HnBaMFwXSWYPkrVQLXX3LCs,1057
10
+ embedding_flow-0.1.1.dist-info/METADATA,sha256=-79fyZKjE1N68N5QRSm8s_G_rKpQQwaAvOVzM2rNy5w,1478
11
+ embedding_flow-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ embedding_flow-0.1.1.dist-info/top_level.txt,sha256=ryROgL4aUmHN8Ez_pskO2-kRHYNg-wBDMTp48633S7s,15
13
+ embedding_flow-0.1.1.dist-info/RECORD,,
@@ -0,0 +1 @@
1
+ embedding_flow
@@ -1,11 +0,0 @@
1
- contracts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- contracts/contracts.py,sha256=im3wzTHMxZFVIVyjVVN1u4eZ5J_HxYGrcu3oQ8BXoOk,441
3
- embedding_flow-0.1.0.dist-info/licenses/LICENSE,sha256=Vi3cItkblr6fZwGbNlp_HnBaMFwXSWYPkrVQLXX3LCs,1057
4
- load/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- load/load.py,sha256=Y0JbC1x7SfG_EzsSDacDK8fuIcv7K1guQ8AD0birheE,3706
6
- transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- transform/transform.py,sha256=GPeea6D2EbdNwmxdCDtDGgTGIPVLs_Z4VePYQJfzROc,1585
8
- embedding_flow-0.1.0.dist-info/METADATA,sha256=uFM06vDN-pagU7V_GoZ1wNU6ZPsX8MzdlxNMjV2JoaA,802
9
- embedding_flow-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- embedding_flow-0.1.0.dist-info/top_level.txt,sha256=VfSCJdxLtTjvyUSe1z1kQgIf0j2zFt3--FivJqWMxxA,25
11
- embedding_flow-0.1.0.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- contracts
2
- load
3
- transform
File without changes
File without changes
File without changes