embedding-flow 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from embedding_flow.main import embedding_flow
2
+
3
+ __all__ = ['embedding_flow']
4
+
@@ -11,4 +11,5 @@ class load_data(ABC):
11
11
  @abstractmethod
12
12
  def load_data(self, url: str) -> bool:
13
13
  """Carga datos y retorna True si fue exitoso, False si falló"""
14
- pass
14
+ pass
15
+
@@ -1,4 +1,4 @@
1
- from contracts.contracts import load_data
1
+ from embedding_flow.contracts.contracts import load_data
2
2
  from qdrant_client import QdrantClient
3
3
  from qdrant_client.models import Distance, VectorParams, PointStruct
4
4
  import pandas as pd
@@ -98,4 +98,4 @@ class load_embedding(load_data):
98
98
 
99
99
  except Exception as e:
100
100
  logger.error(f"❌ Error al cargar embeddings a Qdrant desde {parquet_path}: {e}", exc_info=True)
101
- return False
101
+ return False
embedding_flow/main.py ADDED
@@ -0,0 +1,22 @@
1
+ from embedding_flow.transform.transform import transform_embedding
2
+ from embedding_flow.load.load import load_embedding
3
+ import logging
4
+
5
# Root-logger setup; runs as a side effect of importing this module.
logging.basicConfig(
    filename='appMain.log',  # optional: write log records to a file
    filemode='a',  # 'a' appends, 'w' overwrites
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,  # lowest severity that gets emitted
)
11
+
12
+
13
def embedding_flow(parquet_path: str) -> str | None:
    """Run the transform step and then the load step for one parquet path.

    ``parquet_path`` is passed to ``transform_embedding``; whatever that call
    produces is passed on to ``load_embedding``. The outcome is reported
    through the root logger and handed back to the caller.

    Args:
        parquet_path: Path given to ``transform_embedding``.

    Returns:
        The value produced by ``load_embedding(...)``, or ``None`` when that
        value is ``None`` (which is logged as a pipeline failure).
    """
    # NOTE(review): elsewhere in this package both callees appear as class
    # declarations (``class transform_embedding(transform_data)`` and
    # ``class load_embedding(load_data)``). If calling them simply builds
    # instances, the ``None`` branch below can never trigger and the
    # ``str | None`` annotation is inaccurate -- confirm against the classes.
    embeddings = transform_embedding(parquet_path)
    outcome = load_embedding(embeddings)

    if outcome is not None:
        logging.info("✅ Pipeline completed successfully")
        return outcome

    logging.error("❌ Pipeline failed")
    return None
@@ -1,4 +1,4 @@
1
- from contracts.contracts import transform_data
1
+ from embedding_flow.contracts.contracts import transform_data
2
2
  from pathlib import Path
3
3
  import pandas as pd
4
4
  from sentence_transformers import SentenceTransformer
@@ -39,4 +39,4 @@ class transform_embedding(transform_data):
39
39
 
40
40
  except Exception as e:
41
41
  logger.error(f"❌ Error al transformar en embeddings {parquet_path}: {e}", exc_info=True)
42
- return None
42
+ return None
@@ -0,0 +1,69 @@
1
+ Metadata-Version: 2.4
2
+ Name: embedding-flow
3
+ Version: 0.1.2
4
+ Summary: Pipeline to transform text chunks into embeddings and load to Qdrant
5
+ Author: facuvega
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Programming Language :: Python :: 3.10
8
+ Classifier: Programming Language :: Python :: 3.11
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: pandas>=2.0.0
16
+ Requires-Dist: pyarrow>=12.0.0
17
+ Requires-Dist: sentence-transformers>=2.2.0
18
+ Requires-Dist: qdrant-client>=1.7.0
19
+ Requires-Dist: transformers
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
23
+ Provides-Extra: cpu
24
+ Requires-Dist: torch>=2.0.0; extra == "cpu"
25
+ Provides-Extra: cuda
26
+ Requires-Dist: torch>=2.0.0; extra == "cuda"
27
+ Dynamic: license-file
28
+
29
+ # embedding-flow
30
+
31
+ Biblioteca para transformar chunks de texto en embeddings de 768 dimensiones y cargarlos en Qdrant.
32
+
33
+ ## Instalación
34
+
35
+ ```bash
36
+ # Instalación básica (instala torch según tu sistema)
37
+ pip install embedding-flow
38
+
39
+ # O instalar con torch CPU (recomendado si no tenés GPU)
40
+ pip install embedding-flow torch --extra-index-url https://download.pytorch.org/whl/cpu
41
+ ```
42
+
43
+ ## Uso
44
+
45
+ ```python
46
+ from embedding_flow import embedding_flow
47
+
48
+ # Recibe el path del parquet con chunks y carga embeddings a Qdrant
49
+ embedding_flow("/path/to/chunks.parquet")
50
+ ```
51
+
52
+ ## Variables de entorno
53
+
54
+ ```bash
55
+ QDRANT_URL=http://localhost:6333
56
+ QDRANT_COLLECTION=embeddings_collection
57
+ VECTOR_SIZE=768
58
+ ```
59
+
60
+ ## Flujo
61
+
62
+ 1. Lee chunks desde parquet
63
+ 2. Genera embeddings (768 dim) con `all-mpnet-base-v2`
64
+ 3. Carga embeddings a Qdrant (Docker local)
65
+
66
+ ## Licencia
67
+
68
+ MIT
69
+
@@ -0,0 +1,13 @@
1
+ embedding_flow/__init__.py,sha256=2M-FOuekraoC7yxnhc86gX1iwEhfWuejbqnf_AZSsag,78
2
+ embedding_flow/main.py,sha256=e0VxDPrnAqxvcygWg5qTeUoQzm_Yt9QWBO5NbFDAjxg,752
3
+ embedding_flow/contracts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ embedding_flow/contracts/contracts.py,sha256=Sg1rsuFxMPx4ROHsLqIxsvhYhr371-QL5v1LiZrijXI,443
5
+ embedding_flow/load/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ embedding_flow/load/load.py,sha256=mNkl5o6EOiTEq3qzAipGv-IPopgRnKnk1CoRTaQnIys,3722
7
+ embedding_flow/transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ embedding_flow/transform/transform.py,sha256=BPLZnMaf9txbJwkw6oG-FeGI3wfuaxsxxVxoiV3dOPQ,1601
9
+ embedding_flow-0.1.2.dist-info/licenses/LICENSE,sha256=Vi3cItkblr6fZwGbNlp_HnBaMFwXSWYPkrVQLXX3LCs,1057
10
+ embedding_flow-0.1.2.dist-info/METADATA,sha256=Ch2NjDXmI3P8xK8DA7yrSksVSkNlOZ7JxicFOX2_CeA,1806
11
+ embedding_flow-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ embedding_flow-0.1.2.dist-info/top_level.txt,sha256=ryROgL4aUmHN8Ez_pskO2-kRHYNg-wBDMTp48633S7s,15
13
+ embedding_flow-0.1.2.dist-info/RECORD,,
@@ -0,0 +1 @@
1
+ embedding_flow
@@ -1,22 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: embedding-flow
3
- Version: 0.1.0
4
- Summary: Pipeline to transform text chunks into embeddings and load to Qdrant
5
- Author: facuvega
6
- Classifier: Programming Language :: Python :: 3
7
- Classifier: Programming Language :: Python :: 3.10
8
- Classifier: Programming Language :: Python :: 3.11
9
- Classifier: Programming Language :: Python :: 3.12
10
- Classifier: License :: OSI Approved :: MIT License
11
- Classifier: Operating System :: OS Independent
12
- Requires-Python: >=3.10
13
- License-File: LICENSE
14
- Requires-Dist: pandas>=2.0.0
15
- Requires-Dist: pyarrow>=12.0.0
16
- Requires-Dist: sentence-transformers>=2.2.0
17
- Requires-Dist: torch>=2.0.0
18
- Requires-Dist: qdrant-client>=1.7.0
19
- Provides-Extra: dev
20
- Requires-Dist: pytest>=7.0.0; extra == "dev"
21
- Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
22
- Dynamic: license-file
@@ -1,11 +0,0 @@
1
- contracts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- contracts/contracts.py,sha256=im3wzTHMxZFVIVyjVVN1u4eZ5J_HxYGrcu3oQ8BXoOk,441
3
- embedding_flow-0.1.0.dist-info/licenses/LICENSE,sha256=Vi3cItkblr6fZwGbNlp_HnBaMFwXSWYPkrVQLXX3LCs,1057
4
- load/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- load/load.py,sha256=Y0JbC1x7SfG_EzsSDacDK8fuIcv7K1guQ8AD0birheE,3706
6
- transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- transform/transform.py,sha256=GPeea6D2EbdNwmxdCDtDGgTGIPVLs_Z4VePYQJfzROc,1585
8
- embedding_flow-0.1.0.dist-info/METADATA,sha256=uFM06vDN-pagU7V_GoZ1wNU6ZPsX8MzdlxNMjV2JoaA,802
9
- embedding_flow-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- embedding_flow-0.1.0.dist-info/top_level.txt,sha256=VfSCJdxLtTjvyUSe1z1kQgIf0j2zFt3--FivJqWMxxA,25
11
- embedding_flow-0.1.0.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- contracts
2
- load
3
- transform
File without changes
File without changes
File without changes