ddi-fw 0.0.262__py3-none-any.whl → 0.0.264__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -94,7 +94,7 @@ class BaseDataset(BaseModel, abc.ABC):
94
94
 
95
95
  train_data,test_data = np.stack(train_data.flatten().tolist()), np.stack(test_data.flatten().tolist())
96
96
  column = self.columns[0] if self.columns else 'default'
97
- items.append([f'default', np.nan_to_num(train_data),
97
+ items.append([f'{column}', np.nan_to_num(train_data),
98
98
  y_train_label, np.nan_to_num(test_data), y_test_label])
99
99
  else:
100
100
  for index, column in enumerate(self.columns):
@@ -414,8 +414,9 @@ def generate_embeddings(
414
414
  # Load embedding model
415
415
  try:
416
416
  model_kwargs = collection_config.get('model_kwargs')
417
+ kwargs = {"model_kwargs":model_kwargs}
417
418
  model = get_import(embedding_model_type)(
418
- model_name=name, **model_kwargs)
419
+ model_name=name, **kwargs)
419
420
  except Exception as e:
420
421
  raise Exception(f"Unknown embedding model: {embedding_model_type}") from e
421
422
 
@@ -459,4 +460,48 @@ def generate_embeddings(
459
460
  vector_store_manager.generate_vector_store(docs)
460
461
 
461
462
  # Optionally persist/save
462
- vector_store_manager.save(persist_dir)
463
+ vector_store_manager.save(persist_dir)
464
+
465
+
466
+ import os
467
+ import json
468
+
469
+ def generate_embeddings_for_json_object(
470
+ obj_json: dict,
471
+ vector_store_manager_type: Type[BaseVectorStoreManager],
472
+ persist_root: str = "./embeddings",
473
+ new_model_names: Optional[List] = None,
474
+ docs=None
475
+ ):
476
+ """
477
+ Generate embeddings for all collections in the given JSON object, storing them in a container folder.
478
+
479
+ Args:
480
+ obj_json: JSON object with 'id', 'name', and 'collections' keys.
481
+ vector_store_manager_type: The vector store manager class to use.
482
+ persist_root: Root directory for all embeddings.
483
+ new_model_names: Optional list of model names to filter collections.
484
+ docs: Documents to embed (if needed).
485
+ """
486
+ obj_id = obj_json.get("id")
487
+ obj_name = obj_json.get("name")
488
+ collections = obj_json.get("collections", [])
489
+
490
+ if not obj_id:
491
+ raise ValueError("JSON object must have an 'id' field.")
492
+ if not collections:
493
+ raise ValueError("No collections found in the given JSON object.")
494
+
495
+ # Create container directory for this object
496
+ container_dir = os.path.join(persist_root, str(obj_id))
497
+ os.makedirs(container_dir, exist_ok=True)
498
+
499
+ # Call your existing function
500
+ generate_embeddings(
501
+ docs=docs,
502
+ vector_store_manager_type=vector_store_manager_type,
503
+ config_file=None,
504
+ new_model_names=new_model_names,
505
+ collections=collections,
506
+ persist_directory=container_dir
507
+ )
@@ -149,8 +149,9 @@ class MultiPipeline():
149
149
 
150
150
  # Default model configuration
151
151
  default_model = config.get("default_model", {})
152
- default_model_type = get_import(default_model.get("model_type"))
153
- default_model_params = default_model.get("params", {})
152
+ if default_model:
153
+ default_model_type = get_import(default_model.get("model_type"))
154
+ default_model_params = default_model.get("params", {})
154
155
 
155
156
  multi_modal = config.get("multi_modal")
156
157
 
ddi_fw/utils/kaggle.py CHANGED
@@ -37,6 +37,7 @@ def create_kaggle_dataset(base_path: str, collections: list):
37
37
 
38
38
  # Ensure title is between 6 and 50 characters
39
39
  if not (6 <= len(title) <= 50):
40
+ raise ValueError(f"Title length for {title} must be between 6 and 50 characters.")
40
41
  continue # Skip if title length is out of the expected range
41
42
 
42
43
  # Step 3: Define the metadata content
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.262
3
+ Version: 0.0.264
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,12 +1,12 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=NozQvXPYIS01U0srZmcKhiqJgRDkD-C-VXHL6sKrFSw,166
2
- ddi_fw/datasets/core.py,sha256=UnbCDoWXdxeiAb0e0anhDqXiVFGUi02VA9sKl6NVBZU,17409
2
+ ddi_fw/datasets/core.py,sha256=Nc0OnaYi0hIYuGCdxOCyT2X4mdWK0wyVSxUw6836fKk,17410
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
6
6
  ddi_fw/langchain/__init__.py,sha256=97Y4lYuxShWqx5hfDbzf8VyV0HrM76fDlNp5xXusKQU,445
7
7
  ddi_fw/langchain/chroma_storage.py,sha256=fOxoJoaqqyOKqtfUtlq2zJd-XY03rARTDvrPE_9nY2I,15855
8
8
  ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
9
- ddi_fw/langchain/faiss_storage.py,sha256=M_i1WSb66u14pIDnQ9YXTw1XNgPzsy2vchxaSzc6vY4,18669
9
+ ddi_fw/langchain/faiss_storage.py,sha256=M-pogVtmESi_sXsBCEcTItz1-NDILllCAB41Pg54kNo,20235
10
10
  ddi_fw/langchain/sentence_splitter.py,sha256=NCcDdDWDnwZTZDqarg-5gSbcDFoAM_sxcgH9ZCu97IA,597
11
11
  ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
12
12
  ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
@@ -21,7 +21,7 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
21
21
  ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
22
22
  ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
23
23
  ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
24
- ddi_fw/pipeline/multi_pipeline.py,sha256=jHjSfQmRQ-zEwh_5ZPdG4MBVYMrRRzlqYgFAMbDZN0g,10206
24
+ ddi_fw/pipeline/multi_pipeline.py,sha256=ck6VhWF4dDTqeJu7Z0VYBYSxIcRUJLAYj01P6AplQgg,10241
25
25
  ddi_fw/pipeline/multi_pipeline_org.py,sha256=AbErwu05-3YIPnCcXRsj-jxPJG8HG2H7cMZlGjzaYa8,9037
26
26
  ddi_fw/pipeline/ner_pipeline.py,sha256=1gBk81LeZlU1rhjJ1qBgHbFt_HqOeJ5WLnJ4AkYku4s,8188
27
27
  ddi_fw/pipeline/pipeline.py,sha256=m6pZrhoBK2lUr7PwpmJl6-WEpYcPGGc9N9C1LNJ78NQ,6974
@@ -29,7 +29,7 @@ ddi_fw/utils/__init__.py,sha256=WNxkQXk-694roG50D355TGLXstfdWVb_tUyr-PM-8rg,537
29
29
  ddi_fw/utils/categorical_data_encoding_checker.py,sha256=T1X70Rh4atucAuqyUZmz-iFULllY9dY0NRyV9-jTjJ0,3438
30
30
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
31
31
  ddi_fw/utils/json_helper.py,sha256=BVU6wmJgdXPxyqLPu3Ck_9Es5RrP1PDanKvE-OSj1D4,571
32
- ddi_fw/utils/kaggle.py,sha256=ZlKS4kZVsNV_JNYO4IvqY4GkhQCvl-_5saaz_bZzgrQ,2508
32
+ ddi_fw/utils/kaggle.py,sha256=itisQ5nffYMZz6gFYMdmbrpo2qaQvFVmLiRCC73MB1U,2604
33
33
  ddi_fw/utils/numpy_utils.py,sha256=gd1WNq5NpWD2MBEMTtFuS5I0h8B6FAUNcq6BVOlxdhY,797
34
34
  ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5I,1056
35
35
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
@@ -38,7 +38,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
38
38
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
39
39
  ddi_fw/vectorization/feature_vector_generation.py,sha256=92bhZw4Qxh0hqPK-bPHm9bUO7pg2p4cStQYtVrOtetE,7919
40
40
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
41
- ddi_fw-0.0.262.dist-info/METADATA,sha256=HDaqqJBBz5EFNJ8ggBj4Si6Qib0mSlih4jqdeUEnU-Q,2623
42
- ddi_fw-0.0.262.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- ddi_fw-0.0.262.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
44
- ddi_fw-0.0.262.dist-info/RECORD,,
41
+ ddi_fw-0.0.264.dist-info/METADATA,sha256=PKLhkkd6zsEA_YtV_4vVLh0K-pgLTzpf6IDH_ETlsek,2623
42
+ ddi_fw-0.0.264.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ ddi_fw-0.0.264.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
44
+ ddi_fw-0.0.264.dist-info/RECORD,,