ebm4subjects 0.5.5__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ebm4subjects/ebm_model.py CHANGED
@@ -44,7 +44,7 @@ class EbmModel:
44
44
  use_altLabels: bool = True,
45
45
  hnsw_index_params: dict | str | None = None,
46
46
  embedding_model_name: str | None = None,
47
- embedding_model_deployment: str = "offline-inference",
47
+ embedding_model_deployment: str = "mock",
48
48
  embedding_model_args: dict | str | None = None,
49
49
  encode_args_vocab: dict | str | None = None,
50
50
  encode_args_documents: dict | str | None = None,
@@ -101,7 +101,7 @@ class EbmModel:
101
101
 
102
102
  # Parameters for embedding generator
103
103
  self.generator = None
104
- self.embedding_model_deployment = embedding_model_deployment
104
+ self.embedding_model_deployment = embedding_model_deployment.lower()
105
105
  self.embedding_model_name = embedding_model_name
106
106
  self.embedding_dimensions = int(embedding_dimensions)
107
107
  if isinstance(embedding_model_args, str) or not embedding_model_args:
@@ -182,7 +182,7 @@ class EbmModel:
182
182
  """
183
183
  if self.generator is None:
184
184
  if self.embedding_model_deployment == "in-process":
185
- self.logger.info("initializing offline-inference embedding generator")
185
+ self.logger.info("initializing in-process embedding generator")
186
186
  self.generator = EmbeddingGeneratorInProcess(
187
187
  model_name=self.embedding_model_name,
188
188
  embedding_dimensions=self.embedding_dimensions,
@@ -192,7 +192,7 @@ class EbmModel:
192
192
  elif self.embedding_model_deployment == "mock":
193
193
  self.logger.info("initializing mock embedding generator")
194
194
  self.generator = EmbeddingGeneratorMock(self.embedding_dimensions)
195
- elif self.embedding_model_deployment == "HuggingFaceTEI":
195
+ elif self.embedding_model_deployment == "huggingfacetei":
196
196
  self.logger.info("initializing API embedding generator")
197
197
  self.generator = EmbeddingGeneratorHuggingFaceTEI(
198
198
  model_name=self.embedding_model_name,
@@ -200,7 +200,7 @@ class EbmModel:
200
200
  logger=self.logger,
201
201
  **self.embedding_model_args,
202
202
  )
203
- elif self.embedding_model_deployment == "OpenAI":
203
+ elif self.embedding_model_deployment == "openai":
204
204
  self.logger.info("initializing API embedding generator")
205
205
  self.generator = EmbeddingGeneratorOpenAI(
206
206
  model_name=self.embedding_model_name,
@@ -209,8 +209,7 @@ class EbmModel:
209
209
  **self.embedding_model_args,
210
210
  )
211
211
  else:
212
- self.logger.error("unsupportet API for embedding generator")
213
- raise NotImplementedError
212
+ raise NotImplementedError("Unsupportet API for embedding generator")
214
213
 
215
214
  def init_logger(
216
215
  self,
@@ -670,7 +669,7 @@ class EbmModel:
670
669
  )
671
670
  self.logger.info("training successful finished")
672
671
  except xgb.core.XGBoostError:
673
- self.logger.critical(
672
+ self.logger.warn(
674
673
  "XGBoost can't train with candidates equal to gold standard "
675
674
  "or candidates with no match to gold standard at all - "
676
675
  "Check if your training data and gold standard are correct"
@@ -80,17 +80,7 @@ class EmbeddingGeneratorHuggingFaceTEI(EmbeddingGenerator):
80
80
  self.logger.debug(
81
81
  "API call successful. Everything seems to be working fine."
82
82
  )
83
- elif response.status_code == 404:
84
- self.logger.error(
85
- "API not found under given adress! Please check the corresponding parameter!"
86
- )
87
- raise RuntimeError(
88
- "API not found under given adress! Please check the corresponding parameter!"
89
- )
90
83
  else:
91
- self.logger.error(
92
- "Request to API not possible! Please check the corresponding parameters!"
93
- )
94
84
  raise RuntimeError(
95
85
  "Request to API not possible! Please check the corresponding parameters!"
96
86
  )
@@ -188,29 +178,12 @@ class EmbeddingGeneratorOpenAI(EmbeddingGenerator):
188
178
  """
189
179
  Tests if the API is working with the given parameters
190
180
  """
191
- try:
192
- _ = self.client.embeddings.create(
193
- input="This is a test request!",
194
- model=self.model_name,
195
- encoding_format="float",
196
- )
197
- self.logger.debug(
198
- "API call successful. Everything seems to be working fine."
199
- )
200
- except NotFoundError:
201
- self.logger.error(
202
- "API not found under given adress! Please check the corresponding parameter!"
203
- )
204
- raise RuntimeError(
205
- "API not found under given adress! Please check the corresponding parameter!"
206
- )
207
- except BadRequestError:
208
- self.logger.error(
209
- "Request to API not possible! Please check the corresponding parameters!"
210
- )
211
- raise RuntimeError(
212
- "Request to API not possible! Please check the corresponding parameters!"
213
- )
181
+ _ = self.client.embeddings.create(
182
+ input="This is a test request!",
183
+ model=self.model_name,
184
+ encoding_format="float",
185
+ )
186
+ self.logger.debug("API call successful. Everything seems to be working fine.")
214
187
 
215
188
  def generate_embeddings(self, texts: list[str], **kwargs) -> np.ndarray:
216
189
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ebm4subjects
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: Embedding Based Matching for Automated Subject Indexing
5
5
  Author: Deutsche Nationalbibliothek
6
6
  Maintainer-email: Clemens Rietdorf <c.rietdorf@dnb.de>, Maximilian Kähler <m.kaehler@dnb.de>
@@ -3,10 +3,10 @@ ebm4subjects/analyzer.py,sha256=lqX7AF8WsvwIavgtnmoVQ0i3wzBJJSeH47EiEwoLKGg,1664
3
3
  ebm4subjects/chunker.py,sha256=HcEFJtKWHFYZL8DmZcHGXLPGEkCqHZhh_0kSqyYVsdE,6764
4
4
  ebm4subjects/duckdb_client.py,sha256=8lDIpj2o2VTEtjHC_vTYrI5-RNXZnWMft45bS6z9B_k,13031
5
5
  ebm4subjects/ebm_logging.py,sha256=vGMa3xSm6T7ZQ94XeNGJVGCTl3zytt4sbunwXc6qF5U,5987
6
- ebm4subjects/ebm_model.py,sha256=mnnRqdO3vlRF0MTNx7JvHgTXPCNg-8YWJm1kOtHinak,30929
7
- ebm4subjects/embedding_generator.py,sha256=q5HP36q11EMkH_yomduXa176ays7mtRvBvL0f78NFIE,12909
6
+ ebm4subjects/ebm_model.py,sha256=UTCIv_KCQ4HTJVbcVIAUv4S2j87oq8HXBeN5mfJmclQ,30879
7
+ ebm4subjects/embedding_generator.py,sha256=fk8rRhqBcRCknpCYoFolcXjoCwsx25Qd_UEOt-nUlv8,11774
8
8
  ebm4subjects/prepare_data.py,sha256=vQ-BdXkIP3iZJdPXol0WDlY8cRFMHkjzzL7oC7EbouE,3084
9
- ebm4subjects-0.5.5.dist-info/METADATA,sha256=oekB-uWB3p53odPkbtx-CqzxL_AHf6Az3RJcNhw1xhY,8354
10
- ebm4subjects-0.5.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
11
- ebm4subjects-0.5.5.dist-info/licenses/LICENSE,sha256=RpvAZSjULHvoTR_esTlucJ08-zdQydnoqQLbqOh9Ub8,13826
12
- ebm4subjects-0.5.5.dist-info/RECORD,,
9
+ ebm4subjects-0.5.6.dist-info/METADATA,sha256=Dujb7SghFPo3j42yRAgkbqv-VSmwpocJIHW4NgJFhn0,8354
10
+ ebm4subjects-0.5.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
11
+ ebm4subjects-0.5.6.dist-info/licenses/LICENSE,sha256=RpvAZSjULHvoTR_esTlucJ08-zdQydnoqQLbqOh9Ub8,13826
12
+ ebm4subjects-0.5.6.dist-info/RECORD,,