ebm4subjects 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ebm4subjects/ebm_model.py CHANGED
@@ -19,7 +19,7 @@ from ebm4subjects.embedding_generator import EmbeddingGenerator
19
19
  class EbmModel:
20
20
  def __init__(
21
21
  self,
22
- embedding_model_name: str,
22
+ embedding_model_name: str | Any,
23
23
  embedding_dimensions: int | str,
24
24
  chunk_tokenizer: str | Any,
25
25
  max_chunk_count: int | str,
@@ -9,7 +9,8 @@ class EmbeddingGenerator:
9
9
  A class for generating embeddings using a given SentenceTransformer model.
10
10
 
11
11
  Args:
12
- model_name (str): The name of the SentenceTransformer model to use.
12
+ model_name (str, SentenceTransformer): The name of the SentenceTransformer
13
+ model or a SentenceTransformer model to use.
13
14
  embedding_dimensions (int): The dimensionality of the generated embeddings.
14
15
  **kwargs: Additional keyword arguments to pass to the model.
15
16
 
@@ -19,7 +20,9 @@ class EmbeddingGenerator:
19
20
  model (SentenceTransformer): The SentenceTransformer model instance.
20
21
  """
21
22
 
22
- def __init__(self, model_name: str, embedding_dimensions: int, **kwargs) -> None:
23
+ def __init__(
24
+ self, model_name: str | SentenceTransformer, embedding_dimensions: int, **kwargs
25
+ ) -> None:
23
26
  """
24
27
  Initializes the EmbeddingGenerator.
25
28
 
@@ -31,9 +34,13 @@ class EmbeddingGenerator:
31
34
 
32
35
  # Create a SentenceTransformer model instance with the given
33
36
  # model name and embedding dimensions
34
- self.model = SentenceTransformer(
35
- model_name, truncate_dim=embedding_dimensions, **kwargs
36
- )
37
+ # or set model to the given SentenceTransformer
38
+ if type(model_name) is str:
39
+ self.model = SentenceTransformer(
40
+ model_name, truncate_dim=embedding_dimensions, **kwargs
41
+ )
42
+ else:
43
+ self.model = model_name
37
44
 
38
45
  # Disable parallelism for tokenizer
39
46
  # Needed because process might be already parallelized
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ebm4subjects
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Embedding Based Matching for Automated Subject Indexing
5
5
  Author: Deutsche Nationalbibliothek
6
6
  Maintainer-email: Clemens Rietdorf <c.rietdorf@dnb.de>, Maximilian Kähler <m.kaehler@dnb.de>
@@ -3,10 +3,10 @@ ebm4subjects/analyzer.py,sha256=lqX7AF8WsvwIavgtnmoVQ0i3wzBJJSeH47EiEwoLKGg,1664
3
3
  ebm4subjects/chunker.py,sha256=HcEFJtKWHFYZL8DmZcHGXLPGEkCqHZhh_0kSqyYVsdE,6764
4
4
  ebm4subjects/duckdb_client.py,sha256=8lDIpj2o2VTEtjHC_vTYrI5-RNXZnWMft45bS6z9B_k,13031
5
5
  ebm4subjects/ebm_logging.py,sha256=xkbqeVhSCNuhMwkx2yoIX8_D3z9DcsauZEmHhR1gaS0,5962
6
- ebm4subjects/ebm_model.py,sha256=fNO8Hy4EAY8gwu3ysSxQC_Kp2pUecbdr-6Ue7pFoAkk,29119
7
- ebm4subjects/embedding_generator.py,sha256=jC4rz4W50tKndxYezD7Kaoqysl8zhN-TbWirxA_WIQc,2354
6
+ ebm4subjects/ebm_model.py,sha256=PVFtljF3oZK8u0lA6df82lsTdAD8H1Y9CHvWq1jWF2M,29125
7
+ ebm4subjects/embedding_generator.py,sha256=DZhZxkjcsy_4NA62_2V-4UPbIUkg5qMPat_cIgsoIAA,2609
8
8
  ebm4subjects/prepare_data.py,sha256=vQ-BdXkIP3iZJdPXol0WDlY8cRFMHkjzzL7oC7EbouE,3084
9
- ebm4subjects-0.5.0.dist-info/METADATA,sha256=8JA0eDC-5-M7FW9nq3hqJDIpSGes4KoKd9uUWgafq8k,8274
10
- ebm4subjects-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
- ebm4subjects-0.5.0.dist-info/licenses/LICENSE,sha256=RpvAZSjULHvoTR_esTlucJ08-zdQydnoqQLbqOh9Ub8,13826
12
- ebm4subjects-0.5.0.dist-info/RECORD,,
9
+ ebm4subjects-0.5.1.dist-info/METADATA,sha256=QkOBvOAI49_AUipc3yAH6RVG9OVUs_8jO64Bjfy561U,8274
10
+ ebm4subjects-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
+ ebm4subjects-0.5.1.dist-info/licenses/LICENSE,sha256=RpvAZSjULHvoTR_esTlucJ08-zdQydnoqQLbqOh9Ub8,13826
12
+ ebm4subjects-0.5.1.dist-info/RECORD,,