ebm4subjects 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ebm4subjects/ebm_model.py +1 -1
- ebm4subjects/embedding_generator.py +12 -5
- {ebm4subjects-0.5.0.dist-info → ebm4subjects-0.5.1.dist-info}/METADATA +1 -1
- {ebm4subjects-0.5.0.dist-info → ebm4subjects-0.5.1.dist-info}/RECORD +6 -6
- {ebm4subjects-0.5.0.dist-info → ebm4subjects-0.5.1.dist-info}/WHEEL +0 -0
- {ebm4subjects-0.5.0.dist-info → ebm4subjects-0.5.1.dist-info}/licenses/LICENSE +0 -0
ebm4subjects/ebm_model.py
CHANGED
|
@@ -19,7 +19,7 @@ from ebm4subjects.embedding_generator import EmbeddingGenerator
|
|
|
19
19
|
class EbmModel:
|
|
20
20
|
def __init__(
|
|
21
21
|
self,
|
|
22
|
-
embedding_model_name: str,
|
|
22
|
+
embedding_model_name: str | Any,
|
|
23
23
|
embedding_dimensions: int | str,
|
|
24
24
|
chunk_tokenizer: str | Any,
|
|
25
25
|
max_chunk_count: int | str,
|
|
@@ -9,7 +9,8 @@ class EmbeddingGenerator:
|
|
|
9
9
|
A class for generating embeddings using a given SentenceTransformer model.
|
|
10
10
|
|
|
11
11
|
Args:
|
|
12
|
-
model_name (str): The name of the SentenceTransformer
|
|
12
|
+
model_name (str, SentenceTransformer): The name of the SentenceTransformer
|
|
13
|
+
model or a SentenceTransformer model to use.
|
|
13
14
|
embedding_dimensions (int): The dimensionality of the generated embeddings.
|
|
14
15
|
**kwargs: Additional keyword arguments to pass to the model.
|
|
15
16
|
|
|
@@ -19,7 +20,9 @@ class EmbeddingGenerator:
|
|
|
19
20
|
model (SentenceTransformer): The SentenceTransformer model instance.
|
|
20
21
|
"""
|
|
21
22
|
|
|
22
|
-
def __init__(
|
|
23
|
+
def __init__(
|
|
24
|
+
self, model_name: str | SentenceTransformer, embedding_dimensions: int, **kwargs
|
|
25
|
+
) -> None:
|
|
23
26
|
"""
|
|
24
27
|
Initializes the EmbeddingGenerator.
|
|
25
28
|
|
|
@@ -31,9 +34,13 @@ class EmbeddingGenerator:
|
|
|
31
34
|
|
|
32
35
|
# Create a SentenceTransformer model instance with the given
|
|
33
36
|
# model name and embedding dimensions
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
# or set model to the given SentenceTransformer
|
|
38
|
+
if type(model_name) is str:
|
|
39
|
+
self.model = SentenceTransformer(
|
|
40
|
+
model_name, truncate_dim=embedding_dimensions, **kwargs
|
|
41
|
+
)
|
|
42
|
+
else:
|
|
43
|
+
self.model = model_name
|
|
37
44
|
|
|
38
45
|
# Disable parallelism for tokenizer
|
|
39
46
|
# Needed because process might be already parallelized
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ebm4subjects
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: Embedding Based Matching for Automated Subject Indexing
|
|
5
5
|
Author: Deutsche Nationalbibliothek
|
|
6
6
|
Maintainer-email: Clemens Rietdorf <c.rietdorf@dnb.de>, Maximilian Kähler <m.kaehler@dnb.de>
|
|
@@ -3,10 +3,10 @@ ebm4subjects/analyzer.py,sha256=lqX7AF8WsvwIavgtnmoVQ0i3wzBJJSeH47EiEwoLKGg,1664
|
|
|
3
3
|
ebm4subjects/chunker.py,sha256=HcEFJtKWHFYZL8DmZcHGXLPGEkCqHZhh_0kSqyYVsdE,6764
|
|
4
4
|
ebm4subjects/duckdb_client.py,sha256=8lDIpj2o2VTEtjHC_vTYrI5-RNXZnWMft45bS6z9B_k,13031
|
|
5
5
|
ebm4subjects/ebm_logging.py,sha256=xkbqeVhSCNuhMwkx2yoIX8_D3z9DcsauZEmHhR1gaS0,5962
|
|
6
|
-
ebm4subjects/ebm_model.py,sha256=
|
|
7
|
-
ebm4subjects/embedding_generator.py,sha256=
|
|
6
|
+
ebm4subjects/ebm_model.py,sha256=PVFtljF3oZK8u0lA6df82lsTdAD8H1Y9CHvWq1jWF2M,29125
|
|
7
|
+
ebm4subjects/embedding_generator.py,sha256=DZhZxkjcsy_4NA62_2V-4UPbIUkg5qMPat_cIgsoIAA,2609
|
|
8
8
|
ebm4subjects/prepare_data.py,sha256=vQ-BdXkIP3iZJdPXol0WDlY8cRFMHkjzzL7oC7EbouE,3084
|
|
9
|
-
ebm4subjects-0.5.
|
|
10
|
-
ebm4subjects-0.5.
|
|
11
|
-
ebm4subjects-0.5.
|
|
12
|
-
ebm4subjects-0.5.
|
|
9
|
+
ebm4subjects-0.5.1.dist-info/METADATA,sha256=QkOBvOAI49_AUipc3yAH6RVG9OVUs_8jO64Bjfy561U,8274
|
|
10
|
+
ebm4subjects-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
11
|
+
ebm4subjects-0.5.1.dist-info/licenses/LICENSE,sha256=RpvAZSjULHvoTR_esTlucJ08-zdQydnoqQLbqOh9Ub8,13826
|
|
12
|
+
ebm4subjects-0.5.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|