embed-train 2.0.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {embed_train-2.0.0 → embed_train-3.0.0}/CHANGELOG.md +12 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/PKG-INFO +1 -1
- {embed_train-2.0.0 → embed_train-3.0.0}/pyproject.toml +1 -1
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/settings.py +3 -3
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/trainers/hf/__init__.py +4 -3
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/trainers/torch/__init__.py +2 -2
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/fixtures/components.py +3 -3
- {embed_train-2.0.0 → embed_train-3.0.0}/uv.lock +1 -1
- {embed_train-2.0.0 → embed_train-3.0.0}/.gitignore +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/.gitlab-ci.yml +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/.pre-commit-config.yaml +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/.releaserc.json +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/AGENTS.md +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/Makefile +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/README.md +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/codecov.yml +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/commitlint.config.cjs +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/constants.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/exceptions.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/models/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/push_to_hf/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/py.typed +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/dataset/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/dataset/collate.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/dataset/sampling/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/dataset/sampling/samplers.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/dataset/torch_datasets.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/trainers/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/train/trainers/torch/loss.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/src/embed_train/utils.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/conftest.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/fixtures/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/fixtures/data.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/integration/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/integration/test_dataset/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/integration/test_dataset/test_to_hf_dataset.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/integration/test_train_runner/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/integration/test_train_runner/test_train_runner_flow.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_abstract_guards.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_embed_train.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_exceptions.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_models.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_push_to_hf.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_settings.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/__init__.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_collate.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_dataset.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_hf_trainer.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_loss.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_runner.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_samplers.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_sampling.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_torch_datasets.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_torch_trainer.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_train/test_trainers.py +0 -0
- {embed_train-2.0.0 → embed_train-3.0.0}/tests/unit/test_utils.py +0 -0
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
# [3.0.0](https://gitlab.com/efysent/agentic-core/embed-train/compare/v2.0.0...v3.0.0) (2026-05-09)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
* feat!: rename trainer configuration fields for consistency ([c3538f1](https://gitlab.com/efysent/agentic-core/embed-train/commit/c3538f1eadeeb708fa50962e648da000e471b1e9))
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
### BREAKING CHANGES
|
|
8
|
+
|
|
9
|
+
* Renamed training configuration fields across PyTorch and Hugging Face trainers:
|
|
10
|
+
- config.lr -> config.learning_rate
|
|
11
|
+
- config.batch_size -> config.per_device_train_batch_size (HF trainer only)
|
|
12
|
+
|
|
1
13
|
# [2.0.0](https://gitlab.com/efysent/agentic-core/embed-train/compare/v1.0.0...v2.0.0) (2026-05-08)
|
|
2
14
|
|
|
3
15
|
|
|
@@ -82,7 +82,7 @@ class PyTorchTrainerSettings[
|
|
|
82
82
|
num_epochs: int
|
|
83
83
|
batch_size: int
|
|
84
84
|
shuffle: bool
|
|
85
|
-
lr: float
|
|
85
|
+
learning_rate: float
|
|
86
86
|
device: str
|
|
87
87
|
save_every: int
|
|
88
88
|
drop_last: bool
|
|
@@ -172,9 +172,9 @@ class SentenceTransformersTrainerSettings[
|
|
|
172
172
|
tokenizer: TokenizerSettings
|
|
173
173
|
loss: SentenceTransformerLoss
|
|
174
174
|
pooling: Literal["cls", "mean_tokens", "max_tokens"]
|
|
175
|
-
batch_size: int
|
|
175
|
+
per_device_train_batch_size: int
|
|
176
176
|
num_epochs: int
|
|
177
|
-
lr: float
|
|
177
|
+
learning_rate: float
|
|
178
178
|
warmup_ratio: float
|
|
179
179
|
eval_steps: int
|
|
180
180
|
save_steps: int
|
|
@@ -40,9 +40,10 @@ class SentenceTransformersTrainer[TCHFTrainRunner: "SentenceTransformersTrainerS
|
|
|
40
40
|
warmup_steps = self._get_warmup_steps(train_dataset)
|
|
41
41
|
args = SentenceTransformerTrainingArguments(
|
|
42
42
|
output_dir=str(self.config.data_dir / f"checkpoints/{self._run_name()}"),
|
|
43
|
-
per_device_train_batch_size=self.config.batch_size,
|
|
43
|
+
per_device_train_batch_size=self.config.per_device_train_batch_size,
|
|
44
|
+
per_device_eval_batch_size=self.config.per_device_train_batch_size,
|
|
44
45
|
num_train_epochs=self.config.num_epochs,
|
|
45
|
-
learning_rate=self.config.lr,
|
|
46
|
+
learning_rate=self.config.learning_rate,
|
|
46
47
|
warmup_steps=warmup_steps,
|
|
47
48
|
eval_strategy="steps",
|
|
48
49
|
save_strategy="steps",
|
|
@@ -65,7 +66,7 @@ class SentenceTransformersTrainer[TCHFTrainRunner: "SentenceTransformersTrainerS
|
|
|
65
66
|
|
|
66
67
|
def _get_warmup_steps(self, dataset: Dataset) -> float:
|
|
67
68
|
train_size = len(dataset)
|
|
68
|
-
steps_per_epoch = train_size // self.config.batch_size
|
|
69
|
+
steps_per_epoch = train_size // self.config.per_device_train_batch_size
|
|
69
70
|
total_steps = steps_per_epoch * self.config.num_epochs
|
|
70
71
|
warmup_steps = int(total_steps * self.config.warmup_ratio)
|
|
71
72
|
return warmup_steps
|
|
@@ -146,7 +146,7 @@ class PyTorchTrainer[TCPyTorchTrainer: "PyTorchTrainerSettings[Any, Any, Any, An
|
|
|
146
146
|
def _run_name(self) -> str:
|
|
147
147
|
ts = datetime.now().strftime("%Y%m%d-%H%M%S")
|
|
148
148
|
class_name = self.config.module_path.split(".")[-1].lower()
|
|
149
|
-
return f"{class_name}_bs{self.config.batch_size}_lr{self.config.lr}_{ts}"
|
|
149
|
+
return f"{class_name}_bs{self.config.batch_size}_lr{self.config.learning_rate}_{ts}"
|
|
150
150
|
|
|
151
151
|
def _load_model(self) -> Model[Any]:
|
|
152
152
|
return cast(
|
|
@@ -157,7 +157,7 @@ class PyTorchTrainer[TCPyTorchTrainer: "PyTorchTrainerSettings[Any, Any, Any, An
|
|
|
157
157
|
def _load_optimizer(self) -> Optimizer:
|
|
158
158
|
return torch.optim.AdamW(
|
|
159
159
|
self.model.parameters(),
|
|
160
|
-
lr=self.config.lr,
|
|
160
|
+
lr=self.config.learning_rate,
|
|
161
161
|
)
|
|
162
162
|
|
|
163
163
|
def _load_loss(self) -> Loss[Any]:
|
|
@@ -330,7 +330,7 @@ def build_pytorch_trainer_settings(tmp_path: Path, **overrides: Any) -> DummyPyT
|
|
|
330
330
|
"num_epochs": 1,
|
|
331
331
|
"batch_size": 2,
|
|
332
332
|
"shuffle": True,
|
|
333
|
-
"lr": 0.01,
|
|
333
|
+
"learning_rate": 0.01,
|
|
334
334
|
"device": "cpu",
|
|
335
335
|
"save_every": 1,
|
|
336
336
|
"drop_last": False,
|
|
@@ -432,9 +432,9 @@ def build_sentence_transformers_trainer_settings(**overrides: Any) -> SentenceTr
|
|
|
432
432
|
tokenizer=build_tokenizer_settings(),
|
|
433
433
|
loss=build_sentence_transformer_loss(),
|
|
434
434
|
pooling="mean_tokens",
|
|
435
|
-
batch_size=2,
|
|
435
|
+
per_device_train_batch_size=2,
|
|
436
436
|
num_epochs=1,
|
|
437
|
-
lr=0.01,
|
|
437
|
+
learning_rate=0.01,
|
|
438
438
|
warmup_ratio=0.1,
|
|
439
439
|
eval_steps=2,
|
|
440
440
|
save_steps=4,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{embed_train-2.0.0 → embed_train-3.0.0}/tests/integration/test_dataset/test_to_hf_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|