nexaai 1.0.10__cp310-cp310-macosx_14_0_universal2.whl → 1.0.11rc1__cp310-cp310-macosx_14_0_universal2.whl
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
Potentially problematic release. This version of nexaai might be problematic.
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +1 -1
- nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
- nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
- nexaai/embedder_impl/mlx_embedder_impl.py +5 -6
- nexaai/mlx_backend/embedding/generate.py +16 -219
- nexaai/mlx_backend/embedding/interface.py +41 -346
- nexaai/mlx_backend/embedding/main.py +35 -126
- nexaai/utils/model_manager.py +87 -103
- nexaai/utils/progress_tracker.py +8 -12
- {nexaai-1.0.10.dist-info → nexaai-1.0.11rc1.dist-info}/METADATA +1 -2
- {nexaai-1.0.10.dist-info → nexaai-1.0.11rc1.dist-info}/RECORD +23 -26
- nexaai/utils/manifest_utils.py +0 -280
- nexaai/utils/model_types.py +0 -47
- nexaai/utils/quantization_utils.py +0 -239
- {nexaai-1.0.10.dist-info → nexaai-1.0.11rc1.dist-info}/WHEEL +0 -0
- {nexaai-1.0.10.dist-info → nexaai-1.0.11rc1.dist-info}/top_level.txt +0 -0
nexaai/mlx_backend/embedding/interface.py

@@ -20,16 +20,11 @@ import mlx.core as mx
 import numpy as np
 from pathlib import Path
 from typing import Any, List, Optional, Sequence
-from abc import ABC, abstractmethod
 
 # Import necessary modules
 from tokenizers import Tokenizer
 
 # Import from ml.py for API alignment
-import sys
-from pathlib import Path as PathLib
-sys.path.insert(0, str(PathLib(__file__).parent.parent))
-
 from ml import (
     Embedder as BaseEmbedder,
     EmbeddingConfig,
@@ -39,24 +34,13 @@ from ml import (
 # Import profiling module
 from profiling import ProfilingMixin, StopReason
 
-# Import the model implementation
-
-    from .modeling.nexa_jina_v2 import Model, ModelArgs
-except ImportError:
-    # Fallback for when module is run directly
-    from modeling.nexa_jina_v2 import Model, ModelArgs
-
-# Import mlx_embeddings for general embedding support
-try:
-    import mlx_embeddings
-    MLX_EMBEDDINGS_AVAILABLE = True
-except ImportError:
-    MLX_EMBEDDINGS_AVAILABLE = False
+# Import the model implementation
+from .modeling.nexa_jina_v2 import Model, ModelArgs
 
 
-class BaseMLXEmbedder(BaseEmbedder, ProfilingMixin, ABC):
+class Embedder(BaseEmbedder, ProfilingMixin):
     """
-
+    Embedder interface for MLX embedding models.
     API aligned with ml.py Embedder abstract base class.
     """
 
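Taken together, the two hunks above replace the old try/except import dance and the optional mlx_embeddings backend with a single unconditional import and one concrete class. A rough orientation sketch of the shape that remains (the base classes here are local stand-ins, not the real ml/profiling imports):

```python
# Orientation sketch only: the real BaseEmbedder/ProfilingMixin come from
# ml.py and profiling.py; stand-ins are used so the skeleton runs on its own.
class BaseEmbedder: ...
class ProfilingMixin: ...

class Embedder(BaseEmbedder, ProfilingMixin):
    """Embedder interface for MLX embedding models (Jina V2 only after this change)."""

    def load_model(self, model_path) -> bool: ...
    def close(self) -> None: ...
    def embed(self, texts, config=None, clear_cache=True) -> list: ...
    def embedding_dim(self) -> int: ...
    # set_lora / add_lora / remove_lora / list_loras remain but raise
    # NotImplementedError (see the later hunks).
```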
@@ -80,7 +64,7 @@ class BaseMLXEmbedder(BaseEmbedder, ProfilingMixin, ABC):
 
         self.model_path = model_path
         self.tokenizer_path = tokenizer_path
-        self.device = device if device is not None else "cpu"
+        self.device = device if device is not None else "cpu"  # TODO: This device field is never used
 
         # Initialize model and tokenizer as None
         self.model = None
@@ -94,69 +78,6 @@ class BaseMLXEmbedder(BaseEmbedder, ProfilingMixin, ABC):
         self.config = None
         self.reset_profiling()
 
-    @abstractmethod
-    def load_model(self, model_path: PathType) -> bool:
-        """Load model from path."""
-        pass
-
-    def close(self) -> None:
-        """Close the model."""
-        self.destroy()
-
-    @abstractmethod
-    def embed(
-        self,
-        texts: Sequence[str],
-        config: Optional[EmbeddingConfig] = None,
-        clear_cache: bool = True,
-    ) -> List[List[float]]:
-        """Generate embeddings for texts."""
-        pass
-
-    @abstractmethod
-    def embedding_dim(self) -> int:
-        """Get embedding dimension."""
-        pass
-
-    def set_lora(self, lora_id: int) -> None:
-        """Set active LoRA adapter. (Disabled for embedding models)"""
-        raise NotImplementedError("LoRA is not supported for embedding models")
-
-    def add_lora(self, lora_path: PathType) -> int:
-        """Add LoRA adapter and return its ID. (Disabled for embedding models)"""
-        raise NotImplementedError("LoRA is not supported for embedding models")
-
-    def remove_lora(self, lora_id: int) -> None:
-        """Remove LoRA adapter. (Disabled for embedding models)"""
-        raise NotImplementedError("LoRA is not supported for embedding models")
-
-    def list_loras(self) -> List[int]:
-        """List available LoRA adapters. (Disabled for embedding models)"""
-        raise NotImplementedError("LoRA is not supported for embedding models")
-
-    def _normalize_embedding(self, embedding: List[float], method: str) -> List[float]:
-        """Normalize embedding using specified method."""
-        if method == "none":
-            return embedding
-
-        embedding_array = np.array(embedding)
-
-        if method == "l2":
-            norm = np.linalg.norm(embedding_array)
-            if norm > 0:
-                embedding_array = embedding_array / norm
-        elif method == "mean":
-            mean_val = np.mean(embedding_array)
-            embedding_array = embedding_array - mean_val
-
-        return embedding_array.tolist()
-
-
-class JinaV2Embedder(BaseMLXEmbedder):
-    """
-    Embedder implementation specifically for Jina V2 models.
-    """
-
     def load_model(self, model_path: PathType) -> bool:
         """Load model from path."""
         try:
@@ -176,6 +97,10 @@ class JinaV2Embedder(BaseMLXEmbedder):
             print(f"Failed to load model: {e}")
             return False
 
+    def close(self) -> None:
+        """Close the model."""
+        self.destroy()
+
     def embed(
         self,
         texts: Sequence[str],
@@ -233,6 +158,22 @@ class JinaV2Embedder(BaseMLXEmbedder):
             return 768  # Default dimension for Jina v2
         return self.config.hidden_size
 
+    def set_lora(self, lora_id: int) -> None:
+        """Set active LoRA adapter. (Disabled for embedding models)"""
+        raise NotImplementedError("LoRA is not supported for embedding models")
+
+    def add_lora(self, lora_path: PathType) -> int:
+        """Add LoRA adapter and return its ID. (Disabled for embedding models)"""
+        raise NotImplementedError("LoRA is not supported for embedding models")
+
+    def remove_lora(self, lora_id: int) -> None:
+        """Remove LoRA adapter. (Disabled for embedding models)"""
+        raise NotImplementedError("LoRA is not supported for embedding models")
+
+    def list_loras(self) -> List[int]:
+        """List available LoRA adapters. (Disabled for embedding models)"""
+        raise NotImplementedError("LoRA is not supported for embedding models")
+
     def _load_jina_model(self, model_dir: str) -> Model:
         """Initialize and load the Jina V2 model with FP16 weights."""
 
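The LoRA stubs removed from the deleted base class reappear verbatim on the surviving class, so callers see the same behavior as before. A minimal sketch of that behavior, assuming the package import path below and placeholder model paths:

```python
# Assumes this import path; "/path/to/model" is a placeholder.
from nexaai.mlx_backend.embedding.interface import Embedder

embedder = Embedder("/path/to/model", "/path/to/model")
try:
    embedder.set_lora(0)  # same for add_lora / remove_lora / list_loras
except NotImplementedError as err:
    print(err)  # "LoRA is not supported for embedding models"
```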
@@ -340,267 +281,22 @@ class JinaV2Embedder(BaseMLXEmbedder):
 
         return embedding_list
 
-
-
-
-
-    """
-
-    def load_model(self, model_path: PathType) -> bool:
-        """Load model from path using mlx_embeddings."""
-        if not MLX_EMBEDDINGS_AVAILABLE:
-            print("Warning: mlx_embeddings not available. Please install it to use general embedding models.")
-            raise ImportError("mlx_embeddings package is not available. Please install it first.")
-
-        try:
-            # Use the provided model_path or fall back to instance path
-            if model_path:
-                if os.path.isfile(model_path):
-                    model_path = os.path.dirname(model_path)
-                self.model_path = model_path
-
-            # Load model and tokenizer using mlx_embeddings
-            self.model, self.tokenizer = mlx_embeddings.load(self.model_path)
-
-            # Load config to get dimensions
-            config_path = os.path.join(self.model_path, "config.json")
-            if os.path.exists(config_path):
-                with open(config_path, "r") as f:
-                    self.config = json.load(f)
-
-            return True
-        except Exception as e:
-            print(f"Failed to load model: {e}")
-            return False
-
-    def embed(
-        self,
-        texts: Sequence[str],
-        config: Optional[EmbeddingConfig] = None,
-        clear_cache: bool = True,
-    ) -> List[List[float]]:
-        """Generate embeddings for texts using mlx_embeddings."""
-        if self.model is None or self.tokenizer is None:
-            raise RuntimeError("Model not loaded. Call load_model() first.")
-
-        if config is None:
-            config = EmbeddingConfig()
-
-        # Start profiling
-        self._start_profiling()
-
-        try:
-            # Calculate total tokens for profiling
-            if hasattr(self.tokenizer, 'encode'):
-                total_tokens = sum(len(self.tokenizer.encode(text)) for text in texts)
-            else:
-                # For tokenizers that don't have simple encode method
-                total_tokens = len(texts) * 50  # Rough estimate
-
-            self._update_prompt_tokens(total_tokens)
-
-            # End prompt processing, start decode
-            self._prompt_end()
-            self._decode_start()
-
-            # Check if this is a Gemma3TextModel
-            # WORKAROUND: Gemma3TextModel has a bug where it expects 'inputs' as positional arg
-            # but mlx_embeddings.generate passes 'input_ids' as keyword arg
-            # See: https://github.com/ml-explore/mlx-examples/issues/... (bug report pending)
-            is_gemma = False
-            if self.config and "architectures" in self.config:
-                architectures = self.config.get("architectures", [])
-                is_gemma = "Gemma3TextModel" in architectures
-
-            if is_gemma:
-                # HARDCODED WORKAROUND for Gemma3TextModel bug
-                # Use direct tokenization and model call instead of mlx_embeddings.generate
-                max_length = config.max_length if hasattr(config, 'max_length') else 512
-
-                # Tokenize using batch_encode_plus
-                encoded_input = self.tokenizer.batch_encode_plus(
-                    list(texts),
-                    padding=True,
-                    truncation=True,
-                    return_tensors='mlx',
-                    max_length=max_length
-                )
-
-                # Get input tensors
-                input_ids = encoded_input['input_ids']
-                attention_mask = encoded_input.get('attention_mask', None)
-
-                # Call model with positional input_ids and keyword attention_mask
-                # This matches Gemma3TextModel's expected signature
-                output = self.model(input_ids, attention_mask=attention_mask)
-
-                # Extract embeddings
-                embeddings_tensor = output.text_embeds
-            else:
-                # Normal path for non-Gemma models
-                # Generate embeddings using mlx_embeddings standard approach
-                output = mlx_embeddings.generate(
-                    self.model,
-                    self.tokenizer,
-                    texts=list(texts),
-                    max_length=config.max_length if hasattr(config, 'max_length') else 512,
-                    padding=True,
-                    truncation=True
-                )
-
-                # Extract embeddings
-                embeddings_tensor = output.text_embeds
-
-            # Convert to list format
-            embeddings = []
-            for i in range(embeddings_tensor.shape[0]):
-                embedding = embeddings_tensor[i].tolist()
-
-                # Apply normalization if requested
-                if config.normalize:
-                    embedding = self._normalize_embedding(embedding, config.normalize_method)
-
-                embeddings.append(embedding)
-
-            if clear_cache:
-                mx.clear_cache()
-
-            # End timing and finalize profiling data
-            self._update_generated_tokens(0)  # No generation in embedding
-            self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
-            self._decode_end()
-            self._end_profiling()
-
-            return embeddings
-
-        except Exception as e:
-            self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-            self._decode_end()
-            self._end_profiling()
-            raise RuntimeError(f"Error generating embeddings: {str(e)}")
-
-    def embedding_dim(self) -> int:
-        """Get embedding dimension."""
-        if self.config is None:
-            return 768  # Default dimension
-
-        # Try different config keys that might contain the dimension
-        if "hidden_size" in self.config:
-            return self.config["hidden_size"]
-        elif "d_model" in self.config:
-            return self.config["d_model"]
-        elif "dim" in self.config:
-            return self.config["dim"]
-        else:
-            return 768  # Fallback default
-
-
-class MLXEmbedder(BaseMLXEmbedder):
-    """
-    Concrete embedder class that routes to the appropriate implementation.
-    This class can be instantiated directly (for C++ compatibility) and will
-    automatically delegate to JinaV2Embedder or MlxEmbeddingEmbedder based on model type.
-    """
-
-    def __init__(
-        self,
-        model_path: PathType,
-        tokenizer_path: PathType,
-        device: Optional[str] = None,
-    ) -> None:
-        """Initialize the Embedder model."""
-        super().__init__(model_path, tokenizer_path, device)
-        self._impl = None  # Will hold the actual implementation
-
-    def _get_implementation(self) -> BaseMLXEmbedder:
-        """Get or create the appropriate implementation based on model type."""
-        if self._impl is None:
-            # Detect model type and create appropriate implementation
-            model_type = _detect_model_type(self.model_path)
-
-            if model_type == "jina_v2":
-                self._impl = JinaV2Embedder(self.model_path, self.tokenizer_path, self.device)
-            else:
-                self._impl = MlxEmbeddingEmbedder(self.model_path, self.tokenizer_path, self.device)
-
-            # Copy over any existing state
-            if self.model is not None:
-                self._impl.model = self.model
-            if self.tokenizer is not None:
-                self._impl.tokenizer = self.tokenizer
-            if self.config is not None:
-                self._impl.config = self.config
-
-        return self._impl
-
-    def load_model(self, model_path: PathType) -> bool:
-        """Load model from path."""
-        # Get the appropriate implementation and delegate
-        impl = self._get_implementation()
-        result = impl.load_model(model_path)
-
-        # Sync state back
-        self.model = impl.model
-        self.tokenizer = impl.tokenizer
-        self.config = impl.config
+    def _normalize_embedding(self, embedding: List[float], method: str) -> List[float]:
+        """Normalize embedding using specified method."""
+        if method == "none":
+            return embedding
 
-
-
-    def embed(
-        self,
-        texts: Sequence[str],
-        config: Optional[EmbeddingConfig] = None,
-        clear_cache: bool = True,
-    ) -> List[List[float]]:
-        """Generate embeddings for texts."""
-        # Get the appropriate implementation and delegate
-        impl = self._get_implementation()
-        return impl.embed(texts, config, clear_cache)
-
-    def embedding_dim(self) -> int:
-        """Get embedding dimension."""
-        # Get the appropriate implementation and delegate
-        impl = self._get_implementation()
-        return impl.embedding_dim()
-
-    def destroy(self) -> None:
-        """Destroy the model and free resources."""
-        super().destroy()
-        if self._impl is not None:
-            self._impl.destroy()
-            self._impl = None
-
-
-# Backward compatibility alias
-Embedder = MLXEmbedder
-
-
-def _detect_model_type(model_path: PathType) -> str:
-    """Detect the model type from config.json."""
-    if os.path.isfile(model_path):
-        model_path = os.path.dirname(model_path)
-
-    config_path = os.path.join(model_path, "config.json")
-
-    if not os.path.exists(config_path):
-        # If no config.json, assume it's a generic model
-        return "generic"
-
-    try:
-        with open(config_path, "r") as f:
-            config = json.load(f)
+        embedding_array = np.array(embedding)
 
-
-
-
-
+        if method == "l2":
+            norm = np.linalg.norm(embedding_array)
+            if norm > 0:
+                embedding_array = embedding_array / norm
+        elif method == "mean":
+            mean_val = np.mean(embedding_array)
+            embedding_array = embedding_array - mean_val
 
-
-        return "generic"
-
-    except Exception as e:
-        print(f"Warning: Could not parse config.json: {e}")
-        return "generic"
+        return embedding_array.tolist()
 
 
 # Factory function for creating embedder instances
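The `_normalize_embedding` helper survives the purge unchanged, just relocated onto the remaining class. A standalone replica of its logic in plain NumPy, handy for sanity-checking outputs (the function name here is ours, not the package's):

```python
import numpy as np

def normalize(embedding: list, method: str) -> list:
    """Replicates _normalize_embedding from the diff above."""
    if method == "none":
        return embedding
    arr = np.array(embedding)
    if method == "l2":
        norm = np.linalg.norm(arr)
        if norm > 0:          # zero vectors pass through untouched
            arr = arr / norm
    elif method == "mean":    # mean-centering, not a norm in the strict sense
        arr = arr - np.mean(arr)
    return arr.tolist()

print(normalize([3.0, 4.0], "l2"))    # [0.6, 0.8] -> unit L2 norm
print(normalize([3.0, 4.0], "mean"))  # [-0.5, 0.5] -> zero mean
```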
@@ -608,10 +304,9 @@ def create_embedder(
     model_path: PathType,
     tokenizer_path: Optional[PathType] = None,
     device: Optional[str] = None,
-) -> MLXEmbedder:
-    """Create and return an MLXEmbedder instance."""
+) -> Embedder:
+    """Create and return an Embedder instance."""
     if tokenizer_path is None:
         tokenizer_path = model_path
 
-
-    return MLXEmbedder(model_path, tokenizer_path, device)
+    return Embedder(model_path, tokenizer_path, device)
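With the router class gone, `create_embedder` now returns the single `Embedder` directly. A minimal end-to-end sketch against the new surface; the import path and model directory are assumptions, and the `EmbeddingConfig` fields are taken from the test script below:

```python
# Sketch: import path and model directory are placeholders/assumptions.
from nexaai.mlx_backend.embedding.interface import create_embedder, EmbeddingConfig

embedder = create_embedder(model_path="/path/to/jina-v2-fp16-mlx")
if embedder.load_model("/path/to/jina-v2-fp16-mlx"):
    config = EmbeddingConfig(batch_size=2, normalize=True, normalize_method="l2")
    vectors = embedder.embed(["hello", "world"], config)
    print(embedder.embedding_dim(), len(vectors))  # e.g. 768 2
    embedder.close()
```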
nexaai/mlx_backend/embedding/main.py

@@ -12,162 +12,71 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-import sys
-import numpy as np
-from pathlib import Path
+from .interface import create_embedder, EmbeddingConfig
 
-# Add parent path for imports
-sys.path.insert(0, str(Path(__file__).parent.parent))
 
-
-
-from .interface import EmbeddingConfig
-from huggingface_hub import snapshot_download
-
-
-def download_model_if_needed(model_id, local_dir):
-    """Download model from Hugging Face Hub if not present locally."""
-    if not os.path.exists(os.path.join(local_dir, "config.json")):
-        print(f"📥 Model not found locally. Downloading {model_id}...")
-        os.makedirs(local_dir, exist_ok=True)
-        try:
-            snapshot_download(
-                repo_id=model_id,
-                local_dir=local_dir,
-                resume_download=True,
-                local_dir_use_symlinks=False
-            )
-            print("✅ Model download completed!")
-        except Exception as e:
-            print(f"❌ Failed to download model: {e}")
-            raise
-
-
-def test_embedding_interface(model_path, is_local=False):
-    """Test embedding model functionality using the interface."""
-
-    print("=" * 70)
-    print("TESTING EMBEDDING MODEL VIA INTERFACE")
-    print("=" * 70)
-
-    # Handle model path - download if it's a HF model ID
-    if not is_local and "/" in model_path:
-        # It's a HuggingFace model ID
-        local_dir = f"./modelfiles/{model_path.replace('/', '_')}"
-        download_model_if_needed(model_path, local_dir)
-        model_path = local_dir
-
-    # Create embedder using factory function (will auto-detect model type)
-    print(f"\n🔍 Creating embedder for: {model_path}")
+def test_embedding(model_path):
+    """Test embedding model functionality."""
     embedder = create_embedder(model_path=model_path)
-    print(f"✅ Created embedder type: {type(embedder).__name__}")
 
     # Load the model
-    print("
+    print("Loading embedding model...")
     success = embedder.load_model(model_path)
 
     if not success:
-        print("
+        print("Failed to load model!")
        return
 
     print("✅ Model loaded successfully!")
-    print(f"
+    print(f"Embedding dimension: {embedder.embedding_dim()}")
 
     # Test texts
     test_texts = [
         "Hello, how are you?",
         "What is machine learning?",
         "The weather is nice today.",
-        "Python is a programming language."
-        "Artificial intelligence is changing the world."
+        "Python is a programming language."
     ]
 
-    # Configure embedding
-
-
-
-
-
-    for config_idx, config in enumerate(configs):
-        print(f"\n{'='*50}")
-        print(f"TEST {config_idx + 1}: Config - Batch: {config.batch_size}, "
-              f"Normalize: {config.normalize}, Method: {config.normalize_method}")
-        print('='*50)
-
-        # Generate embeddings
-        embeddings = embedder.embed(test_texts, config)
-
-        # Display results
-        print(f"\n📊 Generated {len(embeddings)} embeddings")
-
-        for i, (text, embedding) in enumerate(zip(test_texts[:3], embeddings[:3])):
-            print(f"\n  Text {i+1}: '{text}'")
-            print(f"  Dimension: {len(embedding)}")
-            print(f"  First 5 values: {[f'{v:.4f}' for v in embedding[:5]]}")
-
-            # Calculate magnitude
-            magnitude = np.linalg.norm(embedding)
-            print(f"  Magnitude: {magnitude:.6f}")
+    # Configure embedding
+    config = EmbeddingConfig(
+        batch_size=2,
+        normalize=True,
+        normalize_method="l2"
+    )
 
-
-    print("\n" + "="*50)
-    print("SIMILARITY MATRIX (L2 Normalized)")
-    print("="*50)
+    print(f"\nGenerating embeddings for {len(test_texts)} texts...")
 
-
+    # Generate embeddings
     embeddings = embedder.embed(test_texts, config)
 
-    #
-
-
-
-    print("\nTexts:")
-    for i, text in enumerate(test_texts):
-        print(f"  [{i}] {text[:30]}...")
-
-    print("\nSimilarity Matrix:")
-    print("    ", end="")
-    for i in range(len(test_texts)):
-        print(f"  [{i}]  ", end="")
-    print()
+    # Display results
+    print("\nEmbedding Results:")
+    print("=" * 50)
 
-    for i in
-        print(f"
-
-
-
-
-
-    similarities = []
-    for i in range(len(test_texts)):
-        for j in range(i+1, len(test_texts)):
-            similarities.append((similarity_matrix[i, j], i, j))
+    for i, (text, embedding) in enumerate(zip(test_texts, embeddings)):
+        print(f"\nText {i+1}: '{text}'")
+        print(f"Embedding shape: {len(embedding)}")
+        print(f"First 5 values: {embedding[:5]}")
+
+        # Calculate magnitude for normalized embeddings
+        magnitude = sum(x*x for x in embedding) ** 0.5
+        print(f"Magnitude: {magnitude:.6f}")
 
-
-
-
+    # Test similarity between first two embeddings
+    if len(embeddings) >= 2:
+        emb1, emb2 = embeddings[0], embeddings[1]
+        similarity = sum(a*b for a, b in zip(emb1, emb2))
+        print(f"\nCosine similarity between text 1 and 2: {similarity:.6f}")
 
     # Cleanup
     embedder.close()
-    print("\n✅
+    print("\n✅ Embedding test completed!")
 
 
 if __name__ == "__main__":
     import argparse
-    parser = argparse.ArgumentParser(
-    parser.add_argument(
-        "--model_path",
-        type=str,
-        default="nexaml/jina-v2-fp16-mlx",
-        help="Model path (local) or HuggingFace model ID"
-    )
-    parser.add_argument(
-        "--local",
-        action="store_true",
-        help="Indicate if model_path is a local directory"
-    )
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_path", type=str, default="nexaml/jina-v2-fp16-mlx")
     args = parser.parse_args()
-
-    test_embedding_interface(args.model_path, args.local)
+    test_embedding(args.model_path)