searchsocket 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,9 +6,9 @@ Semantic site search and MCP retrieval for SvelteKit content projects.
6
6
 
7
7
  ## Features
8
8
 
9
- - **Embeddings**: Jina AI `jina-embeddings-v3` with task-specific LoRA adapters (configurable)
9
+ - **Embeddings**: Jina AI `jina-embeddings-v5-text-small` with task-specific LoRA adapters (configurable)
10
10
  - **Vector Backend**: Turso/libSQL with vector search (local file DB for development, remote for production)
11
- - **Rerank**: Optional Jina reranker — same API key, one boolean to enable
11
+ - **Rerank**: Jina `jina-reranker-v3`, enabled by default — uses the same Jina API key
12
12
  - **Page Aggregation**: Group results by page with score-weighted chunk decay
13
13
  - **Meta Extraction**: Automatically extracts `<meta name="description">` and `<meta name="keywords">` for improved relevance
14
14
  - **SvelteKit Integrations**:
@@ -163,7 +163,7 @@ pnpm searchsocket search --q "getting started" --top-k 5 --path-prefix /docs
163
163
  "meta": {
164
164
  "timingsMs": { "embed": 120, "vector": 15, "rerank": 0, "total": 135 },
165
165
  "usedRerank": false,
166
- "modelId": "jina-embeddings-v3"
166
+ "modelId": "jina-embeddings-v5-text-small"
167
167
  }
168
168
  }
169
169
  ```
@@ -416,9 +416,9 @@ SearchSocket uses **Jina AI's embedding models** to convert text into semantic v
416
416
 
417
417
  ### Default Model
418
418
 
419
- - **Model**: `jina-embeddings-v3`
419
+ - **Model**: `jina-embeddings-v5-text-small`
420
420
  - **Dimensions**: 1024 (default)
421
- - **Cost**: ~$0.00002 per 1K tokens (generous 10M token free tier)
421
+ - **Cost**: ~$0.00005 per 1K tokens
422
422
  - **Task adapters**: Uses `retrieval.passage` for indexing, `retrieval.query` for search queries (LoRA task-specific adapters for better retrieval quality)
423
423
 
424
424
  ### How It Works
@@ -612,7 +612,7 @@ pnpm searchsocket status
612
612
  # Output:
613
613
  # project: my-site
614
614
  # resolved scope: main
615
- # embedding model: jina-embeddings-v3
615
+ # embedding model: jina-embeddings-v5-text-small
616
616
  # vector backend: turso/libsql (local (.searchsocket/vectors.db))
617
617
  # vector health: ok
618
618
  # last indexed (main): 2025-02-23T10:30:00Z
@@ -865,7 +865,7 @@ export default {
865
865
 
866
866
  embeddings: {
867
867
  provider: "jina",
868
- model: "jina-embeddings-v3",
868
+ model: "jina-embeddings-v5-text-small",
869
869
  apiKey: "jina_...", // direct API key (or use apiKeyEnv)
870
870
  apiKeyEnv: "JINA_API_KEY",
871
871
  batchSize: 64,
@@ -886,7 +886,7 @@ export default {
886
886
  rerank: {
887
887
  enabled: true,
888
888
  topN: 20,
889
- model: "jina-reranker-v2-base-multilingual"
889
+ model: "jina-reranker-v3"
890
890
  },
891
891
 
892
892
  ranking: {