@nusoft/nuos-build-catalogue 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,16 @@
1
1
  /**
2
2
  * Ollama embedder — local inference, no network egress.
3
3
  *
4
- * Default model: qwen3-embedding:8b (4096 dims, 32k context). Config via
5
- * NUOS_CATALOGUE_OLLAMA_MODEL. Smaller variants (qwen3-embedding:4b,
6
- * qwen3-embedding:0.6b) work the same way; switching variants requires
7
- * a full reindex if the dimension changes.
4
+ * Default model: qwen3-embedding:0.6b (1024 dims). Picked as default
5
+ * because it runs on the broad majority of developer machines without
6
+ * meaningful CPU strain — the prior 8b default produced noticeable load
7
+ * on Apple Silicon during a catalogue reindex, and the build harness
8
+ * ships to projects whose maintainers won't necessarily have an
9
+ * M-series Mac. Higher-fidelity variants (qwen3-embedding:4b at 2560
10
+ * dims, qwen3-embedding:8b at 4096 dims) are available via
11
+ * NUOS_CATALOGUE_OLLAMA_MODEL when the user wants better recall and
12
+ * has the headroom. Switching variants requires a full reindex because
13
+ * dimensions change.
8
14
  *
9
15
  * Why local: keeps the catalogue's content (and any future workload that
10
16
  * uses the same Embedder interface) inside whatever boundary Ollama is
@@ -26,10 +32,10 @@
26
32
  * idle-timeout (the keep_alive: "1m" we sent) cleans up within a
27
33
  * minute.
28
34
  *
29
- * Sizing note — the 8b model at Q4_K_M is ~4.7GB on disk and benefits
30
- * from ~16GB of RAM. Apple Silicon Metal acceleration helps a lot. On
31
- * smaller boxes drop to qwen3-embedding:4b (better accuracy/RAM ratio)
32
- * or qwen3-embedding:0.6b (CPU-only friendly).
35
+ * Sizing note — the 0.6b default is ~600MB on disk and runs
36
+ * comfortably on any modern laptop, including CPU-only. The 4b variant
37
+ * (~2.5GB) and 8b variant (~4.7GB, benefits from ~16GB RAM + Metal)
38
+ * are upgrades for users who want better recall and have the headroom.
33
39
  */
34
40
  import type { Embedder } from './types.js';
35
41
  export declare class OllamaEmbedder implements Embedder {
@@ -1,10 +1,16 @@
1
1
  /**
2
2
  * Ollama embedder — local inference, no network egress.
3
3
  *
4
- * Default model: qwen3-embedding:8b (4096 dims, 32k context). Config via
5
- * NUOS_CATALOGUE_OLLAMA_MODEL. Smaller variants (qwen3-embedding:4b,
6
- * qwen3-embedding:0.6b) work the same way; switching variants requires
7
- * a full reindex if the dimension changes.
4
+ * Default model: qwen3-embedding:0.6b (1024 dims). Picked as default
5
+ * because it runs on the broad majority of developer machines without
6
+ * meaningful CPU strain — the prior 8b default produced noticeable load
7
+ * on Apple Silicon during a catalogue reindex, and the build harness
8
+ * ships to projects whose maintainers won't necessarily have an
9
+ * M-series Mac. Higher-fidelity variants (qwen3-embedding:4b at 2560
10
+ * dims, qwen3-embedding:8b at 4096 dims) are available via
11
+ * NUOS_CATALOGUE_OLLAMA_MODEL when the user wants better recall and
12
+ * has the headroom. Switching variants requires a full reindex because
13
+ * dimensions change.
8
14
  *
9
15
  * Why local: keeps the catalogue's content (and any future workload that
10
16
  * uses the same Embedder interface) inside whatever boundary Ollama is
@@ -26,12 +32,12 @@
26
32
  * idle-timeout (the keep_alive: "1m" we sent) cleans up within a
27
33
  * minute.
28
34
  *
29
- * Sizing note — the 8b model at Q4_K_M is ~4.7GB on disk and benefits
30
- * from ~16GB of RAM. Apple Silicon Metal acceleration helps a lot. On
31
- * smaller boxes drop to qwen3-embedding:4b (better accuracy/RAM ratio)
32
- * or qwen3-embedding:0.6b (CPU-only friendly).
35
+ * Sizing note — the 0.6b default is ~600MB on disk and runs
36
+ * comfortably on any modern laptop, including CPU-only. The 4b variant
37
+ * (~2.5GB) and 8b variant (~4.7GB, benefits from ~16GB RAM + Metal)
38
+ * are upgrades for users who want better recall and have the headroom.
33
39
  */
34
- const DEFAULT_MODEL = 'qwen3-embedding:8b';
40
+ const DEFAULT_MODEL = 'qwen3-embedding:0.6b';
35
41
  const DEFAULT_HOST = 'http://localhost:11434';
36
42
  // Qwen3-Embedding produces Matryoshka representations from 32 dims up to the variant's native size (1024 for 0.6b, 2560 for 4b, 4096 for 8b).
37
43
  // We use the model default. A future tweak could truncate to e.g. 1024
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nusoft/nuos-build-catalogue",
3
- "version": "0.11.0",
3
+ "version": "0.12.0",
4
4
  "description": "NuOS build-catalogue tooling: semantic search (WU 110) + migration runner that lifts markdown artefacts into JSON-backed workflow records (WU 111, Phase G).",
5
5
  "type": "module",
6
6
  "bin": {