@lloyal-labs/lloyal.node 1.0.5-alpha → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -1,6 +1,3 @@
1
- const path = require('path');
2
- const binary = require('node-gyp-build')(path.join(__dirname, '..'));
3
-
4
1
  /**
5
2
  * liblloyal-node - Thin N-API wrapper over liblloyal
6
3
  *
@@ -9,7 +6,7 @@ const binary = require('node-gyp-build')(path.join(__dirname, '..'));
9
6
  *
10
7
  * @example
11
8
  * ```js
12
- * const { createContext, withLogits } = require('lloyal.node');
9
+ * const { createContext, withLogits } = require('@lloyal-labs/lloyal.node');
13
10
  *
14
11
  * const ctx = await createContext({
15
12
  * modelPath: './model.gguf',
@@ -24,20 +21,133 @@ const binary = require('node-gyp-build')(path.join(__dirname, '..'));
24
21
  * await ctx.decode(tokens, 0);
25
22
  *
26
23
  * // Safe logits access (Runtime Borrow Checker pattern)
27
- * const entropy = await withLogits(ctx, (logits) => {
24
+ * const entropy = withLogits(ctx, (logits) => {
28
25
  * // logits is valid here - use synchronously only!
29
- * return computeEntropy(logits);
26
+ * return myComputeEntropy(logits);
30
27
  * });
31
28
  *
32
29
  * // Or with native reference implementations (for testing)
33
- * const nativeEntropy = ctx.computeEntropy();
30
+ * const entropy = ctx.modelEntropy();
34
31
  * const token = ctx.greedySample();
35
32
  *
36
33
  * // Cleanup
37
34
  * ctx.dispose();
38
35
  * ```
36
+ *
37
+ * @example GPU variant selection
38
+ * ```js
39
+ * // Option 1: Environment variable (affects all contexts)
40
+ * // Set LLOYAL_GPU=cuda before running
41
+ *
42
+ * // Option 2: Per-context selection (recommended)
43
+ * const ctx = await createContext(
44
+ * { modelPath: './model.gguf', nCtx: 4096 },
45
+ * { gpuVariant: 'cuda' } // Falls back to CPU if CUDA unavailable
46
+ * );
47
+ * ```
39
48
  */
40
49
 
50
/**
 * Build the name of the platform-specific binary package for this process.
 *
 * Platform package naming: @lloyal-labs/lloyal.node-{platform}-{arch}[-{gpu}]
 *
 * String variants are trimmed and lower-cased before use, so values such as
 * `CUDA` or ` Vulkan ` (e.g. coming from the LLOYAL_GPU environment variable)
 * map to the `-cuda` / `-vulkan` packages instead of a name that can never
 * resolve.
 *
 * @param {string} [variant] - GPU variant: 'cuda', 'vulkan', or undefined for CPU
 * @returns {string} Platform package name
 */
const getPlatformPackageName = (variant) => {
  const normalized = typeof variant === 'string' ? variant.trim().toLowerCase() : variant;
  // cpu/metal/default = no suffix, cuda/vulkan = suffix
  const noSuffix =
    !normalized || normalized === 'default' || normalized === 'cpu' || normalized === 'metal';
  const suffix = noSuffix ? '' : `-${normalized}`;
  return `@lloyal-labs/lloyal.node-${process.platform}-${process.arch}${suffix}`;
};
63
+
64
+ /**
65
+ * Try to load a platform package, return null on failure.
66
+ * Failures include: package not installed, missing GPU runtime libs (dlopen fails),
67
+ * or module doesn't export expected interface.
68
+ * @param {string} packageName - Package name to load
69
+ * @param {boolean} [verbose=false] - Log failure reasons
70
+ * @returns {object|null} The native binary module or null
71
+ */
72
+ const tryLoadPackage = (packageName, verbose = false) => {
73
+ try {
74
+ const mod = require(packageName);
75
+ // Validate it's actually a native module with expected exports
76
+ if (mod && typeof mod.createContext === 'function') {
77
+ return mod;
78
+ }
79
+ if (verbose) {
80
+ console.warn(`[lloyal.node] ${packageName} loaded but missing createContext export`);
81
+ }
82
+ return null;
83
+ } catch (e) {
84
+ if (verbose) {
85
+ console.warn(`[lloyal.node] Failed to load ${packageName}: ${e.message}`);
86
+ }
87
+ return null;
88
+ }
89
+ };
90
+
91
+ /**
92
+ * Load the native binary with automatic fallback.
93
+ *
94
+ * Loading priority:
95
+ * 1. Requested GPU variant (if specified)
96
+ * 2. Default platform package (CPU)
97
+ * 3. Local build (development: build/Release/lloyal.node)
98
+ *
99
+ * @param {string} [variant] - GPU variant: 'cuda', 'vulkan', or undefined for CPU
100
+ * @returns {object} The native binary module
101
+ * @throws {Error} If no binary can be loaded
102
+ */
103
+ const loadBinary = (variant) => {
104
+ // Use env var if no variant specified
105
+ variant = variant ?? process.env.LLOYAL_GPU;
106
+ // LLOYAL_NO_FALLBACK=1 disables fallback (for CI testing specific packages)
107
+ const noFallback = process.env.LLOYAL_NO_FALLBACK === '1';
108
+
109
+ // 1. Try requested variant (if specified)
110
+ if (variant && variant !== 'default') {
111
+ const pkgName = getPlatformPackageName(variant);
112
+ const binary = tryLoadPackage(pkgName, true); // verbose=true to see errors
113
+ if (binary) return binary;
114
+
115
+ if (noFallback) {
116
+ throw new Error(
117
+ `[lloyal.node] GPU variant "${variant}" failed to load. ` +
118
+ `Package: ${pkgName}. Check that runtime libraries are available.`
119
+ );
120
+ }
121
+ console.warn(`[lloyal.node] GPU variant "${variant}" unavailable, falling back to CPU`);
122
+ }
123
+
124
+ // 2. Try default platform package (CPU)
125
+ const defaultPkg = getPlatformPackageName();
126
+ const binary = tryLoadPackage(defaultPkg, true); // verbose=true
127
+ if (binary) return binary;
128
+
129
+ // 3. Try local build (development)
130
+ try {
131
+ return require('../build/Release/lloyal.node');
132
+ } catch (e) {
133
+ // ignore
134
+ }
135
+
136
+ throw new Error(
137
+ `No lloyal.node binary found for ${process.platform}-${process.arch}. ` +
138
+ `Tried: ${variant ? getPlatformPackageName(variant) + ', ' : ''}${defaultPkg}`
139
+ );
140
+ };
141
+
142
// Cached default binary: resolved by the first getBinary() call, reused afterwards.
let _binary = null;

/**
 * Return the default native binary, loading it on first use.
 *
 * The variant passed to loadBinary is whatever LLOYAL_GPU holds at the time of
 * the first call; later calls return the cached module without reloading.
 *
 * @returns {object} The native binary module
 */
const getBinary = () => _binary || (_binary = loadBinary(process.env.LLOYAL_GPU));
150
+
41
151
  /**
42
152
  * Safe logits access with Runtime Borrow Checker pattern
43
153
  *
@@ -93,29 +203,65 @@ function withLogits(ctx, fn) {
93
203
  return result;
94
204
  }
95
205
 
206
+ const { Branch } = require('./Branch');
207
+
96
208
  module.exports = {
209
+ /**
210
+ * Branch class for parallel generation
211
+ * @see Branch.create()
212
+ */
213
+ Branch,
97
214
  /**
98
215
  * Create a new inference context
99
216
  *
100
- * @param {Object} options
101
- * @param {string} options.modelPath - Path to .gguf model file
102
- * @param {number} [options.nCtx=2048] - Context size
103
- * @param {number} [options.nThreads=4] - Number of threads
104
- * @returns {Promise<SessionContext>}
217
+ * @param {ContextOptions} options - Context configuration
218
+ * @param {LoadOptions} [loadOptions] - Binary loading options
219
+ * @returns {Promise<SessionContext>} The inference context
220
+ *
221
+ * @example
222
+ * ```js
223
+ * // Basic usage
224
+ * const ctx = await createContext({
225
+ * modelPath: './model.gguf',
226
+ * nCtx: 2048,
227
+ * nThreads: 4
228
+ * });
229
+ *
230
+ * // With GPU variant
231
+ * const ctx = await createContext(
232
+ * { modelPath: './model.gguf' },
233
+ * { gpuVariant: 'cuda' }
234
+ * );
235
+ * ```
105
236
  */
106
- createContext: async (options) => {
107
- // For now, createContext is synchronous in C++
108
- // Wrap in Promise for future async model loading
237
+ createContext: async (options, loadOptions) => {
238
+ const variant = loadOptions?.gpuVariant || process.env.LLOYAL_GPU;
239
+ const binary = variant ? loadBinary(variant) : getBinary();
109
240
  return binary.createContext(options);
110
241
  },
111
242
 
112
243
  /**
113
- * Safe logits access with Runtime Borrow Checker pattern
244
+ * Load binary for a specific GPU variant.
245
+ * Useful for checking variant availability before creating context.
246
+ *
247
+ * @param {string} [variant] - 'cuda', 'vulkan', or undefined for CPU
248
+ * @returns {object} Native binary module
249
+ * @throws {Error} If no binary available for platform
114
250
  *
115
- * Ensures logits are only accessed synchronously within the callback.
251
+ * @example
252
+ * ```js
253
+ * // Load default (CPU) binary
254
+ * const binary = loadBinary();
255
+ *
256
+ * // Load CUDA binary (falls back to CPU if unavailable)
257
+ * const binary = loadBinary('cuda');
258
+ * ```
259
+ */
260
+ loadBinary,
261
+
262
+ /**
263
+ * Safe logits access with Runtime Borrow Checker pattern.
116
264
  * See function JSDoc for full documentation.
117
265
  */
118
266
  withLogits,
119
-
120
- SessionContext: binary.SessionContext
121
267
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lloyal-labs/lloyal.node",
3
- "version": "1.0.5-alpha",
3
+ "version": "1.0.7",
4
4
  "description": "Node.js client for liblloyal+llama.cpp",
5
5
  "main": "lib/index.js",
6
6
  "types": "lib/index.d.ts",
@@ -10,7 +10,6 @@
10
10
  },
11
11
  "scripts": {
12
12
  "download-models": "bash scripts/download-test-models.sh",
13
- "install": "node scripts/install.js",
14
13
  "build": "node scripts/build.js",
15
14
  "build:debug": "cmake-js compile --debug",
16
15
  "rebuild": "cmake-js rebuild",
@@ -20,6 +19,7 @@
20
19
  "test": "npm run test:api && npm run test:e2e",
21
20
  "test:api": "node test/api.js",
22
21
  "test:e2e": "node test/e2e.js",
22
+ "test:examples": "node test/examples.js",
23
23
  "example": "node examples/chat/chat.mjs"
24
24
  },
25
25
  "repository": {
@@ -43,28 +43,29 @@
43
43
  },
44
44
  "homepage": "https://github.com/lloyal-ai/lloyal.node#readme",
45
45
  "dependencies": {
46
- "node-addon-api": "^8.5.0",
47
- "node-gyp-build": "^4.8.4"
46
+ "@lloyal-labs/tsampler": "^0.2.0",
47
+ "node-addon-api": "^8.5.0"
48
48
  },
49
49
  "devDependencies": {
50
50
  "cmake-js": "^7.4.0",
51
51
  "glob": "^11.0.0",
52
- "typedoc": "^0.27.5"
52
+ "typedoc": "^0.28.16",
53
+ "typedoc-rhineai-theme": "^1.2.0"
53
54
  },
54
55
  "optionalDependencies": {
55
- "@lloyal-labs/lloyal.node-darwin-arm64": "1.0.5-alpha",
56
- "@lloyal-labs/lloyal.node-darwin-x64": "1.0.5-alpha",
57
- "@lloyal-labs/lloyal.node-linux-arm64": "1.0.5-alpha",
58
- "@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.0.5-alpha",
59
- "@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.0.5-alpha",
60
- "@lloyal-labs/lloyal.node-linux-x64": "1.0.5-alpha",
61
- "@lloyal-labs/lloyal.node-linux-x64-cuda": "1.0.5-alpha",
62
- "@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.0.5-alpha",
63
- "@lloyal-labs/lloyal.node-win32-arm64": "1.0.5-alpha",
64
- "@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.0.5-alpha",
65
- "@lloyal-labs/lloyal.node-win32-x64": "1.0.5-alpha",
66
- "@lloyal-labs/lloyal.node-win32-x64-cuda": "1.0.5-alpha",
67
- "@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.0.5-alpha"
56
+ "@lloyal-labs/lloyal.node-darwin-arm64": "1.0.7",
57
+ "@lloyal-labs/lloyal.node-darwin-x64": "1.0.7",
58
+ "@lloyal-labs/lloyal.node-linux-arm64": "1.0.7",
59
+ "@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.0.7",
60
+ "@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.0.7",
61
+ "@lloyal-labs/lloyal.node-linux-x64": "1.0.7",
62
+ "@lloyal-labs/lloyal.node-linux-x64-cuda": "1.0.7",
63
+ "@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.0.7",
64
+ "@lloyal-labs/lloyal.node-win32-arm64": "1.0.7",
65
+ "@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.0.7",
66
+ "@lloyal-labs/lloyal.node-win32-x64": "1.0.7",
67
+ "@lloyal-labs/lloyal.node-win32-x64-cuda": "1.0.7",
68
+ "@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.0.7"
68
69
  },
69
70
  "engines": {
70
71
  "node": ">=22.0.0"
@@ -108,52 +108,31 @@ if (osName === 'darwin') {
108
108
  // Create package.json from template
109
109
  console.log('\nGenerating package.json...');
110
110
  const mainPackageJson = require(path.join(ROOT, 'package.json'));
111
- const templatePath = path.join(ROOT, 'packages', 'template', 'package.json');
112
-
113
- let pkgJson;
114
- if (fs.existsSync(templatePath)) {
115
- pkgJson = require(templatePath);
116
- } else {
117
- // Fallback template if file doesn't exist yet
118
- pkgJson = {
119
- name: '@lloyal-labs/lloyal.node-PLATFORM',
120
- version: '0.0.0',
121
- description: 'Lloyal native binary for PLATFORM',
122
- main: 'index.js',
123
- files: ['bin/', 'index.js'],
124
- repository: {
125
- type: 'git',
126
- url: 'git+https://github.com/lloyal-ai/lloyal.node.git'
127
- },
128
- license: 'Apache-2.0'
129
- };
130
- }
131
111
 
132
- // Update with actual values
133
- pkgJson.name = `@lloyal-labs/lloyal.node-${packageName}`;
134
- pkgJson.version = mainPackageJson.version;
135
- pkgJson.description = `Lloyal native binary for ${packageName}`;
136
- pkgJson.os = [osName];
137
- pkgJson.cpu = [arch];
112
+ // Platform package exports the binary directly (no index.js wrapper)
113
+ // This enables runtime dynamic require with automatic fallback:
114
+ // require('@lloyal-labs/lloyal.node-linux-x64') resolves to bin/lloyal.node
115
+ const pkgJson = {
116
+ name: `@lloyal-labs/lloyal.node-${packageName}`,
117
+ version: mainPackageJson.version,
118
+ description: `Lloyal native binary for ${packageName}`,
119
+ main: 'bin/lloyal.node',
120
+ os: [osName],
121
+ cpu: [arch],
122
+ files: ['bin/'],
123
+ repository: {
124
+ type: 'git',
125
+ url: 'git+https://github.com/lloyal-ai/lloyal.node.git'
126
+ },
127
+ author: 'lloyal.ai',
128
+ license: 'Apache-2.0'
129
+ };
138
130
 
139
131
  fs.writeFileSync(
140
132
  path.join(PKG_DIR, 'package.json'),
141
133
  JSON.stringify(pkgJson, null, 2) + '\n'
142
134
  );
143
- console.log(` ✓ Created package.json`);
144
-
145
- // Create index.js
146
- console.log('\nGenerating index.js...');
147
- const indexJs = `// Platform-specific binary package for ${packageName}
148
- // This file resolves to the native binary in bin/
149
-
150
- const path = require('path');
151
-
152
- module.exports = path.join(__dirname, 'bin', 'lloyal.node');
153
- `;
154
-
155
- fs.writeFileSync(path.join(PKG_DIR, 'index.js'), indexJs);
156
- console.log(` ✓ Created index.js`);
135
+ console.log(` ✓ Created package.json (main: bin/lloyal.node)`);
157
136
 
158
137
  // Summary
159
138
  console.log(`\n✅ Platform package created successfully!`);
@@ -26,5 +26,15 @@ else
26
26
  echo " ✓ nomic-embed-text already exists"
27
27
  fi
28
28
 
29
+ # slim-summary-tool (1.7GB) - Summary sidecar for dynamic sinks
30
+ if [ ! -f "slim-summarize.gguf" ]; then
31
+ echo " → Downloading slim-summarize.gguf..."
32
+ curl -L -o "slim-summarize.gguf" \
33
+ "https://huggingface.co/llmware/slim-summary-tool/resolve/main/slim-summary-tool.gguf"
34
+ echo " ✓ Downloaded slim-summarize"
35
+ else
36
+ echo " ✓ slim-summarize already exists"
37
+ fi
38
+
29
39
  echo ""
30
40
  echo "✅ All test models ready"
@@ -1,138 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Smart installer for lloyal.node
4
- *
5
- * Strategy:
6
- * 1. Check if prebuilt binary exists for this platform
7
- * 2. If yes, copy to build/Release/ and exit
8
- * 3. If no, show helpful error with build-from-source instructions
9
- *
10
- * Respects LLOYAL_GPU environment variable for GPU variant selection
11
- */
12
-
13
- const fs = require('fs');
14
- const path = require('path');
15
-
16
- const PLATFORM = process.platform;
17
- const ARCH = process.arch;
18
- const ROOT = __dirname + '/..';
19
- const BUILD_DIR = path.join(ROOT, 'build', 'Release');
20
-
21
- // Logging helpers
22
- const log = (msg) => console.log(`[lloyal.node] ${msg}`);
23
- const error = (msg) => console.error(`[lloyal.node] ❌ ${msg}`);
24
-
25
- /**
26
- * Check if a platform package is installed and has binaries
27
- */
28
- function findPrebuilt(packageName) {
29
- try {
30
- const pkgPath = require.resolve(packageName);
31
- const binPath = require(packageName); // index.js exports path to binary
32
-
33
- if (fs.existsSync(binPath)) {
34
- const binDir = path.dirname(binPath);
35
- return binDir;
36
- }
37
- } catch (e) {
38
- // Package not installed or doesn't export binary path
39
- }
40
- return null;
41
- }
42
-
43
- /**
44
- * Copy prebuilt binaries to build/Release/
45
- */
46
- function installPrebuilt(binDir, packageName) {
47
- log(`Found prebuilt binaries in ${packageName}`);
48
-
49
- try {
50
- // Create build/Release directory
51
- fs.mkdirSync(BUILD_DIR, { recursive: true });
52
-
53
- // Copy all files from bin directory
54
- const files = fs.readdirSync(binDir);
55
- files.forEach(file => {
56
- const src = path.join(binDir, file);
57
- const dest = path.join(BUILD_DIR, file);
58
-
59
- if (fs.statSync(src).isFile()) {
60
- fs.copyFileSync(src, dest);
61
- log(` ✓ Copied ${file}`);
62
- }
63
- });
64
-
65
- log(`✅ Installed prebuilt binaries successfully`);
66
- process.exit(0);
67
- } catch (e) {
68
- error(`Failed to install prebuilt: ${e.message}`);
69
- // Don't exit - fall through to source build
70
- }
71
- }
72
-
73
- /**
74
- * Main installation logic
75
- */
76
- function main() {
77
- log(`Platform: ${PLATFORM}-${ARCH}`);
78
-
79
- // 1. Check for user-specified GPU variant via environment variable
80
- if (process.env.LLOYAL_GPU) {
81
- const gpu = process.env.LLOYAL_GPU.toLowerCase();
82
- const packageName = `@lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}-${gpu}`;
83
-
84
- log(`LLOYAL_GPU=${gpu}, looking for ${packageName}...`);
85
- const binDir = findPrebuilt(packageName);
86
-
87
- if (binDir) {
88
- installPrebuilt(binDir, packageName);
89
- return; // exit(0) called in installPrebuilt
90
- } else {
91
- log(` ⚠️ Package ${packageName} not found`);
92
- }
93
- }
94
-
95
- // 2. Check for GPU variants in priority order
96
- const gpuVariants = ['cuda', 'vulkan'];
97
- for (const gpu of gpuVariants) {
98
- const packageName = `@lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}-${gpu}`;
99
- const binDir = findPrebuilt(packageName);
100
-
101
- if (binDir) {
102
- log(`Auto-detected GPU variant: ${gpu}`);
103
- installPrebuilt(binDir, packageName);
104
- return; // exit(0) called in installPrebuilt
105
- }
106
- }
107
-
108
- // 3. Check for default platform package (CPU or Metal on macOS)
109
- const defaultPackage = `@lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}`;
110
- const binDir = findPrebuilt(defaultPackage);
111
-
112
- if (binDir) {
113
- installPrebuilt(binDir, defaultPackage);
114
- return; // exit(0) called in installPrebuilt
115
- }
116
-
117
- // 4. No prebuilt found - error with helpful message
118
- log('');
119
- error('No prebuilt binary found for your platform');
120
- log('');
121
- log(` Platform: ${PLATFORM}-${ARCH}`);
122
- log('');
123
- log(' Options:');
124
- log(' 1. Install a platform-specific package:');
125
- log(` npm install @lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}`);
126
- log('');
127
- log(' 2. Build from source (requires C++20, CMake 3.18+):');
128
- log(' git clone --recursive https://github.com/lloyal-ai/lloyal.node.git');
129
- log(' cd lloyal.node && npm run build');
130
- log('');
131
- log(' See: https://github.com/lloyal-ai/lloyal.node#building');
132
- log('');
133
-
134
- process.exit(1);
135
- }
136
-
137
- // Run installer
138
- main();