@fugood/llama.node 0.0.1-alpha.1 → 0.0.1-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +27 -1
- package/README.md +4 -4
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/lib/binding.js +36 -4
- package/lib/binding.ts +4 -3
- package/lib/index.js +16 -4
- package/lib/index.ts +7 -4
- package/package.json +4 -3
- package/src/DisposeWorker.cpp +11 -0
- package/src/DisposeWorker.h +14 -0
- package/src/LlamaCompletionWorker.cpp +163 -0
- package/src/LlamaCompletionWorker.h +34 -0
- package/src/LlamaContext.cpp +200 -0
- package/src/LlamaContext.h +21 -0
- package/src/LoadSessionWorker.cpp +24 -0
- package/src/LoadSessionWorker.h +17 -0
- package/src/SaveSessionWorker.cpp +21 -0
- package/src/SaveSessionWorker.h +16 -0
- package/src/addons.cc +9 -0
- package/src/common.hpp +81 -0
- package/src/addons.cpp +0 -506
package/CMakeLists.txt
CHANGED
@@ -61,7 +61,21 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 set(LLAMA_STATIC ON CACHE BOOL "Build llama as static library")
 add_subdirectory("src/llama.cpp")
 
-file(
+file(
+  GLOB SOURCE_FILES
+  "src/addons.cc"
+  "src/common.hpp"
+  "src/DisposeWorker.cpp"
+  "src/DisposeWorker.h"
+  "src/LlamaCompletionWorker.cpp"
+  "src/LlamaCompletionWorker.h"
+  "src/LlamaContext.cpp"
+  "src/LlamaContext.h"
+  "src/LoadSessionWorker.cpp"
+  "src/LoadSessionWorker.h"
+  "src/SaveSessionWorker.cpp"
+  "src/SaveSessionWorker.h"
+)
 
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
@@ -83,3 +97,15 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
   COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${PROJECT_NAME}> ${PLATFORM_BINARY_DIR}/$<TARGET_FILE_NAME:${PROJECT_NAME}>
   COMMENT "Copying to bin folder"
 )
+
+if (LLAMA_METAL)
+  # copy ${CMAKE_BINARY_DIR}/bin/default.metallib
+  add_custom_target(copy_metallib)
+  add_custom_command(
+    TARGET copy_metallib
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/default.metallib ${PLATFORM_BINARY_DIR}/default.metallib
+    COMMENT "Copying default.metallib to bin folder"
+  )
+  add_dependencies(copy_metallib ggml-metal)
+  add_dependencies(${PROJECT_NAME} copy_metallib)
+endif()
package/README.md
CHANGED
@@ -7,20 +7,20 @@ Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
 ## Installation
 
 ```sh
-npm install llama.node
+npm install @fugood/llama.node
 ```
 
 ## Usage
 
 ```js
-import { loadModel } from 'llama.node'
+import { loadModel } from '@fugood/llama.node'
 
 // Initial a Llama context with the model (may take a while)
-const context = loadModel({
+const context = await loadModel({
   model: 'path/to/gguf/model',
   use_mlock: true,
   n_ctx: 2048,
-  n_gpu_layers: 1, // > 0: enable
+  n_gpu_layers: 1, // > 0: enable GPU
   // embedding: true, // use embedding
 })
 
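Note: with this release the package is published under the scoped name `@fugood/llama.node` and `loadModel` returns a Promise. A minimal usage sketch based on the API surface visible in this diff (the model path is a placeholder; the result fields mirror what `LlamaCompletionWorker::OnOK` sets further below):

```ts
import { loadModel } from '@fugood/llama.node'

const main = async () => {
  // loadModel is async as of this version
  const context = await loadModel({
    model: 'path/to/gguf/model',
    n_ctx: 2048,
    n_gpu_layers: 1, // > 0: enable GPU
  })

  // completion(options, onToken?) resolves with
  // { text, tokens_predicted, tokens_evaluated, truncated }
  const result = await context.completion(
    { prompt: 'Hello,', n_predict: 32, stop: ['\n\n'] },
    (data) => process.stdout.write(data.token), // streamed pieces
  )
  console.log('\n---\n', result.text)

  await context.release() // added in this release
}

main()
```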
package/bin/darwin/arm64/default.metallib
CHANGED
Binary file

package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file

package/bin/darwin/x64/default.metallib
CHANGED
Binary file

package/bin/darwin/x64/llama-node.node
CHANGED
Binary file

package/bin/linux/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/node.lib
CHANGED
Binary file

package/bin/win32/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/x64/node.lib
CHANGED
Binary file
package/lib/binding.js
CHANGED
@@ -1,13 +1,45 @@
 "use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.loadModule = void 0;
-const loadModule = (variant) => {
+const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
     try {
         if (variant) {
-            return
+            return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
         }
     }
     catch (_a) { } // ignore errors and try the common path
-    return
-};
+    return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+});
 exports.loadModule = loadModule;
package/lib/binding.ts
CHANGED
@@ -41,17 +41,18 @@ export interface LlamaContext {
   stopCompletion(): void
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
+  release(): Promise<void>
 }
 
 export interface Module {
   LlamaContext: LlamaContext
 }
 
-export const loadModule = (variant?: string): Module => {
+export const loadModule = async (variant?: string): Promise<Module> => {
   try {
     if (variant) {
-      return
+      return await import(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`) as Module
     }
   } catch {} // ignore errors and try the common path
-  return
+  return await import(`../bin/${process.platform}/${process.arch}/llama-node.node`) as Module
 }
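Note: `loadModule` is now async and resolves the prebuilt binary from `../bin/<platform>[-<variant>]/<arch>/llama-node.node`, falling back to the common path when the variant-specific import fails. A small sketch of that lookup order (the `candidateBinaryPaths` helper and the `vulkan` variant name are illustrative only, not part of the package):

```ts
// Mirrors the candidate paths tried by loadModule above.
const candidateBinaryPaths = (variant?: string): string[] => {
  const paths: string[] = []
  if (variant) {
    // variant-specific build, e.g. ../bin/linux-vulkan/x64/llama-node.node
    paths.push(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`)
  }
  // common fallback, e.g. ../bin/darwin/arm64/llama-node.node
  paths.push(`../bin/${process.platform}/${process.arch}/llama-node.node`)
  return paths
}

console.log(candidateBinaryPaths())         // only the common fallback
console.log(candidateBinaryPaths('vulkan')) // variant path first, then the fallback
```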
package/lib/index.js
CHANGED
@@ -13,12 +13,24 @@ var __createBinding = (this && this.__createBinding) || (Object.create ? (functi
 var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.loadModel = void 0;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
-const 
-
-
-
+const mods = {};
+const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
+    var _a, _b;
+    const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
+    (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
+    return new mods[variant].LlamaContext(options);
+});
 exports.loadModel = loadModel;
package/lib/index.ts
CHANGED
@@ -1,5 +1,5 @@
 import { loadModule, LlamaModelOptions } from './binding'
-import type { LlamaContext } from './binding'
+import type { Module, LlamaContext } from './binding'
 
 export * from './binding'
 
@@ -7,7 +7,10 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
   lib_variant?: string
 }
 
-
-
-
+const mods: { [key: string]: Module } = {}
+
+export const loadModel = async (options: LlamaModelOptionsExtended): Promise<LlamaContext> => {
+  const variant = options.lib_variant ?? 'default'
+  mods[variant] ??= await loadModule(options.lib_variant)
+  return new mods[variant].LlamaContext(options)
 }
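Note: the rewritten `loadModel` caches the loaded native module per `lib_variant`, so repeated calls reuse one binding while each call still constructs its own `LlamaContext`. A usage sketch (model paths are placeholders):

```ts
import { loadModel } from '@fugood/llama.node'

const run = async () => {
  // Both contexts come from the same cached 'default' native module.
  const a = await loadModel({ model: 'path/to/model-a.gguf' })
  const b = await loadModel({ model: 'path/to/model-b.gguf' })

  console.log(a.getSystemInfo())

  await Promise.all([a.release(), b.release()])
}

run()
```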
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.0.1-alpha.
+  "version": "0.0.1-alpha.3",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -30,7 +30,7 @@
   },
   "homepage": "https://github.com/mybigday/llama.node#readme",
   "publishConfig": {
-    "registry": "https://registry.npmjs.org"
+    "registry": "https://registry.npmjs.org"
   },
   "binary": {
     "napi_versions": [
@@ -57,7 +57,8 @@
     "husky": "^9.0.11",
     "jest": "^29.7.0",
     "rimraf": "^5.0.5",
-    "typescript": "^5.4.5"
+    "typescript": "^5.4.5",
+    "wait-for-expect": "^3.0.2"
   },
   "dependencies": {
     "node-addon-api": "^8.0.0"
package/src/DisposeWorker.cpp
ADDED
@@ -0,0 +1,11 @@
+#include "DisposeWorker.h"
+
+DisposeWorker::DisposeWorker(const Napi::CallbackInfo &info,
+                             LlamaSessionPtr sess)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), sess_(std::move(sess)) {}
+
+void DisposeWorker::Execute() { sess_->dispose(); }
+
+void DisposeWorker::OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
+
+void DisposeWorker::OnError(const Napi::Error &err) { Reject(err.Value()); }
package/src/DisposeWorker.h
ADDED
@@ -0,0 +1,14 @@
+#include "common.hpp"
+
+class DisposeWorker : public Napi::AsyncWorker, public Napi::Promise::Deferred {
+public:
+  DisposeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr sess);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr sess_;
+};
package/src/LlamaCompletionWorker.cpp
ADDED
@@ -0,0 +1,163 @@
+#include "LlamaCompletionWorker.h"
+#include "LlamaContext.h"
+
+size_t common_part(const std::vector<llama_token> &a,
+                   const std::vector<llama_token> &b) {
+  size_t i = 0;
+  while (i < a.size() && i < b.size() && a[i] == b[i]) {
+    i++;
+  }
+  return i;
+}
+
+size_t findStoppingStrings(const std::string &text,
+                           const size_t last_token_size,
+                           const std::vector<std::string> &stop_words) {
+  size_t stop_pos = std::string::npos;
+
+  for (const std::string &word : stop_words) {
+    size_t pos;
+
+    const size_t tmp = word.size() + last_token_size;
+    const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;
+
+    pos = text.find(word, from_pos);
+
+    if (pos != std::string::npos &&
+        (stop_pos == std::string::npos || pos < stop_pos)) {
+      stop_pos = pos;
+    }
+  }
+
+  return stop_pos;
+}
+
+LlamaCompletionWorker::LlamaCompletionWorker(
+    const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+    Napi::Function callback, gpt_params params,
+    std::vector<std::string> stop_words)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
+      _params(params), _stop_words(stop_words) {
+  if (!callback.IsEmpty()) {
+    _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
+                                          "LlamaCompletionCallback", 0, 1);
+    _has_callback = true;
+  }
+}
+
+LlamaCompletionWorker::~LlamaCompletionWorker() {
+  if (_has_callback) {
+    _tsfn.Release();
+  }
+}
+
+void LlamaCompletionWorker::Execute() {
+  _sess->get_mutex().lock();
+  const auto t_main_start = ggml_time_us();
+  const size_t n_ctx = _params.n_ctx;
+  const auto n_keep = _params.n_keep;
+  size_t n_cur = 0;
+  size_t n_input = 0;
+  const auto model = llama_get_model(_sess->context());
+  const bool add_bos = llama_should_add_bos_token(model);
+  auto ctx = _sess->context();
+
+  llama_set_rng_seed(ctx, _params.seed);
+
+  LlamaCppSampling sampling{llama_sampling_init(_params.sparams),
+                            llama_sampling_free};
+
+  std::vector<llama_token> prompt_tokens =
+      ::llama_tokenize(ctx, _params.prompt, add_bos);
+  n_input = prompt_tokens.size();
+  if (_sess->tokens_ptr()->size() > 0) {
+    n_cur = common_part(*(_sess->tokens_ptr()), prompt_tokens);
+    if (n_cur == n_input) {
+      --n_cur;
+    }
+    n_input -= n_cur;
+    llama_kv_cache_seq_rm(ctx, 0, n_cur, -1);
+  }
+  _sess->set_tokens(std::move(prompt_tokens));
+
+  const int max_len = _params.n_predict < 0 ? 0 : _params.n_predict;
+  _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);
+
+  auto embd = _sess->tokens_ptr();
+  for (int i = 0; i < max_len || _stop; i++) {
+    // check if we need to remove some tokens
+    if (embd->size() >= _params.n_ctx) {
+      const int n_left = n_cur - n_keep - 1;
+      const int n_discard = n_left / 2;
+
+      llama_kv_cache_seq_rm(ctx, 0, n_keep + 1, n_keep + n_discard + 1);
+      llama_kv_cache_seq_add(ctx, 0, n_keep + 1 + n_discard, n_cur, -n_discard);
+
+      // shift the tokens
+      embd->insert(embd->begin() + n_keep + 1,
+                   embd->begin() + n_keep + 1 + n_discard, embd->end());
+      embd->resize(embd->size() - n_discard);
+
+      n_cur -= n_discard;
+      _result.truncated = true;
+    }
+    int ret = llama_decode(
+        ctx, llama_batch_get_one(embd->data() + n_cur, n_input, n_cur, 0));
+    if (ret < 0) {
+      SetError("Failed to decode token, code: " + std::to_string(ret));
+      break;
+    }
+    // sample the next token
+    const llama_token new_token_id =
+        llama_sampling_sample(sampling.get(), ctx, nullptr);
+    // prepare the next batch
+    embd->emplace_back(new_token_id);
+    auto token = llama_token_to_piece(ctx, new_token_id);
+    _result.text += token;
+    n_cur += n_input;
+    _result.tokens_evaluated += n_input;
+    _result.tokens_predicted += 1;
+    n_input = 1;
+    if (_has_callback) {
+      const char *c_token = strdup(token.c_str());
+      _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
+                                     const char *value) {
+        auto obj = Napi::Object::New(env);
+        obj.Set("token", Napi::String::New(env, value));
+        delete value;
+        jsCallback.Call({obj});
+      });
+    }
+    // is it an end of generation?
+    if (llama_token_is_eog(model, new_token_id)) {
+      break;
+    }
+    // check for stop words
+    if (!_stop_words.empty()) {
+      const size_t stop_pos =
+          findStoppingStrings(_result.text, token.size(), _stop_words);
+      if (stop_pos != std::string::npos) {
+        break;
+      }
+    }
+  }
+  const auto t_main_end = ggml_time_us();
+  _sess->get_mutex().unlock();
+}
+
+void LlamaCompletionWorker::OnOK() {
+  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
+  result.Set("tokens_evaluated", Napi::Number::New(Napi::AsyncWorker::Env(),
+                                                   _result.tokens_evaluated));
+  result.Set("tokens_predicted", Napi::Number::New(Napi::AsyncWorker::Env(),
+                                                   _result.tokens_predicted));
+  result.Set("truncated",
+             Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated));
+  result.Set("text",
+             Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str()));
+  Napi::Promise::Deferred::Resolve(result);
+}
+
+void LlamaCompletionWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/LlamaCompletionWorker.h
ADDED
@@ -0,0 +1,34 @@
+#include "common.hpp"
+
+struct CompletionResult {
+  std::string text = "";
+  bool truncated = false;
+  size_t tokens_predicted = 0;
+  size_t tokens_evaluated = 0;
+};
+
+class LlamaCompletionWorker : public Napi::AsyncWorker,
+                              public Napi::Promise::Deferred {
+public:
+  LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                        Napi::Function callback, gpt_params params,
+                        std::vector<std::string> stop_words = {});
+
+  ~LlamaCompletionWorker();
+
+  inline void Stop() { _stop = true; }
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  gpt_params _params;
+  std::vector<std::string> _stop_words;
+  Napi::ThreadSafeFunction _tsfn;
+  bool _has_callback = false;
+  bool _stop = false;
+  CompletionResult _result;
+};
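Note: the completion worker streams each sampled piece to the optional JS callback through a `ThreadSafeFunction`, and `Stop()` is what `stopCompletion()` reaches from `LlamaContext`. A sketch of how that looks from the JS side (prompt and timeout are arbitrary; model path is a placeholder):

```ts
import { loadModel } from '@fugood/llama.node'

const demo = async () => {
  const context = await loadModel({ model: 'path/to/gguf/model' })

  // Ask the in-flight completion to stop after 5 seconds.
  const timer = setTimeout(() => context.stopCompletion(), 5000)

  const result = await context.completion(
    { prompt: 'Write a haiku about autumn.', n_predict: 128 },
    ({ token }) => process.stdout.write(token), // one call per generated piece
  )
  clearTimeout(timer)
  console.log('\ntokens predicted:', result.tokens_predicted)

  await context.release()
}

demo()
```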
package/src/LlamaContext.cpp
ADDED
@@ -0,0 +1,200 @@
+#include "LlamaContext.h"
+#include "DisposeWorker.h"
+#include "LlamaCompletionWorker.h"
+#include "LoadSessionWorker.h"
+#include "SaveSessionWorker.h"
+
+void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
+  Napi::Function func = DefineClass(
+      env, "LlamaContext",
+      {InstanceMethod<&LlamaContext::GetSystemInfo>(
+           "getSystemInfo",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::Completion>(
+           "completion",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::StopCompletion>(
+           "stopCompletion",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::SaveSession>(
+           "saveSession",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::LoadSession>(
+           "loadSession",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::Release>(
+           "release", static_cast<napi_property_attributes>(napi_enumerable))});
+  Napi::FunctionReference *constructor = new Napi::FunctionReference();
+  *constructor = Napi::Persistent(func);
+#if NAPI_VERSION > 5
+  env.SetInstanceData(constructor);
+#endif
+  exports.Set("LlamaContext", func);
+}
+
+// construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
+// use_mlock, use_mmap }): LlamaContext throws error
+LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
+    : Napi::ObjectWrap<LlamaContext>(info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsObject()) {
+    Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
+  }
+  auto options = info[0].As<Napi::Object>();
+
+  gpt_params params;
+  params.model = get_option<std::string>(options, "model", "");
+  if (params.model.empty()) {
+    Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
+  }
+  params.embedding = get_option<bool>(options, "embedding", false);
+  params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
+  params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
+  params.n_threads =
+      get_option<int32_t>(options, "n_threads", get_math_cpu_count() / 2);
+  params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
+  params.use_mlock = get_option<bool>(options, "use_mlock", false);
+  params.use_mmap = get_option<bool>(options, "use_mmap", true);
+  params.numa =
+      static_cast<ggml_numa_strategy>(get_option<uint32_t>(options, "numa", 0));
+
+  llama_backend_init();
+  llama_numa_init(params.numa);
+
+  llama_model *model;
+  llama_context *ctx;
+  std::tie(model, ctx) = llama_init_from_gpt_params(params);
+
+  if (model == nullptr || ctx == nullptr) {
+    Napi::TypeError::New(env, "Failed to load model")
+        .ThrowAsJavaScriptException();
+  }
+
+  _sess = std::make_shared<LlamaSession>(ctx, params);
+  _info = get_system_info(params);
+}
+
+// getSystemInfo(): string
+Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
+  return Napi::String::New(info.Env(), _info);
+}
+
+// completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
+// void): Promise<LlamaCompletionResult>
+Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsObject()) {
+    Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
+  }
+  if (info.Length() >= 2 && !info[1].IsFunction()) {
+    Napi::TypeError::New(env, "Function expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto options = info[0].As<Napi::Object>();
+
+  gpt_params params = _sess->params();
+  params.prompt = get_option<std::string>(options, "prompt", "");
+  if (params.prompt.empty()) {
+    Napi::TypeError::New(env, "Prompt is required")
+        .ThrowAsJavaScriptException();
+  }
+  params.n_predict = get_option<int32_t>(options, "n_predict", -1);
+  params.sparams.temp = get_option<float>(options, "temperature", 0.80f);
+  params.sparams.top_k = get_option<int32_t>(options, "top_k", 40);
+  params.sparams.top_p = get_option<float>(options, "top_p", 0.95f);
+  params.sparams.min_p = get_option<float>(options, "min_p", 0.05f);
+  params.sparams.tfs_z = get_option<float>(options, "tfs_z", 1.00f);
+  params.sparams.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
+  params.sparams.mirostat_tau =
+      get_option<float>(options, "mirostat_tau", 5.00f);
+  params.sparams.mirostat_eta =
+      get_option<float>(options, "mirostat_eta", 0.10f);
+  params.sparams.penalty_last_n =
+      get_option<int32_t>(options, "penalty_last_n", 64);
+  params.sparams.penalty_repeat =
+      get_option<float>(options, "penalty_repeat", 1.00f);
+  params.sparams.penalty_freq =
+      get_option<float>(options, "penalty_freq", 0.00f);
+  params.sparams.penalty_present =
+      get_option<float>(options, "penalty_present", 0.00f);
+  params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
+  params.sparams.typical_p = get_option<float>(options, "typical_p", 1.00f);
+  params.ignore_eos = get_option<float>(options, "ignore_eos", false);
+  params.sparams.grammar = get_option<std::string>(options, "grammar", "");
+  params.n_keep = get_option<int32_t>(options, "n_keep", 0);
+  params.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+  std::vector<std::string> stop_words;
+  if (options.Has("stop") && options.Get("stop").IsArray()) {
+    auto stop_words_array = options.Get("stop").As<Napi::Array>();
+    for (size_t i = 0; i < stop_words_array.Length(); i++) {
+      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
+    }
+  }
+
+  Napi::Function callback;
+  if (info.Length() >= 2) {
+    callback = info[1].As<Napi::Function>();
+  }
+
+  auto *worker =
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words);
+  worker->Queue();
+  _wip = worker;
+  return worker->Promise();
+}
+
+// stopCompletion(): void
+void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
+  if (_wip != nullptr) {
+    _wip->Stop();
+  }
+}
+
+// saveSession(path: string): Promise<void> throws error
+Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto *worker = new SaveSessionWorker(info, _sess);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// loadSession(path: string): Promise<{ count }> throws error
+Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto *worker = new LoadSessionWorker(info, _sess);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// release(): Promise<void>
+Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
+  auto env = info.Env();
+  if (_wip != nullptr) {
+    _wip->Stop();
+  }
+  if (_sess == nullptr) {
+    auto promise = Napi::Promise::Deferred(env);
+    promise.Resolve(env.Undefined());
+    return promise.Promise();
+  }
+  auto *worker = new DisposeWorker(info, std::move(_sess));
+  worker->Queue();
+  return worker->Promise();
+}