@fugood/llama.node 1.4.8 → 1.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/lib/binding.ts +43 -0
  2. package/lib/parallel.js +26 -0
  3. package/lib/parallel.ts +33 -0
  4. package/package.json +15 -15
  5. package/scripts/llama.cpp.patch +12 -14
  6. package/src/LlamaCompletionWorker.cpp +3 -1
  7. package/src/LlamaCompletionWorker.h +2 -0
  8. package/src/LlamaContext.cpp +16 -1
  9. package/src/LlamaContext.h +3 -0
  10. package/src/llama.cpp/common/CMakeLists.txt +4 -4
  11. package/src/llama.cpp/common/arg.cpp +159 -42
  12. package/src/llama.cpp/common/arg.h +10 -1
  13. package/src/llama.cpp/common/common.cpp +1 -1
  14. package/src/llama.cpp/common/common.h +6 -2
  15. package/src/llama.cpp/common/preset.cpp +197 -5
  16. package/src/llama.cpp/common/preset.h +45 -3
  17. package/src/llama.cpp/common/sampling.cpp +51 -37
  18. package/src/llama.cpp/common/sampling.h +6 -3
  19. package/src/llama.cpp/common/speculative.cpp +1 -1
  20. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  21. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +4 -0
  22. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +283 -0
  23. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +28 -0
  24. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +51 -6
  25. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +286 -0
  26. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +8 -0
  27. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +41 -1
  28. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +125 -22
  29. package/src/llama.cpp/src/llama-arch.cpp +1 -1
  30. package/src/llama.cpp/src/llama-mmap.cpp +123 -28
  31. package/src/llama.cpp/src/llama-mmap.h +5 -1
  32. package/src/llama.cpp/src/llama-model-loader.cpp +56 -13
  33. package/src/llama.cpp/src/llama-model.cpp +7 -5
  34. package/src/llama.cpp/src/llama-sampling.cpp +16 -0
  35. package/src/llama.cpp/src/llama.cpp +22 -32
package/lib/binding.ts CHANGED
@@ -120,6 +120,8 @@ export type LlamaCompletionOptions = {
  tool_choice?: string
  enable_thinking?: boolean
  thinking_forced_open?: boolean
+ /** Serialized PEG parser for chat output parsing (required for PEG format types) */
+ chat_parser?: string
  prompt?: string
  /**
  * Text to prefill the response with.
@@ -415,6 +417,8 @@ export type JinjaFormattedChatResult = {
  thinking_forced_open: boolean
  preserved_tokens: string[]
  additional_stops: string[]
+ /** Serialized PEG parser for chat output parsing (required for PEG format types) */
+ chat_parser: string
  }

  export type Tool = {
@@ -435,6 +439,24 @@ export type ToolCall = {
  id?: string
  }

+ export type ParallelRequestStatus = {
+ request_id: number
+ type: 'completion' | 'embedding' | 'rerank'
+ state: 'queued' | 'processing_prompt' | 'generating' | 'done'
+ prompt_length: number
+ tokens_generated: number
+ prompt_ms: number
+ generation_ms: number
+ tokens_per_second: number
+ }
+
+ export type ParallelStatus = {
+ n_parallel: number
+ active_slots: number
+ queued_requests: number
+ requests: ParallelRequestStatus[]
+ }
+
  export interface LlamaContext {
  new (
  options: LlamaModelOptions,
@@ -612,6 +634,27 @@ export interface LlamaContext {
  */
  cancelRequest(requestId: number): void

+ /**
+ * Get current parallel processing status (one-time snapshot)
+ * @returns Current parallel status
+ */
+ getParallelStatus(): ParallelStatus
+
+ /**
+ * Subscribe to parallel processing status changes
+ * @param callback Called whenever parallel status changes
+ * @returns Subscriber ID that can be used to unsubscribe
+ */
+ subscribeParallelStatus(
+ callback: (status: ParallelStatus) => void,
+ ): { subscriberId: number }
+
+ /**
+ * Unsubscribe from parallel processing status changes
+ * @param subscriberId Subscriber ID returned from subscribeParallelStatus
+ */
+ unsubscribeParallelStatus(subscriberId: number): void
+
  /**
  * Clear the KV and recurrent caches.
  * This is faster than recreating the context and useful for preventing
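
Note: the binding.ts changes above add chat_parser in two places: LlamaCompletionOptions gains an optional chat_parser input, and JinjaFormattedChatResult now returns the serialized PEG parser produced during chat formatting. Below is a minimal TypeScript sketch of forwarding that parser when chat is formatted separately from the completion call. It assumes these types are re-exported from the package root (they are declared in lib/binding.ts) and that the remaining completion options stay optional; neither assumption is shown in this diff.

import type {
  JinjaFormattedChatResult,
  LlamaCompletionOptions,
} from '@fugood/llama.node' // assumed re-export; types are declared in lib/binding.ts

// Forward the serialized PEG parser returned by chat formatting into the
// completion options, as PEG format types require it for output parsing.
function toCompletionOptions(
  formatted: JinjaFormattedChatResult,
  prompt: string, // the rendered prompt text, obtained separately (not part of this diff)
): LlamaCompletionOptions {
  return {
    prompt,
    thinking_forced_open: formatted.thinking_forced_open,
    chat_parser: formatted.chat_parser, // added in 1.4.10
  }
}

When messages are passed directly to the completion call, the native side fills chat_parser itself (see the LlamaContext.cpp changes further down); the manual forwarding above only matters for callers that run chat formatting as a separate step.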
package/lib/parallel.js CHANGED
@@ -212,5 +212,31 @@ class LlamaParallelAPI {
  isEnabled() {
  return this.enabled;
  }
+ /**
+ * Get current parallel processing status (one-time snapshot)
+ * @returns Current parallel status
+ */
+ getStatus() {
+ if (!this.enabled) {
+ throw new Error('Parallel mode is not enabled. Call enable() first.');
+ }
+ return this.context.getParallelStatus();
+ }
+ /**
+ * Subscribe to parallel processing status changes
+ * @param callback Called whenever parallel status changes
+ * @returns Object with remove() method to unsubscribe
+ */
+ subscribeToStatus(callback) {
+ if (!this.enabled) {
+ throw new Error('Parallel mode is not enabled. Call enable() first.');
+ }
+ const { subscriberId } = this.context.subscribeParallelStatus(callback);
+ return {
+ remove: () => {
+ this.context.unsubscribeParallelStatus(subscriberId);
+ },
+ };
+ }
  }
  exports.LlamaParallelAPI = LlamaParallelAPI;
package/lib/parallel.ts CHANGED
@@ -4,6 +4,7 @@ import type {
  LlamaCompletionOptions,
  LlamaCompletionToken,
  RerankParams,
+ ParallelStatus,
  } from './binding'
  import { formatMediaChat } from './utils'

@@ -278,4 +279,36 @@ export class LlamaParallelAPI {
  isEnabled(): boolean {
  return this.enabled
  }
+
+ /**
+ * Get current parallel processing status (one-time snapshot)
+ * @returns Current parallel status
+ */
+ getStatus(): ParallelStatus {
+ if (!this.enabled) {
+ throw new Error('Parallel mode is not enabled. Call enable() first.')
+ }
+ return this.context.getParallelStatus()
+ }
+
+ /**
+ * Subscribe to parallel processing status changes
+ * @param callback Called whenever parallel status changes
+ * @returns Object with remove() method to unsubscribe
+ */
+ subscribeToStatus(
+ callback: (status: ParallelStatus) => void,
+ ): { remove: () => void } {
+ if (!this.enabled) {
+ throw new Error('Parallel mode is not enabled. Call enable() first.')
+ }
+
+ const { subscriberId } = this.context.subscribeParallelStatus(callback)
+
+ return {
+ remove: () => {
+ this.context.unsubscribeParallelStatus(subscriberId)
+ },
+ }
+ }
  }
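
Together with the parallel.js counterpart above, these additions expose the parallel status through the LlamaParallelAPI wrapper: getStatus() for a one-time snapshot and subscribeToStatus() for live updates with a remove() handle. A short usage sketch follows; how a LlamaParallelAPI instance is created and enabled is not shown in this diff, so the parallel argument below is assumed to be an already-enabled instance, and the logging is illustrative only.

import type { ParallelStatus } from '@fugood/llama.node' // assumed re-export; declared in lib/binding.ts

// Structural type so the sketch does not depend on how LlamaParallelAPI is exported.
type ParallelStatusSource = {
  getStatus(): ParallelStatus
  subscribeToStatus(cb: (s: ParallelStatus) => void): { remove: () => void }
}

function watchParallel(parallel: ParallelStatusSource): () => void {
  // One-time snapshot of slot/queue usage
  const snapshot = parallel.getStatus()
  console.log(`slots ${snapshot.active_slots}/${snapshot.n_parallel}, queued ${snapshot.queued_requests}`)

  // Live updates; keep the subscription handle so it can be removed later
  const sub = parallel.subscribeToStatus((status) => {
    for (const req of status.requests) {
      console.log(`#${req.request_id} ${req.type} ${req.state} ${req.tokens_per_second.toFixed(1)} tok/s`)
    }
  })

  // Caller invokes the returned function to stop receiving updates
  return () => sub.remove()
}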
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.4.8",
+ "version": "1.4.10",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-darwin-arm64": "1.4.8",
- "@fugood/node-llama-darwin-x64": "1.4.8",
- "@fugood/node-llama-linux-arm64": "1.4.8",
- "@fugood/node-llama-linux-arm64-cuda": "1.4.8",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.4.8",
- "@fugood/node-llama-linux-arm64-vulkan": "1.4.8",
- "@fugood/node-llama-linux-x64": "1.4.8",
- "@fugood/node-llama-linux-x64-cuda": "1.4.8",
- "@fugood/node-llama-linux-x64-vulkan": "1.4.8",
- "@fugood/node-llama-win32-arm64": "1.4.8",
- "@fugood/node-llama-win32-arm64-vulkan": "1.4.8",
- "@fugood/node-llama-win32-x64": "1.4.8",
- "@fugood/node-llama-win32-x64-cuda": "1.4.8",
- "@fugood/node-llama-win32-x64-vulkan": "1.4.8"
+ "@fugood/node-llama-darwin-arm64": "1.4.10",
+ "@fugood/node-llama-darwin-x64": "1.4.10",
+ "@fugood/node-llama-linux-arm64": "1.4.10",
+ "@fugood/node-llama-linux-arm64-cuda": "1.4.10",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.4.10",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.4.10",
+ "@fugood/node-llama-linux-x64": "1.4.10",
+ "@fugood/node-llama-linux-x64-cuda": "1.4.10",
+ "@fugood/node-llama-linux-x64-vulkan": "1.4.10",
+ "@fugood/node-llama-win32-arm64": "1.4.10",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.4.10",
+ "@fugood/node-llama-win32-x64": "1.4.10",
+ "@fugood/node-llama-win32-x64-cuda": "1.4.10",
+ "@fugood/node-llama-win32-x64-vulkan": "1.4.10"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -1,25 +1,23 @@
  diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
- index 0182767c2..f8c4a4f63 100644
+ index f7b99159e..fa37fed19 100644
  --- a/src/llama.cpp/common/CMakeLists.txt
  +++ b/src/llama.cpp/common/CMakeLists.txt
- @@ -151,9 +151,16 @@ if (LLAMA_LLGUIDANCE)
+ @@ -154,8 +154,14 @@ if (LLAMA_LLGUIDANCE)
  set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()

+ -target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
  +# Add Windows socket libraries unconditionally on Windows
  +if (WIN32)
  + set(LLAMA_COMMON_WIN_LIBS ws2_32)
  +else()
  + set(LLAMA_COMMON_WIN_LIBS "")
  +endif()
- +
- target_include_directories(${TARGET} PUBLIC . ../vendor)
- target_compile_features (${TARGET} PUBLIC cxx_std_17)
- -target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
- +target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)

+ +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)

  #
+ # copy the license files
  diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
  index 1bcba9cd8..b7cd68734 100644
  --- a/src/llama.cpp/common/chat-peg-parser.cpp
@@ -98,7 +96,7 @@ index 6085510a4..263076ce2 100644
  struct common_chat_tool_call {
  std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
- index 5a8cf5248..8010a990e 100644
+ index d4e8c7405..af3dec813 100644
  --- a/src/llama.cpp/common/common.cpp
  +++ b/src/llama.cpp/common/common.cpp
  @@ -1343,6 +1343,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
@@ -110,7 +108,7 @@ index 5a8cf5248..8010a990e 100644
  mparams.split_mode = params.split_mode;
  mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index d70744840..dea8c4546 100644
+ index 334372073..e912b593a 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
  @@ -307,6 +307,7 @@ struct lr_opt {
@@ -122,7 +120,7 @@ index d70744840..dea8c4546 100644
  int32_t n_ctx = 0; // context size, 0 == context the model was trained with
  int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
- index fc31089f3..aa9befe4c 100644
+ index 28fb7612e..63f7e1ca1 100644
  --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
@@ -135,10 +133,10 @@ index fc31089f3..aa9befe4c 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
  diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- index 514f086f6..792abaa58 100644
+ index 6a00abacc..9e12459b6 100644
  --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
  +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- @@ -3213,11 +3213,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+ @@ -3226,11 +3226,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
  GGML_UNUSED(dev);
  }

@@ -168,7 +166,7 @@ index 514f086f6..792abaa58 100644
  GGML_UNUSED(dev);
  }

- @@ -3398,10 +3413,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3413,10 +3428,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  }
  }

@@ -187,7 +185,7 @@ index 514f086f6..792abaa58 100644

  GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);

- @@ -3414,6 +3436,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3429,6 +3451,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  } catch (std::exception const &exc) {
  GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
  devices[i].context = nullptr;
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -37,6 +37,7 @@ LlamaCompletionWorker::LlamaCompletionWorker(
  int32_t chat_format,
  bool thinking_forced_open,
  std::string reasoning_format,
+ const std::string &chat_parser,
  const std::vector<std::string> &media_paths,
  const std::vector<llama_token> &guide_tokens,
  bool has_vocoder,
@@ -46,6 +47,7 @@ LlamaCompletionWorker::LlamaCompletionWorker(
  _params(params), _stop_words(stop_words), _chat_format(chat_format),
  _thinking_forced_open(thinking_forced_open),
  _reasoning_format(reasoning_format),
+ _chat_parser(chat_parser),
  _media_paths(media_paths), _guide_tokens(guide_tokens),
  _prefill_text(prefill_text),
  _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
@@ -121,7 +123,7 @@ void LlamaCompletionWorker::Execute() {
  }

  // Begin completion with chat format and reasoning settings
- completion->beginCompletion(_chat_format, common_reasoning_format_from_name(_reasoning_format), _thinking_forced_open);
+ completion->beginCompletion(_chat_format, common_reasoning_format_from_name(_reasoning_format), _thinking_forced_open, _chat_parser);

  // Main completion loop
  int token_count = 0;
package/src/LlamaCompletionWorker.h CHANGED
@@ -23,6 +23,7 @@ public:
  int32_t chat_format,
  bool thinking_forced_open,
  std::string reasoning_format,
+ const std::string &chat_parser = "",
  const std::vector<std::string> &media_paths = {},
  const std::vector<llama_token> &guide_tokens = {},
  bool has_vocoder = false,
@@ -50,6 +51,7 @@ private:
  int32_t _chat_format;
  bool _thinking_forced_open;
  std::string _reasoning_format;
+ std::string _chat_parser;
  std::vector<std::string> _media_paths;
  std::vector<llama_token> _guide_tokens;
  std::string _prefill_text;
package/src/LlamaContext.cpp CHANGED
@@ -201,6 +201,15 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
  InstanceMethod<&LlamaContext::CancelRequest>(
  "cancelRequest",
  static_cast<napi_property_attributes>(napi_enumerable)),
+ InstanceMethod<&LlamaContext::GetParallelStatus>(
+ "getParallelStatus",
+ static_cast<napi_property_attributes>(napi_enumerable)),
+ InstanceMethod<&LlamaContext::SubscribeParallelStatus>(
+ "subscribeParallelStatus",
+ static_cast<napi_property_attributes>(napi_enumerable)),
+ InstanceMethod<&LlamaContext::UnsubscribeParallelStatus>(
+ "unsubscribeParallelStatus",
+ static_cast<napi_property_attributes>(napi_enumerable)),
  InstanceMethod<&LlamaContext::ClearCache>(
  "clearCache",
  static_cast<napi_property_attributes>(napi_enumerable)),
@@ -250,6 +259,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  }

  common_params params;
+ params.fit_params = false;
+
  params.model.path = get_option<std::string>(options, "model", "");
  if (params.model.path.empty()) {
  Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
@@ -760,6 +771,8 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
  i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
  }
  result.Set("additional_stops", additional_stops);
+ // chat_parser: string (serialized PEG parser for chat output parsing)
+ result.Set("chat_parser", chatParams.parser);

  return result;
  } else {
@@ -821,6 +834,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
  bool thinking_forced_open = get_option<bool>(options, "thinking_forced_open", false);
  std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
+ std::string chat_parser = get_option<std::string>(options, "chat_parser", "");

  common_params params = _rn_ctx->params;
  auto grammar_from_params = get_option<std::string>(options, "grammar", "");
@@ -959,6 +973,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {

  chat_format = chatParams.format;
  thinking_forced_open = chatParams.thinking_forced_open;
+ chat_parser = chatParams.parser;

  for (const auto &token : chatParams.preserved_tokens) {
  auto ids =
@@ -1074,7 +1089,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {

  auto *worker =
  new LlamaCompletionWorker(info, _rn_ctx, callback, params, stop_words,
- chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+ chat_format, thinking_forced_open, reasoning_format, chat_parser, media_paths, guide_tokens,
  _rn_ctx->has_vocoder, _rn_ctx->tts_wrapper ? _rn_ctx->tts_wrapper->type : rnllama::UNKNOWN, prefill_text);
  worker->Queue();
  _wip = worker;
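
At the native layer, LlamaContext::Init now registers getParallelStatus, subscribeParallelStatus, and unsubscribeParallelStatus, matching the LlamaContext interface additions in binding.ts: subscribing hands back a subscriberId that must be passed to unsubscribeParallelStatus. A minimal sketch against that lower-level interface follows (the lib/parallel wrapper shown earlier is the friendlier entry point); ctx is assumed to be an existing LlamaContext instance and the root-level type re-export is an assumption.

import type { LlamaContext, ParallelStatus } from '@fugood/llama.node' // assumed re-export; declared in lib/binding.ts

function pollAndSubscribe(ctx: LlamaContext): ParallelStatus {
  // Subscribe; the native side returns a numeric subscriber id
  const { subscriberId } = ctx.subscribeParallelStatus((status: ParallelStatus) => {
    console.log(`active slots: ${status.active_slots}, queued: ${status.queued_requests}`)
  })

  // ...later, stop receiving updates by passing the id back
  ctx.unsubscribeParallelStatus(subscriberId)

  // One-off snapshot, no subscription needed
  return ctx.getParallelStatus()
}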
package/src/LlamaContext.h CHANGED
@@ -68,6 +68,9 @@ private:
  Napi::Value QueueEmbedding(const Napi::CallbackInfo &info);
  Napi::Value QueueRerank(const Napi::CallbackInfo &info);
  void CancelRequest(const Napi::CallbackInfo &info);
+ Napi::Value GetParallelStatus(const Napi::CallbackInfo &info);
+ Napi::Value SubscribeParallelStatus(const Napi::CallbackInfo &info);
+ void UnsubscribeParallelStatus(const Napi::CallbackInfo &info);

  // Cache management
  void ClearCache(const Napi::CallbackInfo &info);
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -85,6 +85,9 @@ add_library(${TARGET} STATIC
  unicode.h
  )

+ target_include_directories(${TARGET} PUBLIC . ../vendor)
+ target_compile_features (${TARGET} PUBLIC cxx_std_17)
+
  if (BUILD_SHARED_LIBS)
  set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
  endif()
@@ -158,10 +161,7 @@ else()
  set(LLAMA_COMMON_WIN_LIBS "")
  endif()

- target_include_directories(${TARGET} PUBLIC . ../vendor)
- target_compile_features (${TARGET} PUBLIC cxx_std_17)
- target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
-
+ target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)

  #
  # copy the license files