node-llama-cpp 3.0.0-beta.12 → 3.0.0-beta.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/dist/ChatWrapper.d.ts +1 -0
  2. package/dist/ChatWrapper.js +2 -1
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/TemplateChatWrapper.d.ts +67 -0
  5. package/dist/TemplateChatWrapper.js +239 -0
  6. package/dist/TemplateChatWrapper.js.map +1 -0
  7. package/dist/bindings/AddonTypes.d.ts +2 -0
  8. package/dist/bindings/Llama.d.ts +1 -3
  9. package/dist/bindings/Llama.js +10 -20
  10. package/dist/bindings/Llama.js.map +1 -1
  11. package/dist/bindings/consts.d.ts +2 -0
  12. package/dist/bindings/consts.js +11 -0
  13. package/dist/bindings/consts.js.map +1 -0
  14. package/dist/bindings/getLlama.d.ts +14 -18
  15. package/dist/bindings/getLlama.js +210 -78
  16. package/dist/bindings/getLlama.js.map +1 -1
  17. package/dist/bindings/types.d.ts +8 -5
  18. package/dist/bindings/types.js +18 -0
  19. package/dist/bindings/types.js.map +1 -1
  20. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  21. package/dist/bindings/utils/asyncEvery.js +15 -0
  22. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  23. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  24. package/dist/bindings/utils/asyncSome.js +27 -0
  25. package/dist/bindings/utils/asyncSome.js.map +1 -0
  26. package/dist/bindings/utils/cloneLlamaCppRepo.js +13 -3
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  28. package/dist/bindings/utils/compileLLamaCpp.js +30 -4
  29. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  30. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +11 -0
  31. package/dist/bindings/utils/detectAvailableComputeLayers.js +158 -0
  32. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  33. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  34. package/dist/bindings/utils/detectGlibc.js +36 -0
  35. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  36. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  37. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  38. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  39. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +12 -6
  40. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  41. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
  42. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  43. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  44. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  45. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  46. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  47. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  48. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  49. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  50. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  51. package/dist/bindings/utils/hasFileInPath.js +34 -0
  52. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  53. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  54. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  55. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  56. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +13 -0
  57. package/dist/bindings/utils/logDistroInstallInstruction.js +38 -0
  58. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  59. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +9 -2
  60. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +10 -4
  61. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -1
  62. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  63. package/dist/bindings/utils/testBindingBinary.js +98 -0
  64. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  65. package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
  66. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  67. package/dist/chatWrappers/GemmaChatWrapper.d.ts +18 -0
  68. package/dist/chatWrappers/GemmaChatWrapper.js +86 -0
  69. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  70. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +3 -0
  71. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  72. package/dist/cli/cli.js +2 -0
  73. package/dist/cli/cli.js.map +1 -1
  74. package/dist/cli/commands/BuildCommand.d.ts +5 -5
  75. package/dist/cli/commands/BuildCommand.js +78 -60
  76. package/dist/cli/commands/BuildCommand.js.map +1 -1
  77. package/dist/cli/commands/DebugCommand.js +3 -9
  78. package/dist/cli/commands/DebugCommand.js.map +1 -1
  79. package/dist/cli/commands/DownloadCommand.d.ts +5 -5
  80. package/dist/cli/commands/DownloadCommand.js +97 -56
  81. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  82. package/dist/cli/commands/InspectCommand.d.ts +7 -0
  83. package/dist/cli/commands/InspectCommand.js +113 -0
  84. package/dist/cli/commands/InspectCommand.js.map +1 -0
  85. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  86. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  87. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  88. package/dist/config.d.ts +3 -3
  89. package/dist/config.js +10 -11
  90. package/dist/config.js.map +1 -1
  91. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  92. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  93. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  94. package/dist/gguf/GGUFInsights.d.ts +28 -0
  95. package/dist/gguf/GGUFInsights.js +58 -0
  96. package/dist/gguf/GGUFInsights.js.map +1 -0
  97. package/dist/gguf/GGUFMetadata.d.ts +19 -0
  98. package/dist/gguf/GGUFMetadata.js +38 -0
  99. package/dist/gguf/GGUFMetadata.js.map +1 -0
  100. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +3 -0
  101. package/dist/gguf/errors/InvalidGGUFMagicError.js +6 -0
  102. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +1 -0
  103. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +3 -0
  104. package/dist/gguf/errors/MetadataNotParsedYetError.js +6 -0
  105. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +1 -0
  106. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +3 -0
  107. package/dist/gguf/errors/MissingNodeLlamaError.js +6 -0
  108. package/dist/gguf/errors/MissingNodeLlamaError.js.map +1 -0
  109. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +5 -0
  110. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +12 -0
  111. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +1 -0
  112. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +4 -0
  113. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +8 -0
  114. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +1 -0
  115. package/dist/gguf/ggufParser/GGUFParser.d.ts +18 -0
  116. package/dist/gguf/ggufParser/GGUFParser.js +123 -0
  117. package/dist/gguf/ggufParser/GGUFParser.js.map +1 -0
  118. package/dist/gguf/ggufParser/GGUFTypes.d.ts +257 -0
  119. package/dist/gguf/ggufParser/GGUFTypes.js +2 -0
  120. package/dist/gguf/ggufParser/GGUFTypes.js.map +1 -0
  121. package/dist/gguf/ggufParser/checkArchitecture.d.ts +14 -0
  122. package/dist/gguf/ggufParser/checkArchitecture.js +74 -0
  123. package/dist/gguf/ggufParser/checkArchitecture.js.map +1 -0
  124. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +38 -0
  125. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +83 -0
  126. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +1 -0
  127. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +14 -0
  128. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +35 -0
  129. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +1 -0
  130. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +15 -0
  131. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +40 -0
  132. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +1 -0
  133. package/dist/index.d.ts +3 -1
  134. package/dist/index.js +3 -1
  135. package/dist/index.js.map +1 -1
  136. package/dist/utils/LlamaText.js +2 -2
  137. package/dist/utils/LlamaText.js.map +1 -1
  138. package/dist/utils/cmake.js +23 -10
  139. package/dist/utils/cmake.js.map +1 -1
  140. package/dist/utils/getBuildDefaults.d.ts +1 -3
  141. package/dist/utils/getBuildDefaults.js +2 -4
  142. package/dist/utils/getBuildDefaults.js.map +1 -1
  143. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  144. package/dist/utils/getConsoleLogPrefix.js +2 -2
  145. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  146. package/dist/utils/mergeUnionTypes.d.ts +6 -0
  147. package/dist/utils/mergeUnionTypes.js +2 -0
  148. package/dist/utils/mergeUnionTypes.js.map +1 -0
  149. package/dist/utils/parseTextTemplate.d.ts +66 -0
  150. package/dist/utils/parseTextTemplate.js +116 -0
  151. package/dist/utils/parseTextTemplate.js.map +1 -0
  152. package/llama/CMakeLists.txt +11 -5
  153. package/llama/addon.cpp +31 -7
  154. package/llama/binariesGithubRelease.json +1 -1
  155. package/llama/gitRelease.bundle +0 -0
  156. package/llama/llama.cpp.info.json +1 -1
  157. package/llamaBins/linux-arm64/.buildMetadata.json +1 -1
  158. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  159. package/llamaBins/linux-armv7l/.buildMetadata.json +1 -1
  160. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  161. package/llamaBins/linux-x64/.buildMetadata.json +1 -1
  162. package/llamaBins/linux-x64/llama-addon.node +0 -0
  163. package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -1
  164. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  165. package/llamaBins/linux-x64-vulkan/.buildMetadata.json +1 -1
  166. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  167. package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -1
  168. package/llamaBins/mac-arm64-metal/ggml-metal.metal +815 -106
  169. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  170. package/llamaBins/mac-x64/.buildMetadata.json +1 -1
  171. package/llamaBins/mac-x64/llama-addon.node +0 -0
  172. package/llamaBins/win-x64/.buildMetadata.json +1 -1
  173. package/llamaBins/win-x64/llama-addon.node +0 -0
  174. package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -1
  175. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  176. package/llamaBins/win-x64-vulkan/.buildMetadata.json +1 -1
  177. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  178. package/package.json +6 -4
  179. package/dist/cli/utils/logEnabledComputeLayers.d.ts +0 -8
  180. package/dist/cli/utils/logEnabledComputeLayers.js +0 -11
  181. package/dist/cli/utils/logEnabledComputeLayers.js.map +0 -1
@@ -0,0 +1,257 @@
1
+ import { fileTypeIntToString } from "./checkArchitecture.js";
2
+ export type GGUFArchitectureType = "llama" | "falcon" | "mpt" | "gptneox" | "gptj" | "gpt2" | "bloom" | "rwkv" | "whisper";
3
+ export type GGUFMetadataArchitectureProperties = {
4
+ context_length: number;
5
+ embedding_length: number;
6
+ block_count: number;
7
+ feed_forward_length: number;
8
+ use_parallel_residual: boolean;
9
+ tensor_data_layout: string;
10
+ expert_count: number;
11
+ expert_used_count: number;
12
+ attention: {
13
+ head_count: number;
14
+ head_count_kv: number;
15
+ max_alibi_bias: number;
16
+ clamp_kqv: number;
17
+ key_length: number;
18
+ value_length: number;
19
+ layer_norm_epsilon: number;
20
+ layer_norm_rms_epsilon: number;
21
+ };
22
+ rope: {
23
+ dimension_count: number;
24
+ freq_base: number;
25
+ scaling: {
26
+ type: string;
27
+ factor: number;
28
+ original_context_length: number;
29
+ finetuned: boolean;
30
+ };
31
+ };
32
+ };
33
+ export type GGUFMetadataGeneralProperties = {
34
+ architecture: GGUFArchitectureType;
35
+ /**
36
+ * The version of the quantization format. Not required if the model is not
37
+ * quantized (i.e. no tensors are quantized). If any tensors are quantized,
38
+ * this must be present. This is separate from the quantization scheme of the
39
+ * tensors themselves; the quantization version may change without changing the
40
+ * scheme's name (e.g. the quantization scheme is Q5_K, and the quantization
41
+ * version is 4).
42
+ */
43
+ quantization_version: string;
44
+ /**
45
+ * The global alignment to use (see the GGUF specification). This can vary to allow
46
+ * for different alignment schemes, but it must be a multiple of 8. Some
47
+ * writers may not write the alignment. If the alignment is not specified,
48
+ * assume it is `32`.
49
+ */
50
+ alignment: string;
51
+ /**
52
+ * The name of the model. This should be a human-readable name that can be
53
+ * used to identify the model. It should be unique within the community
54
+ * that the model is defined in.
55
+ */
56
+ name: string;
57
+ author: string;
58
+ /**
59
+ * URL to the model's homepage. This can be a GitHub repo, a paper, etc.
60
+ */
61
+ url: string;
62
+ /**
63
+ * free-form description of the model including anything that isn't
64
+ * covered by the other fields
65
+ */
66
+ description: string;
67
+ /**
68
+ * License of the model, expressed as a SPDX license expression
69
+ * (e.g. `MIT OR Apache-2.0`). *Should not* include any other information,
70
+ * such as the license text or the URL to the license.
71
+ */
72
+ license: string;
73
+ /**
74
+ * Information about where this model came from. This is useful for tracking
75
+ * the provenance of the model, and for finding the original source if the
76
+ * model is modified. For a model that was converted from GGML, for
77
+ * example, these keys would point to the model that was converted from.
78
+ */
79
+ source: {
80
+ /**
81
+ * URL to the source of the model. Can be a GitHub repo, a paper, etc.
82
+ */
83
+ url: string;
84
+ huggingface: {
85
+ repository: string;
86
+ };
87
+ };
88
+ /**
89
+ * An enumerated value describing the type of the majority of the tensors
90
+ * in the file. Optional; can be inferred from the tensor types.
91
+ */
92
+ file_type: ReturnType<typeof fileTypeIntToString>;
93
+ };
94
+ export type GGUFMetadataAny = {
95
+ general: GGUFMetadataGeneralProperties;
96
+ } & {
97
+ [key in GGUFArchitectureType]: GGUFMetadataArchitectureProperties;
98
+ };
99
+ export type GGUFMetadataLLAMA = {
100
+ general: GGUFMetadataGeneralProperties & {
101
+ architecture: "llama";
102
+ };
103
+ llama: {
104
+ context_length: number;
105
+ embedding_length: number;
106
+ block_count: number;
107
+ feed_forward_length: number;
108
+ attention: {
109
+ head_count: number;
110
+ layer_norm_rms_epsilon: number;
111
+ head_count_kv?: number;
112
+ };
113
+ rope: {
114
+ dimension_count: number;
115
+ scale?: number;
116
+ };
117
+ expert_count?: number;
118
+ expert_used_count?: number;
119
+ tensor_data_layout?: string;
120
+ };
121
+ };
122
+ export type GGUFMetadataFalcon = {
123
+ general: GGUFMetadataGeneralProperties & {
124
+ architecture: "falcon";
125
+ };
126
+ falcon: {
127
+ context_length: number;
128
+ embedding_length: number;
129
+ block_count: number;
130
+ attention: {
131
+ head_count: number;
132
+ head_count_kv: number;
133
+ use_norm: boolean;
134
+ layer_norm_epsilon: number;
135
+ };
136
+ tensor_data_layout?: string;
137
+ };
138
+ };
139
+ export type GGUFMetadataMPT = {
140
+ general: GGUFMetadataGeneralProperties & {
141
+ architecture: "mpt";
142
+ };
143
+ mpt: {
144
+ context_length: number;
145
+ embedding_length: number;
146
+ block_count: number;
147
+ attention: {
148
+ head_count: number;
149
+ alibi_bias_max: number;
150
+ clip_kqv: number;
151
+ layer_norm_epsilon: number;
152
+ };
153
+ };
154
+ };
155
+ export type GGUFMetadataGPTNeoX = {
156
+ general: GGUFMetadataGeneralProperties & {
157
+ architecture: "gptneox";
158
+ };
159
+ gptneox: {
160
+ context_length: number;
161
+ embedding_length: number;
162
+ block_count: number;
163
+ use_parallel_residual: boolean;
164
+ rope: {
165
+ dimension_count: number;
166
+ freq_base: number;
167
+ scale?: number;
168
+ };
169
+ attention: {
170
+ head_count: number;
171
+ layer_norm_epsilon: number;
172
+ };
173
+ };
174
+ };
175
+ export type GGUFMetadataGPTJ = {
176
+ general: GGUFMetadataGeneralProperties & {
177
+ architecture: "gptj";
178
+ };
179
+ gptj: {
180
+ context_length: number;
181
+ embedding_length: number;
182
+ block_count: number;
183
+ rope: {
184
+ dimension_count: number;
185
+ scale?: number;
186
+ };
187
+ attention: {
188
+ head_count: number;
189
+ layer_norm_epsilon: number;
190
+ };
191
+ };
192
+ };
193
+ export type GGUFMetadataGPT2 = {
194
+ general: GGUFMetadataGeneralProperties & {
195
+ architecture: "gpt2";
196
+ };
197
+ gpt2: {
198
+ context_length: number;
199
+ embedding_length: number;
200
+ block_count: number;
201
+ attention: {
202
+ head_count: number;
203
+ layer_norm_epsilon: number;
204
+ };
205
+ };
206
+ };
207
+ export type GGUFMetadataBloom = {
208
+ general: GGUFMetadataGeneralProperties & {
209
+ architecture: "bloom";
210
+ };
211
+ bloom: {
212
+ context_length: number;
213
+ embedding_length: number;
214
+ block_count: number;
215
+ feed_forward_length: number;
216
+ attention: {
217
+ head_count: number;
218
+ layer_norm_epsilon: number;
219
+ };
220
+ };
221
+ };
222
+ export type GGUFMetadataRWKV = {
223
+ general: GGUFMetadataGeneralProperties & {
224
+ architecture: "rwkv";
225
+ };
226
+ rwkv: {
227
+ context_length: number;
228
+ block_count: number;
229
+ embedding_length: number;
230
+ feed_forward_length: number;
231
+ };
232
+ };
233
+ export type GGUFMetadataWhisper = {
234
+ general: GGUFMetadataGeneralProperties & {
235
+ architecture: "whisper";
236
+ };
237
+ whisper: {
238
+ encoder: {
239
+ context_length: number;
240
+ embedding_length: number;
241
+ block_count: number;
242
+ mels_count: number;
243
+ attention: {
244
+ head_count: number;
245
+ };
246
+ };
247
+ decoder: {
248
+ context_length: number;
249
+ embedding_length: number;
250
+ block_count: number;
251
+ attention: {
252
+ head_count: number;
253
+ };
254
+ };
255
+ };
256
+ };
257
+ export type GGUFMetadata = GGUFMetadataLLAMA | GGUFMetadataFalcon | GGUFMetadataMPT | GGUFMetadataGPTNeoX | GGUFMetadataGPTJ | GGUFMetadataGPT2 | GGUFMetadataBloom | GGUFMetadataRWKV | GGUFMetadataWhisper;
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=GGUFTypes.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GGUFTypes.js","sourceRoot":"","sources":["../../../src/gguf/ggufParser/GGUFTypes.ts"],"names":[],"mappings":""}
@@ -0,0 +1,14 @@
1
+ import { GGUFMetadata, GGUFMetadataBloom, GGUFMetadataFalcon, GGUFMetadataGPT2, GGUFMetadataGPTJ, GGUFMetadataGPTNeoX, GGUFMetadataLLAMA, GGUFMetadataMPT, GGUFMetadataRWKV } from "./GGUFTypes.js";
2
+ export declare function isLlamaMetadata(metadata: GGUFMetadata): metadata is GGUFMetadataLLAMA;
3
+ export declare function isMPTMetadata(metadata: GGUFMetadata): metadata is GGUFMetadataMPT;
4
+ export declare function isGPTNeoXMetadata(metadata: GGUFMetadata): metadata is GGUFMetadataGPTNeoX;
5
+ export declare function isGPTJMetadata(metadata: GGUFMetadata): metadata is GGUFMetadataGPTJ;
6
+ export declare function isGPT2Metadata(metadata: GGUFMetadata): metadata is GGUFMetadataGPT2;
7
+ export declare function isBloomMetadata(metadata: GGUFMetadata): metadata is GGUFMetadataBloom;
8
+ export declare function isFalconMetadata(metadata: GGUFMetadata): metadata is GGUFMetadataFalcon;
9
+ export declare function isRWKVMetadata(metadata: GGUFMetadata): metadata is GGUFMetadataRWKV;
10
+ /**
11
+ * https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#general-metadata
12
+ * Convert a file type from its integer enum value to its string name
13
+ */
14
+ export declare function fileTypeIntToString(fileType?: number): "ALL_F32" | "MOSTLY_F16" | "MOSTLY_Q4_0" | "MOSTLY_Q4_1" | "MOSTLY_Q4_1_SOME_F16" | "MOSTLY_Q4_2" | "MOSTLY_Q4_3" | "MOSTLY_Q8_0" | "MOSTLY_Q5_0" | "MOSTLY_Q5_1" | "MOSTLY_Q2_K" | "MOSTLY_Q3_K_S" | "MOSTLY_Q3_K_M" | "MOSTLY_Q3_K_L" | "MOSTLY_Q4_K_S" | "MOSTLY_Q4_K_M" | "MOSTLY_Q5_K_S" | "MOSTLY_Q5_K_M" | "MOSTLY_Q6_K" | undefined;
@@ -0,0 +1,74 @@
1
/**
 * Type guard for metadata whose `general.architecture` is `"llama"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"llama"`
 */
export function isLlamaMetadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "llama";
}
4
/**
 * Type guard for metadata whose `general.architecture` is `"mpt"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"mpt"`
 */
export function isMPTMetadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "mpt";
}
7
/**
 * Type guard for metadata whose `general.architecture` is `"gptneox"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"gptneox"`
 */
export function isGPTNeoXMetadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "gptneox";
}
10
/**
 * Type guard for metadata whose `general.architecture` is `"gptj"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"gptj"`
 */
export function isGPTJMetadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "gptj";
}
13
/**
 * Type guard for metadata whose `general.architecture` is `"gpt2"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"gpt2"`
 */
export function isGPT2Metadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "gpt2";
}
16
/**
 * Type guard for metadata whose `general.architecture` is `"bloom"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"bloom"`
 */
export function isBloomMetadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "bloom";
}
19
/**
 * Type guard for metadata whose `general.architecture` is `"falcon"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"falcon"`
 */
export function isFalconMetadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "falcon";
}
22
/**
 * Type guard for metadata whose `general.architecture` is `"rwkv"`.
 *
 * @param metadata - metadata object exposing `general.architecture`
 * @returns `true` only when the architecture is exactly `"rwkv"`
 */
export function isRWKVMetadata(metadata) {
    const { architecture } = metadata.general;
    return architecture === "rwkv";
}
25
// Names of the file type values, positioned so that each name's index is its integer value
const FILE_TYPE_NAMES = Object.freeze([
    "ALL_F32", // 0
    "MOSTLY_F16", // 1
    "MOSTLY_Q4_0", // 2
    "MOSTLY_Q4_1", // 3
    "MOSTLY_Q4_1_SOME_F16", // 4
    "MOSTLY_Q4_2", // 5
    "MOSTLY_Q4_3", // 6
    "MOSTLY_Q8_0", // 7
    "MOSTLY_Q5_0", // 8
    "MOSTLY_Q5_1", // 9
    "MOSTLY_Q2_K", // 10
    "MOSTLY_Q3_K_S", // 11
    "MOSTLY_Q3_K_M", // 12
    "MOSTLY_Q3_K_L", // 13
    "MOSTLY_Q4_K_S", // 14
    "MOSTLY_Q4_K_M", // 15
    "MOSTLY_Q5_K_S", // 16
    "MOSTLY_Q5_K_M", // 17
    "MOSTLY_Q6_K" // 18
]);

/**
 * https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#general-metadata
 * Convert a file type from its integer enum value to its string name.
 *
 * @param fileType - integer file type value; may be omitted
 * @returns the matching name (for example `"MOSTLY_Q4_0"` for `2`), or `undefined`
 * when `fileType` is `null`/`undefined` or is not one of the known values 0-18
 */
export function fileTypeIntToString(fileType) {
    // Only real numbers may index the table. This keeps the strict-equality
    // semantics of a `switch`: strings such as "1" or "length" and bigints
    // must not resolve to an entry.
    if (typeof fileType !== "number")
        return undefined;

    // Out-of-range, fractional and NaN indexes all miss and yield `undefined`
    return FILE_TYPE_NAMES[fileType];
}
74
+ //# sourceMappingURL=checkArchitecture.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"checkArchitecture.js","sourceRoot":"","sources":["../../../src/gguf/ggufParser/checkArchitecture.ts"],"names":[],"mappings":"AAWA,MAAM,UAAU,eAAe,CAAE,QAAsB;IACnD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,OAAO,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,aAAa,CAAE,QAAsB;IACjD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,KAAK,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAE,QAAsB;IACrD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,SAAS,CAAC;AACvD,CAAC;AAED,MAAM,UAAU,cAAc,CAAE,QAAsB;IAClD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,MAAM,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,cAAc,CAAE,QAAsB;IAClD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,MAAM,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,eAAe,CAAE,QAAsB;IACnD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,OAAO,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAE,QAAsB;IACpD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,QAAQ,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,cAAc,CAAE,QAAsB;IAClD,OAAO,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,MAAM,CAAC;AACpD,CAAC;AAGD;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAAiB;IACjD,IAAI,QAAQ,IAAI,IAAI;QAAE,OAAO;IAC7B,QAAQ,QAAQ,EAAE;QACd,KAAK,CAAC;YACF,OAAO,SAAS,CAAC;QACrB,KAAK,CAAC;YACF,OAAO,YAAY,CAAC;QACxB,KAAK,CAAC;YACF,OAAO,aAAa,CAAC;QACzB,KAAK,CAAC;YACF,OAAO,aAAa,CAAC;QACzB,KAAK,CAAC;YACF,OAAO,sBAAsB,CAAC;QAClC,KAAK,CAAC;YACF,OAAO,aAAa,CAAC;QACzB,KAAK,CAAC;YACF,OAAO,aAAa,CAAC;QACzB,KAAK,CAAC;YACF,OAAO,aAAa,CAAC;QACzB,KAAK,CAAC;YACF,OAAO,aAAa,CAAC;QACzB,KAAK,CAAC;YACF,OAAO,aAAa,CAAC;QACzB,KAAK,EAAE;YACH,OAAO,aAAa,CAAC;QACzB,KAAK,EAAE;YACH,OAAO,eAAe,CAAC;QAC3B,KAAK,EAAE;YACH,OAAO,eAAe,CAAC;QAC3B,KAAK,EAAE;YACH,OAAO,eAAe,CAAC;QAC3B,KAAK,EAAE;YACH,OAAO,eAAe,CAAC;QAC3B,KAAK,EAAE;YACH,OAAO,eAAe,CAAC;QAC3B,KAAK,EAAE;YACH,OAAO,eAAe,CAAC;QAC3B,KAAK,EAAE;YACH,OAAO,eAAe,CAAC;QAC3B,KAAK,EAAE;YACH,OAAO,aAAa,CAAC;KAC5B;IAED,OAAO;AACX,CAAC"}
@@ -0,0 +1,38 @@
1
+ /// <reference types="node" />
2
+ import { Buffer } from "buffer";
3
+ export declare const METHOD_TO_BYTE_COUNT: {
4
+ readUint8: number;
5
+ readUint16: number;
6
+ readUint32: number;
7
+ readUint64: number;
8
+ readInt8: number;
9
+ readInt16: number;
10
+ readInt32: number;
11
+ readInt64: number;
12
+ readFloat32: number;
13
+ readFloat64: number;
14
+ readBool: number;
15
+ };
16
+ export declare const ALLOCATION_SIZE: number;
17
+ export default abstract class GGUFBaseStream {
18
+ protected _buffer: Buffer;
19
+ protected constructor();
20
+ abstract readNBytes(numBytes: number, offset?: number): Promise<Buffer>;
21
+ readUint8(offset: number): Promise<number>;
22
+ readUint16(offset: number): Promise<number>;
23
+ readUint32(offset: number): Promise<number>;
24
+ readUint64(offset: number): Promise<bigint>;
25
+ readInt8(offset: number): Promise<number>;
26
+ readInt16(offset: number): Promise<number>;
27
+ readInt32(offset: number): Promise<number>;
28
+ readInt64(offset: number): Promise<bigint>;
29
+ readFloat32(offset: number): Promise<number>;
30
+ readFloat64(offset: number): Promise<number>;
31
+ readBool(offset: number): Promise<boolean>;
32
+ readString(offset: number): Promise<{
33
+ string: string;
34
+ newOffset: number;
35
+ }>;
36
+ protected _addToBuffer(buffer: Buffer): void;
37
+ static castNumber(value: bigint): number | bigint;
38
+ }
@@ -0,0 +1,83 @@
1
+ import { Buffer } from "buffer";
2
// Widths, in bytes, of the fixed-size values the stream readers decode
const ONE_BYTE = 1;
const TWO_BYTES = 2;
const FOUR_BYTES = 4;
const EIGHT_BYTES = 8;

const MEGABYTE = 1024 * 1024;

/**
 * Number of bytes each typed reader method requests from the stream,
 * keyed by the reader method's name.
 */
export const METHOD_TO_BYTE_COUNT = {
    readUint8: ONE_BYTE,
    readUint16: TWO_BYTES,
    readUint32: FOUR_BYTES,
    readUint64: EIGHT_BYTES,
    readInt8: ONE_BYTE,
    readInt16: TWO_BYTES,
    readInt32: FOUR_BYTES,
    readInt64: EIGHT_BYTES,
    readFloat32: FOUR_BYTES,
    readFloat64: EIGHT_BYTES,
    readBool: ONE_BYTE
};

/** Allocation size in bytes (1.5MB) */
export const ALLOCATION_SIZE = 1.5 * MEGABYTE;
16
/**
 * Base class for readers that decode typed values from a byte source.
 *
 * Subclasses implement `readNBytes(numBytes, offset)`. Every typed reader below
 * requests the bytes it needs through that method and decodes them; multi-byte
 * values are decoded as little-endian.
 */
export default class GGUFBaseStream {
    /** Bytes collected so far through `_addToBuffer` */
    _buffer = Buffer.alloc(0);

    constructor() {
    }

    /**
     * Reads an unsigned 8-bit integer.
     * @param offset - position passed through to `readNBytes`
     */
    async readUint8(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readUint8, offset);
        return bytes.readUInt8();
    }

    /**
     * Reads an unsigned little-endian 16-bit integer.
     * @param offset - position passed through to `readNBytes`
     */
    async readUint16(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readUint16, offset);
        return bytes.readUInt16LE();
    }

    /**
     * Reads an unsigned little-endian 32-bit integer.
     * @param offset - position passed through to `readNBytes`
     */
    async readUint32(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readUint32, offset);
        return bytes.readUInt32LE();
    }

    /**
     * Reads an unsigned little-endian 64-bit integer.
     * @param offset - position passed through to `readNBytes`
     * @returns the value as a `bigint`
     */
    async readUint64(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readUint64, offset);
        return bytes.readBigUInt64LE();
    }

    /**
     * Reads a signed 8-bit integer.
     * @param offset - position passed through to `readNBytes`
     */
    async readInt8(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readInt8, offset);
        return bytes.readInt8();
    }

    /**
     * Reads a signed little-endian 16-bit integer.
     * @param offset - position passed through to `readNBytes`
     */
    async readInt16(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readInt16, offset);
        return bytes.readInt16LE();
    }

    /**
     * Reads a signed little-endian 32-bit integer.
     * @param offset - position passed through to `readNBytes`
     */
    async readInt32(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readInt32, offset);
        return bytes.readInt32LE();
    }

    /**
     * Reads a signed little-endian 64-bit integer.
     * @param offset - position passed through to `readNBytes`
     * @returns the value as a `bigint`
     */
    async readInt64(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readInt64, offset);
        return bytes.readBigInt64LE();
    }

    /**
     * Reads a little-endian 32-bit float.
     * @param offset - position passed through to `readNBytes`
     */
    async readFloat32(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readFloat32, offset);
        return bytes.readFloatLE();
    }

    /**
     * Reads a little-endian 64-bit float.
     * @param offset - position passed through to `readNBytes`
     */
    async readFloat64(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readFloat64, offset);
        return bytes.readDoubleLE();
    }

    /**
     * Reads a one-byte boolean.
     * @param offset - position passed through to `readNBytes`
     * @returns `true` only when the byte equals `1`
     */
    async readBool(offset) {
        const bytes = await this.readNBytes(METHOD_TO_BYTE_COUNT.readBool, offset);
        return bytes.readUInt8() === 1;
    }

    /**
     * Reads a length-prefixed string: an unsigned 64-bit byte length followed by
     * that many bytes of text, decoded as UTF-8.
     *
     * @param offset - position of the length prefix
     * @returns the decoded string and `newOffset`, the position right after the
     * last byte of the string
     */
    async readString(offset) {
        const length = Number(await this.readUint64(offset));
        const textOffset = offset + METHOD_TO_BYTE_COUNT.readUint64;

        const readLength = METHOD_TO_BYTE_COUNT.readUint8 * length;
        const stringBytes = await this.readNBytes(readLength, textOffset);

        return {
            // Decode the whole buffer at once. Building the string with
            // `String.fromCharCode(...bytes)` treats every byte as its own
            // character (corrupting multi-byte UTF-8 sequences) and throws a
            // RangeError on long strings because each byte becomes a call argument.
            string: stringBytes.toString("utf8"),
            newOffset: textOffset + readLength
        };
    }

    /**
     * Appends `buffer` to the end of `_buffer`.
     * @param buffer - bytes to append
     */
    _addToBuffer(buffer) {
        this._buffer = Buffer.concat([this._buffer, buffer]);
    }

    /**
     * Converts a `bigint` to a `number` when that is lossless.
     *
     * @param value - the value to convert
     * @returns a `number` when `value` lies within the safe integer range
     * (`Number.MIN_SAFE_INTEGER` to `Number.MAX_SAFE_INTEGER`); otherwise the
     * original `bigint`, so no precision is lost in either direction
     */
    static castNumber(value) {
        if (value > Number.MAX_SAFE_INTEGER || value < Number.MIN_SAFE_INTEGER)
            return value;

        return Number(value);
    }
}
83
+ //# sourceMappingURL=GGUFBaseStream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GGUFBaseStream.js","sourceRoot":"","sources":["../../../../src/gguf/ggufParser/stream/GGUFBaseStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,MAAM,EAAC,MAAM,QAAQ,CAAC;AAE9B,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAChC,SAAS,EAAE,CAAC;IACZ,UAAU,EAAE,CAAC;IACb,UAAU,EAAE,CAAC;IACb,UAAU,EAAE,CAAC;IACb,QAAQ,EAAE,CAAC;IACX,SAAS,EAAE,CAAC;IACZ,SAAS,EAAE,CAAC;IACZ,SAAS,EAAE,CAAC;IACZ,WAAW,EAAE,CAAC;IACd,WAAW,EAAE,CAAC;IACd,QAAQ,EAAE,CAAC;CACd,CAAC;AAEF,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,GAAG,IAAI,GAAG,GAAG,CAAC,CAAC,QAAQ;AAE1D,MAAM,CAAC,OAAO,OAAgB,cAAc;IAC9B,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACpC;IACA,CAAC;IAIM,KAAK,CAAC,SAAS,CAAC,MAAc;QACjC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC/E,OAAO,QAAQ,CAAC,SAAS,EAAE,CAAC;IAChC,CAAC;IAEM,KAAK,CAAC,UAAU,CAAC,MAAc;QAClC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAChF,OAAO,QAAQ,CAAC,YAAY,EAAE,CAAC;IACnC,CAAC;IAEM,KAAK,CAAC,UAAU,CAAC,MAAc;QAClC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAChF,OAAO,QAAQ,CAAC,YAAY,EAAE,CAAC;IACnC,CAAC;IAEM,KAAK,CAAC,UAAU,CAAC,MAAc;QAClC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAChF,OAAO,QAAQ,CAAC,eAAe,EAAE,CAAC;IACtC,CAAC;IAEM,KAAK,CAAC,QAAQ,CAAC,MAAc;QAChC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC9E,OAAO,QAAQ,CAAC,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAEM,KAAK,CAAC,SAAS,CAAC,MAAc;QACjC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC/E,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC;IAClC,CAAC;IAEM,KAAK,CAAC,SAAS,CAAC,MAAc;QACjC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC/E,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC;IAClC,CAAC;IAEM,KAAK,CAAC,SAAS,CAAC,MAAc;QACjC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC/E,OAAO,QAAQ,CAAC,cAAc,EAAE,CAAC;IACrC,CAAC;IAEM,KAAK,CAAC,WAAW,CAAC,MAAc;QACnC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,WAAW,EA
AE,MAAM,CAAC,CAAC;QACjF,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC;IAClC,CAAC;IAEM,KAAK,CAAC,WAAW,CAAC,MAAc;QACnC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;QACjF,OAAO,QAAQ,CAAC,YAAY,EAAE,CAAC;IACnC,CAAC;IAEM,KAAK,CAAC,QAAQ,CAAC,MAAc;QAChC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC/E,OAAO,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACtC,CAAC;IAEM,KAAK,CAAC,UAAU,CAAC,MAAc;QAClC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;QACrD,MAAM,IAAI,oBAAoB,CAAC,UAAU,CAAC;QAE1C,MAAM,UAAU,GAAG,oBAAoB,CAAC,SAAS,GAAG,MAAM,CAAC;QAC3D,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAE9D,OAAO;YACH,MAAM,EAAE,MAAM,CAAC,YAAY,CAAC,GAAG,WAAW,CAAC;YAC3C,SAAS,EAAE,MAAM,GAAG,UAAU;SACjC,CAAC;IACN,CAAC;IAES,YAAY,CAAC,MAAc;QACjC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;IACzD,CAAC;IAEM,MAAM,CAAC,UAAU,CAAC,KAAa;QAClC,IAAI,KAAK,GAAG,MAAM,CAAC,gBAAgB;YAAE,OAAO,KAAK,CAAC;QAClD,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IACzB,CAAC;CACJ"}
@@ -0,0 +1,14 @@
1
+ /// <reference types="node" />
2
+ import retry from "async-retry";
3
+ import GgufBaseStream from "./GGUFBaseStream.js";
4
+ type GGUFFetchStreamOptions = {
5
+ retry: retry.Options;
6
+ };
7
+ export default class GGUFFetchStream extends GgufBaseStream {
8
+ readonly url: string;
9
+ readonly options: Partial<GGUFFetchStreamOptions>;
10
+ constructor(url: string, options?: Partial<GGUFFetchStreamOptions>);
11
+ readNBytes(numBytes: number, offset?: number): Promise<Buffer>;
12
+ private _fetchBytesWithoutRetry;
13
+ }
14
+ export {};
@@ -0,0 +1,35 @@
1
+ import retry from "async-retry";
2
+ import { withLock } from "lifecycle-utils";
3
+ import GgufBaseStream, { ALLOCATION_SIZE } from "./GGUFBaseStream.js";
4
/**
 * GGUF byte stream that lazily downloads a remote file with HTTP `Range` requests.
 *
 * Downloaded bytes are appended to the `_buffer` inherited from the base stream
 * (via `_addToBuffer`), so the buffer always holds the file's bytes `0..length`.
 */
export default class GGUFFetchStream extends GgufBaseStream {
    url;
    options = {};

    /**
     * @param {string} url - Address of the file to read.
     * @param {{retry?: object}} [options] - `options.retry` is passed to `async-retry` as its options.
     */
    constructor(url, options = {}) {
        super();
        this.options = options;
        this.url = url;
    }

    /**
     * Returns `numBytes` bytes starting at `offset`, downloading whatever part of
     * that span is not buffered yet (plus `ALLOCATION_SIZE` bytes of read-ahead).
     *
     * @param {number} numBytes - Number of bytes to return.
     * @param {number} [offset=0] - Position of the first byte, counted from the start of the file.
     * @returns {Promise<Buffer>} A view over the internal buffer (not a copy).
     * @throws {Error} When the server answers with an error status or sends no new bytes.
     */
    async readNBytes(numBytes, offset = 0) {
        const requiredLength = offset + numBytes;

        return await withLock(this, "_lock", async () => {
            // `<` (not `<=`): a buffer that ends exactly at `requiredLength` already satisfies the request.
            while (this._buffer.length < requiredLength) {
                const bufferedLength = this._buffer.length;
                const missingBytes = await retry(async () => {
                    return await this._fetchBytesWithoutRetry(bufferedLength, requiredLength + ALLOCATION_SIZE);
                }, this.options.retry);

                // Without this guard an empty response would make the loop spin forever.
                if (missingBytes.length === 0) {
                    throw new Error(
                        `Received no data for bytes ${bufferedLength}-${requiredLength} of "${this.url}"`
                    );
                }

                this._addToBuffer(missingBytes);
            }

            return this._buffer.subarray(offset, requiredLength);
        });
    }

    /**
     * Performs a single ranged request for bytes `start` through `end` (inclusive, per HTTP range syntax).
     *
     * @param {number} start - First byte to request.
     * @param {number} end - Last byte to request.
     * @returns {Promise<Buffer>} File bytes beginning at `start`.
     * @throws {Error} When the response status is not successful.
     */
    async _fetchBytesWithoutRetry(start, end) {
        const response = await fetch(this.url, {
            headers: {
                Range: `bytes=${start}-${end}`,
                accept: "*/*"
            }
        });

        // An error page must never be appended to the buffer as if it were file content.
        if (!response.ok) {
            throw new Error(
                `Failed to fetch bytes ${start}-${end} of "${this.url}": ${response.status} ${response.statusText}`
            );
        }

        const body = Buffer.from(await response.arrayBuffer());

        // 206 carries exactly the requested range. Any other success status means the server
        // ignored the Range header and sent the resource from byte 0, so drop the part that is
        // already buffered to keep buffer positions equal to file positions.
        if (response.status === 206) {
            return body;
        }
        return body.subarray(start);
    }
}
35
+ //# sourceMappingURL=GGUFFetchStream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GGUFFetchStream.js","sourceRoot":"","sources":["../../../../src/gguf/ggufParser/stream/GGUFFetchStream.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,aAAa,CAAC;AAChC,OAAO,EAAC,QAAQ,EAAC,MAAM,iBAAiB,CAAC;AAEzC,OAAO,cAAc,EAAE,EAAC,eAAe,EAAC,MAAM,qBAAqB,CAAC;AAMpE,MAAM,CAAC,OAAO,OAAO,eAAgB,SAAQ,cAAc;IACvC,GAAG,CAAS;IACZ,OAAO,GAAoC,EAAE,CAAC;IAE9D,YAAmB,GAAW,EAAE,UAA2C,EAAE;QACzE,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC;IACnB,CAAC;IAEe,KAAK,CAAC,UAAU,CAAC,QAAgB,EAAE,MAAM,GAAG,CAAC;QACzD,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,UAAU,qBAAqB;YACrE,IAAI,MAAM,GAAG,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACzC,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,QAAQ,CAAC,CAAC;aAC3D;YAED,MAAM,iBAAiB,GAAG,MAAM,KAAK,CAAC,KAAK,IAAI,EAAE;gBAC7C,OAAO,MAAM,IAAI,CAAC,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,QAAQ,GAAG,eAAe,CAAC,CAAC;YACxG,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAEvB,IAAI,CAAC,YAAY,CAAC,iBAAiB,CAAC,CAAC;YACrC,OAAO,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,uBAAuB,CAAC,KAAa,EAAE,GAAW;QAC5D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE;YACnC,OAAO,EAAE;gBACL,KAAK,EAAE,SAAS,KAAK,IAAI,GAAG,EAAE;gBAC9B,MAAM,EAAE,KAAK;aAChB;SACJ,CAAC,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACpC,CAAC;CACJ"}
@@ -0,0 +1,15 @@
1
+ /// <reference types="node" />
2
+ import retry from "async-retry";
3
+ import GgufBaseStream from "./GGUFBaseStream.js";
4
/** Option bag for GGUFReadStream; the constructor accepts it wrapped in `Partial<>`. */
+ type GGUFReadStreamOptions = {
5
/** Optional; typed as `Options` from the "async-retry" default import. */
+ retry?: retry.Options;
6
+ mode: string;
7
+ };
8
/** Type declaration for GGUFReadStream, a GgufBaseStream subclass constructed from a path string. */
+ export default class GGUFReadStream extends GgufBaseStream {
9
+ readonly options: GGUFReadStreamOptions;
10
+ readonly path: string;
11
+ constructor(path: string, options?: Partial<GGUFReadStreamOptions>);
12
/** Resolves to a Buffer for the requested byte count; `offset` is optional. */
+ readNBytes(numBytes: number, offset?: number): Promise<Buffer>;
13
+ private _readBytesWithoutRetry;
14
+ }
15
+ export {};
@@ -0,0 +1,40 @@
1
+ import fs from "fs/promises";
2
+ import retry from "async-retry";
3
+ import { withLock } from "lifecycle-utils";
4
+ import GgufBaseStream, { ALLOCATION_SIZE } from "./GGUFBaseStream.js";
5
/** Defaults merged under the caller's options; files are opened with flag "r". */
const DEFAULT_OPTIONS = {
    mode: "r"
};

/**
 * GGUF byte stream that lazily reads a local file.
 *
 * Bytes read from disk are appended to the `_buffer` inherited from the base stream
 * (via `_addToBuffer`), so the buffer always holds the file's bytes `0..length`.
 */
export default class GGUFReadStream extends GgufBaseStream {
    options;
    path;

    /**
     * @param {string} path - Path of the file to read.
     * @param {{retry?: object, mode?: string}} [options] - `mode` is the flag passed to `fs.open`;
     *     `retry` is passed to `async-retry` as its options.
     */
    constructor(path, options = {}) {
        super();
        this.path = path;
        this.options = { ...DEFAULT_OPTIONS, ...options };
    }

    /**
     * Returns `numBytes` bytes starting at `offset`, reading from disk whatever part of
     * that span is not buffered yet (plus `ALLOCATION_SIZE` bytes of read-ahead).
     *
     * @param {number} numBytes - Number of bytes to return.
     * @param {number} [offset=0] - Position of the first byte, counted from the start of the file.
     * @returns {Promise<Buffer>} A view over the internal buffer (not a copy).
     * @throws {Error} When the file ends before the requested span is available.
     */
    async readNBytes(numBytes, offset = 0) {
        const requiredLength = offset + numBytes;

        return await withLock(this, "_lock", async () => {
            // `<` (not `<=`): a buffer that ends exactly at `requiredLength` already satisfies the request.
            while (this._buffer.length < requiredLength) {
                const bufferedLength = this._buffer.length;
                // Size the read from the end of the buffer up to the requested span, so a far
                // `offset` is covered in one read instead of many reopen/read rounds.
                const readSize = requiredLength - bufferedLength + ALLOCATION_SIZE;
                const chunk = await retry(async () => {
                    return await this._readBytesWithoutRetry(readSize, bufferedLength);
                }, this.options.retry);

                // Zero bytes read means end of file; without this guard the loop would spin forever.
                if (chunk.length === 0) {
                    throw new Error(
                        `Unexpected end of file "${this.path}" while reading bytes ${offset}-${requiredLength}`
                    );
                }

                this._addToBuffer(chunk);
            }

            return this._buffer.subarray(offset, requiredLength);
        });
    }

    /**
     * Opens the file, reads up to `numBytes` bytes at file position `offset`, and closes it again.
     *
     * @param {number} numBytes - Maximum number of bytes to read.
     * @param {number} offset - File position to read from.
     * @returns {Promise<Buffer>} Only the bytes actually read, which may be fewer than `numBytes`.
     */
    async _readBytesWithoutRetry(numBytes, offset) {
        const fd = await fs.open(this.path, this.options.mode);
        try {
            const buffer = Buffer.alloc(numBytes);
            const { bytesRead } = await fd.read(buffer, 0, numBytes, offset);
            // Trim the zero-filled tail left by a short read so padding is never treated as file data.
            return buffer.subarray(0, bytesRead);
        } finally {
            await fd.close();
        }
    }
}
40
+ //# sourceMappingURL=GGUFReadStream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GGUFReadStream.js","sourceRoot":"","sources":["../../../../src/gguf/ggufParser/stream/GGUFReadStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,KAAK,MAAM,aAAa,CAAC;AAChC,OAAO,EAAC,QAAQ,EAAC,MAAM,iBAAiB,CAAC;AACzC,OAAO,cAAc,EAAE,EAAC,eAAe,EAAC,MAAM,qBAAqB,CAAC;AAOpE,MAAM,eAAe,GAA0B;IAC3C,IAAI,EAAE,GAAG;CACZ,CAAC;AAEF,MAAM,CAAC,OAAO,OAAO,cAAe,SAAQ,cAAc;IACtC,OAAO,CAAwB;IAC/B,IAAI,CAAS;IAE7B,YAAmB,IAAY,EAAE,UAA0C,EAAE;QACzE,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,OAAO,GAAG,EAAC,GAAG,eAAe,EAAE,GAAG,OAAO,EAAC,CAAC;IACpD,CAAC;IAEe,KAAK,CAAC,UAAU,CAAC,QAAgB,EAAE,MAAM,GAAG,CAAC;QACzD,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,UAAU,qBAAqB;YACrE,IAAI,MAAM,GAAG,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACzC,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,QAAQ,CAAC,CAAC;aAC3D;YAED,MAAM,gBAAgB,GAAG,MAAM,KAAK,CAAC,KAAK,IAAI,EAAE;gBAC5C,OAAO,MAAM,IAAI,CAAC,sBAAsB,CAAC,QAAQ,GAAG,eAAe,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YAC9F,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAEvB,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,CAAC;YACpC,OAAO,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,sBAAsB,CAAC,QAAgB,EAAE,MAAc;QACjE,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACvD,IAAI;YACA,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACtC,MAAM,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;YAC3C,OAAO,MAAM,CAAC;SACjB;gBAAS;YACN,MAAM,EAAE,CAAC,KAAK,EAAE,CAAC;SACpB;IACL,CAAC;CACJ"}
package/dist/index.d.ts CHANGED
@@ -24,6 +24,8 @@ import { ChatMLChatWrapper } from "./chatWrappers/ChatMLChatWrapper.js";
24
24
  import { FalconChatWrapper } from "./chatWrappers/FalconChatWrapper.js";
25
25
  import { AlpacaChatWrapper } from "./chatWrappers/AlpacaChatWrapper.js";
26
26
  import { FunctionaryChatWrapper } from "./chatWrappers/FunctionaryChatWrapper.js";
27
+ import { GemmaChatWrapper } from "./chatWrappers/GemmaChatWrapper.js";
28
+ import { TemplateChatWrapper } from "./TemplateChatWrapper.js";
27
29
  import { resolveChatWrapperBasedOnModel } from "./chatWrappers/resolveChatWrapperBasedOnModel.js";
28
30
  import { resolveChatWrapperBasedOnWrapperTypeName, chatWrapperTypeNames, type ChatWrapperTypeName } from "./bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js";
29
31
  import { LlamaText, SpecialToken, BuiltinSpecialToken, isLlamaText, tokenizeText, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokenJSON } from "./utils/LlamaText.js";
@@ -31,4 +33,4 @@ import { appendUserMessageToChatHistory } from "./utils/appendUserMessageToChatH
31
33
  import { getModuleVersion } from "./utils/getModuleVersion.js";
32
34
  import { type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, isChatModelResponseFunctionCall, type LLamaContextualRepeatPenalty } from "./types.js";
33
35
  import { type GbnfJsonArraySchema, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonObjectSchema, type GbnfJsonOneOfSchema, type GbnfJsonSchema, type GbnfJsonSchemaImmutableType, type GbnfJsonSchemaToType } from "./utils/gbnfJson/types.js";
34
- export { Llama, getLlama, type LlamaOptions, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizeStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, LlamaEmbedding, type LlamaEmbeddingJSON, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LlamaChatSessionRepeatPenalty, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, UnsupportedError, DisposedError, ChatWrapper, type ChatWrapperSettings, EmptyChatWrapper, LlamaChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, resolveChatWrapperBasedOnModel, resolveChatWrapperBasedOnWrapperTypeName, chatWrapperTypeNames, type ChatWrapperTypeName, LlamaText, SpecialToken, BuiltinSpecialToken, isLlamaText, tokenizeText, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokenJSON, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type 
Token, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema };
36
+ export { Llama, getLlama, type LlamaOptions, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizeStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, LlamaEmbedding, type LlamaEmbeddingJSON, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LlamaChatSessionRepeatPenalty, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, UnsupportedError, DisposedError, ChatWrapper, type ChatWrapperSettings, EmptyChatWrapper, LlamaChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, resolveChatWrapperBasedOnModel, resolveChatWrapperBasedOnWrapperTypeName, chatWrapperTypeNames, type ChatWrapperTypeName, LlamaText, SpecialToken, BuiltinSpecialToken, isLlamaText, tokenizeText, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokenJSON, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type 
ChatSystemMessage, type ChatUserMessage, type Token, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema };
package/dist/index.js CHANGED
@@ -23,11 +23,13 @@ import { ChatMLChatWrapper } from "./chatWrappers/ChatMLChatWrapper.js";
23
23
  import { FalconChatWrapper } from "./chatWrappers/FalconChatWrapper.js";
24
24
  import { AlpacaChatWrapper } from "./chatWrappers/AlpacaChatWrapper.js";
25
25
  import { FunctionaryChatWrapper } from "./chatWrappers/FunctionaryChatWrapper.js";
26
+ import { GemmaChatWrapper } from "./chatWrappers/GemmaChatWrapper.js";
27
+ import { TemplateChatWrapper } from "./TemplateChatWrapper.js";
26
28
  import { resolveChatWrapperBasedOnModel } from "./chatWrappers/resolveChatWrapperBasedOnModel.js";
27
29
  import { resolveChatWrapperBasedOnWrapperTypeName, chatWrapperTypeNames } from "./bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js";
28
30
  import { LlamaText, SpecialToken, BuiltinSpecialToken, isLlamaText, tokenizeText } from "./utils/LlamaText.js";
29
31
  import { appendUserMessageToChatHistory } from "./utils/appendUserMessageToChatHistory.js";
30
32
  import { getModuleVersion } from "./utils/getModuleVersion.js";
31
33
  import { isChatModelResponseFunctionCall } from "./types.js";
32
- export { Llama, getLlama, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, LlamaGrammar, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, LlamaContext, LlamaContextSequence, LlamaEmbeddingContext, LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, LlamaChat, LlamaCompletion, UnsupportedError, DisposedError, ChatWrapper, EmptyChatWrapper, LlamaChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, resolveChatWrapperBasedOnModel, resolveChatWrapperBasedOnWrapperTypeName, chatWrapperTypeNames, LlamaText, SpecialToken, BuiltinSpecialToken, isLlamaText, tokenizeText, appendUserMessageToChatHistory, getModuleVersion, isChatModelResponseFunctionCall };
34
+ export { Llama, getLlama, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, LlamaGrammar, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, LlamaContext, LlamaContextSequence, LlamaEmbeddingContext, LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, LlamaChat, LlamaCompletion, UnsupportedError, DisposedError, ChatWrapper, EmptyChatWrapper, LlamaChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, resolveChatWrapperBasedOnModel, resolveChatWrapperBasedOnWrapperTypeName, chatWrapperTypeNames, LlamaText, SpecialToken, BuiltinSpecialToken, isLlamaText, tokenizeText, appendUserMessageToChatHistory, getModuleVersion, isChatModelResponseFunctionCall };
33
35
  //# sourceMappingURL=index.js.map