@aci-metrics/score 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +228 -0
- package/aci-score.js +861 -0
- package/config/default.json +29 -0
- package/config/models.json +49 -0
- package/lib/provider-factory.js +181 -0
- package/lib/providers/base.js +218 -0
- package/lib/providers/node-llama-cpp.js +196 -0
- package/lib/providers/ollama.js +432 -0
- package/models/.gitkeep +2 -0
- package/package.json +31 -0
- package/prompts/gemma.txt +15 -0
- package/prompts/llama.txt +17 -0
- package/prompts/phi.txt +16 -0
- package/prompts/qwen.txt +18 -0
- package/test-model.js +232 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"model": {
|
|
3
|
+
"id": "qwen2.5-1.5b",
|
|
4
|
+
"comment": "Change this to switch models: qwen2.5-1.5b, phi-3.5-mini, llama-3.2-3b, gemma-2-2b"
|
|
5
|
+
},
|
|
6
|
+
"runtime": {
|
|
7
|
+
"provider": "node-llama-cpp",
|
|
8
|
+
"comment": "Options: node-llama-cpp (local GGUF) or ollama (HTTP API)"
|
|
9
|
+
},
|
|
10
|
+
"inference": {
|
|
11
|
+
"temperature": 0.1,
|
|
12
|
+
"maxTokens": 1024,
|
|
13
|
+
"comment": "Low temperature for consistent structured output"
|
|
14
|
+
},
|
|
15
|
+
"schemas": {
|
|
16
|
+
"taskClassification": "schemas/task-classification.json",
|
|
17
|
+
"archetypeAssignment": "schemas/archetype-assignment.json",
|
|
18
|
+
"comment": "Paths to JSON schema files for grammar enforcement"
|
|
19
|
+
},
|
|
20
|
+
"paths": {
|
|
21
|
+
"modelsDir": "models",
|
|
22
|
+
"promptsDir": "prompts",
|
|
23
|
+
"schemasDir": "schemas"
|
|
24
|
+
},
|
|
25
|
+
"ollama": {
|
|
26
|
+
"host": "http://localhost:11434",
|
|
27
|
+
"comment": "Ollama server endpoint (only used when runtime.provider is ollama)"
|
|
28
|
+
}
|
|
29
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"models": {
|
|
3
|
+
"qwen2.5-1.5b": {
|
|
4
|
+
"id": "qwen2.5-1.5b",
|
|
5
|
+
"name": "Qwen 2.5 1.5B Instruct",
|
|
6
|
+
"filename": "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf",
|
|
7
|
+
"huggingfaceRepo": "bartowski/Qwen2.5-1.5B-Instruct-GGUF",
|
|
8
|
+
"downloadCommand": "huggingface-cli download bartowski/Qwen2.5-1.5B-Instruct-GGUF Qwen2.5-1.5B-Instruct-Q4_K_M.gguf --local-dir ./models",
|
|
9
|
+
"promptTemplate": "prompts/qwen.txt",
|
|
10
|
+
"ollamaModel": "qwen2.5:1.5b",
|
|
11
|
+
"sizeGB": 0.9,
|
|
12
|
+
"description": "Excellent instruction following, reliable JSON output. Good balance of speed and quality."
|
|
13
|
+
},
|
|
14
|
+
"phi-3.5-mini": {
|
|
15
|
+
"id": "phi-3.5-mini",
|
|
16
|
+
"name": "Phi 3.5 Mini Instruct",
|
|
17
|
+
"filename": "Phi-3.5-mini-instruct-Q4_K_M.gguf",
|
|
18
|
+
"huggingfaceRepo": "bartowski/Phi-3.5-mini-instruct-GGUF",
|
|
19
|
+
"downloadCommand": "huggingface-cli download bartowski/Phi-3.5-mini-instruct-GGUF Phi-3.5-mini-instruct-Q4_K_M.gguf --local-dir ./models",
|
|
20
|
+
"promptTemplate": "prompts/phi.txt",
|
|
21
|
+
"ollamaModel": "phi3.5:3.8b",
|
|
22
|
+
"sizeGB": 2.2,
|
|
23
|
+
"description": "Microsoft's compact model. Strong reasoning capabilities for its size."
|
|
24
|
+
},
|
|
25
|
+
"llama-3.2-3b": {
|
|
26
|
+
"id": "llama-3.2-3b",
|
|
27
|
+
"name": "Llama 3.2 3B Instruct",
|
|
28
|
+
"filename": "Llama-3.2-3B-Instruct-Q4_K_M.gguf",
|
|
29
|
+
"huggingfaceRepo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
|
|
30
|
+
"downloadCommand": "huggingface-cli download bartowski/Llama-3.2-3B-Instruct-GGUF Llama-3.2-3B-Instruct-Q4_K_M.gguf --local-dir ./models",
|
|
31
|
+
"promptTemplate": "prompts/llama.txt",
|
|
32
|
+
"ollamaModel": "llama3.2:3b",
|
|
33
|
+
"sizeGB": 1.8,
|
|
34
|
+
"description": "Meta's latest small model. Excellent general capabilities."
|
|
35
|
+
},
|
|
36
|
+
"gemma-2-2b": {
|
|
37
|
+
"id": "gemma-2-2b",
|
|
38
|
+
"name": "Gemma 2 2B Instruct",
|
|
39
|
+
"filename": "gemma-2-2b-it-Q4_K_M.gguf",
|
|
40
|
+
"huggingfaceRepo": "bartowski/gemma-2-2b-it-GGUF",
|
|
41
|
+
"downloadCommand": "huggingface-cli download bartowski/gemma-2-2b-it-GGUF gemma-2-2b-it-Q4_K_M.gguf --local-dir ./models",
|
|
42
|
+
"promptTemplate": "prompts/gemma.txt",
|
|
43
|
+
"ollamaModel": "gemma2:2b",
|
|
44
|
+
"sizeGB": 1.4,
|
|
45
|
+
"description": "Google's efficient model. Fast inference with good quality."
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
"default": "qwen2.5-1.5b"
|
|
49
|
+
}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider Factory - Creates the appropriate inference provider
|
|
3
|
+
*
|
|
4
|
+
* This module reads the configuration files and returns the correct
|
|
5
|
+
* provider instance based on the settings. It's the main entry point
|
|
6
|
+
* for getting an inference provider.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* const createProvider = require('./lib/provider-factory');
|
|
10
|
+
* const provider = createProvider();
|
|
11
|
+
* await provider.initialize();
|
|
12
|
+
* const result = await provider.generate(prompt, schema);
|
|
13
|
+
* await provider.destroy();
|
|
14
|
+
*
|
|
15
|
+
* To switch models or runtimes, edit config/default.json:
|
|
16
|
+
* - Change model.id to use a different model
|
|
17
|
+
* - Change runtime.provider to use a different inference backend
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
var fs = require('fs');
|
|
21
|
+
var path = require('path');
|
|
22
|
+
|
|
23
|
+
// Import provider implementations
|
|
24
|
+
var NodeLlamaCppProvider = require('./providers/node-llama-cpp');
|
|
25
|
+
var OllamaProvider = require('./providers/ollama');
|
|
26
|
+
|
|
27
|
+
/**
 * Read a JSON configuration file from disk and parse it.
 *
 * @param {string} filePath - Location of the JSON file to read
 * @returns {Object} The parsed configuration object
 * @throws {Error} When the file is missing or contains malformed JSON
 */
function loadConfig(filePath) {
  if (!fs.existsSync(filePath)) {
    throw new Error('Configuration file not found: ' + filePath);
  }

  var raw = fs.readFileSync(filePath, 'utf-8');

  try {
    return JSON.parse(raw);
  } catch (err) {
    // Re-throw with the file path so the caller knows which config is broken.
    throw new Error('Invalid JSON in ' + filePath + ': ' + err.message);
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Build an inference provider from the on-disk configuration.
 *
 * Reads config/default.json and config/models.json to determine
 * which provider to instantiate and with what settings.
 *
 * @param {Object} [options] - Optional overrides
 * @param {string} [options.modelId] - Override the model ID
 * @param {string} [options.provider] - Override the runtime provider
 * @param {string} [options.basePath] - Override the base path for file resolution
 * @returns {BaseProvider} Configured provider instance (not yet initialized)
 */
function createProvider(options) {
  var opts = options || {};

  // Resolve the directory that holds the config/ and models/ directories.
  var basePath = opts.basePath || path.join(__dirname, '..');

  console.log('[provider-factory] Loading configuration...');
  var config = loadConfig(path.join(basePath, 'config', 'default.json'));
  var modelsRegistry = loadConfig(path.join(basePath, 'config', 'models.json'));

  // Pick the model: explicit override, then config value, then registry default.
  var modelId = opts.modelId || config.model.id || modelsRegistry.default;
  console.log('[provider-factory] Selected model: ' + modelId);

  var modelConfig = modelsRegistry.models[modelId];
  if (!modelConfig) {
    throw new Error(
      'Unknown model: ' + modelId + '\n' +
      'Available models: ' + Object.keys(modelsRegistry.models).join(', ')
    );
  }

  // Pick the runtime backend the same way: override, config, then default.
  var providerType = opts.provider || config.runtime.provider || 'node-llama-cpp';
  console.log('[provider-factory] Selected provider: ' + providerType);

  var provider;
  if (providerType === 'node-llama-cpp') {
    provider = new NodeLlamaCppProvider(config, modelConfig, basePath);
  } else if (providerType === 'ollama') {
    provider = new OllamaProvider(config, modelConfig, basePath);
  } else {
    throw new Error(
      'Unknown provider: ' + providerType + '\n' +
      'Available providers: node-llama-cpp, ollama'
    );
  }

  console.log('[provider-factory] Provider created successfully');
  return provider;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Get list of available models
 *
 * @param {string} [basePath] - Base path for config files
 * @returns {Object} Models registry with all available models
 */
function getAvailableModels(basePath) {
  var root = basePath || path.join(__dirname, '..');
  return loadConfig(path.join(root, 'config', 'models.json'));
}
|
|
126
|
+
|
|
127
|
+
/**
 * Get current configuration
 *
 * @param {string} [basePath] - Base path for config files
 * @returns {Object} Current runtime configuration
 */
function getConfig(basePath) {
  var root = basePath || path.join(__dirname, '..');
  return loadConfig(path.join(root, 'config', 'default.json'));
}
|
|
138
|
+
|
|
139
|
+
/**
 * Load a prompt template and replace placeholders
 *
 * Every occurrence of {{KEY}} in the template is replaced with the
 * corresponding value from `variables` (coerced to a string).
 *
 * @param {string} templatePath - Path to the template file (relative to basePath)
 * @param {Object} variables - Key-value pairs to replace in the template
 * @param {string} [basePath] - Base path for file resolution
 * @returns {string} Processed prompt with placeholders replaced
 * @throws {Error} If the template file does not exist
 */
function loadPromptTemplate(templatePath, variables, basePath) {
  basePath = basePath || path.join(__dirname, '..');

  var fullPath = path.join(basePath, templatePath);

  if (!fs.existsSync(fullPath)) {
    throw new Error('Prompt template not found: ' + fullPath);
  }

  var template = fs.readFileSync(fullPath, 'utf-8');

  // Replace all {{VARIABLE}} placeholders.
  //
  // split/join is used instead of a String.prototype.replace loop because:
  //  - replace() treats '$' sequences in the replacement value ($&, $1, $$)
  //    as special patterns, corrupting values that contain a dollar sign;
  //  - a replace-until-gone loop never terminates when a replacement value
  //    itself contains the placeholder text.
  var variableNames = Object.keys(variables);
  for (var i = 0; i < variableNames.length; i++) {
    var name = variableNames[i];
    var placeholder = '{{' + name + '}}';
    template = template.split(placeholder).join(String(variables[name]));
  }

  return template;
}
|
|
173
|
+
|
|
174
|
+
// Primary export: calling the module directly creates a provider.
module.exports = createProvider;

// Helper functions are attached as named properties on the same export.
Object.assign(module.exports, {
  createProvider: createProvider,
  getAvailableModels: getAvailableModels,
  getConfig: getConfig,
  loadPromptTemplate: loadPromptTemplate
});
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BaseProvider - Abstract base class for inference providers
|
|
3
|
+
*
|
|
4
|
+
* This defines the interface that all inference providers must implement.
|
|
5
|
+
* Both node-llama-cpp (local GGUF) and Ollama (HTTP API) providers extend this.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const provider = new SomeProvider(config, modelConfig);
|
|
9
|
+
* await provider.initialize();
|
|
10
|
+
* const result = await provider.generate(prompt, schema);
|
|
11
|
+
* await provider.destroy();
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
 * Abstract base class for inference providers
 *
 * Defines the interface that all inference providers must implement.
 * Both node-llama-cpp (local GGUF) and Ollama (HTTP API) providers extend this.
 *
 * Lifecycle: construct, then `await initialize()`, then one or more
 * `await generate(prompt, schema)` calls, then `await destroy()`.
 *
 * @class BaseProvider
 */
class BaseProvider {
  /**
   * Create a new provider instance
   *
   * @param {Object} config - Runtime configuration from config/default.json
   * @param {Object} modelConfig - Model configuration from config/models.json
   */
  constructor(config, modelConfig) {
    // Store configuration for subclasses
    this.config = config;
    this.modelConfig = modelConfig;

    // Track initialization state; flipped by subclasses in initialize()/destroy()
    this.isInitialized = false;

    // Provider name for logging (override in subclass)
    this.providerName = 'base';
  }

  /**
   * Initialize the provider
   *
   * Must be called before generate(). Loads the model, establishes
   * connections, or performs any other setup required.
   *
   * @abstract
   * @returns {Promise<void>}
   * @throws {Error} Always in the base class; subclasses must override
   */
  async initialize() {
    throw new Error('BaseProvider.initialize() must be implemented by subclass');
  }

  /**
   * Generate a response from the model
   *
   * Takes a prompt and optional JSON schema, returns parsed JSON response.
   * The schema is used for grammar enforcement (node-llama-cpp) or
   * manual validation (Ollama).
   *
   * @abstract
   * @param {string} prompt - The prompt to send to the model
   * @param {Object} [schema] - JSON schema for structured output
   * @returns {Promise<Object>} Parsed JSON response from the model
   * @throws {Error} Always in the base class; subclasses must override
   */
  async generate(prompt, schema) {
    throw new Error('BaseProvider.generate() must be implemented by subclass');
  }

  /**
   * Clean up resources
   *
   * Call this when you're done with the provider. Releases model memory,
   * closes connections, etc.
   *
   * @abstract
   * @returns {Promise<void>}
   * @throws {Error} Always in the base class; subclasses must override
   */
  async destroy() {
    throw new Error('BaseProvider.destroy() must be implemented by subclass');
  }

  /**
   * Check if the provider is ready for generation
   *
   * @returns {boolean} True if initialized and ready
   */
  isReady() {
    return this.isInitialized;
  }

  /**
   * Get provider information for logging
   *
   * @returns {Object} Provider info including name, model id, and readiness
   */
  getInfo() {
    return {
      provider: this.providerName,
      model: this.modelConfig ? this.modelConfig.id : 'unknown',
      isReady: this.isInitialized
    };
  }

  /**
   * Log a message with provider context
   *
   * @protected
   * @param {string} message - Message to log
   */
  log(message) {
    console.log('[' + this.providerName + '] ' + message);
  }

  /**
   * Log an error with provider context
   *
   * @protected
   * @param {string} message - Error message
   * @param {Error} [error] - Optional error object whose message is appended
   */
  logError(message, error) {
    console.error('[' + this.providerName + ' ERROR] ' + message);
    if (error) {
      console.error('  Details:', error.message);
    }
  }

  /**
   * Validate that a response matches a JSON schema
   *
   * Simple validator covering required fields, basic types, enums, and
   * numeric minimum/maximum. For production use, consider a full JSON
   * Schema validator like Ajv.
   *
   * @protected
   * @param {Object} response - Parsed JSON response
   * @param {Object} schema - JSON schema to validate against
   * @returns {{isValid: boolean, errors: string[]}} Validation result
   */
  validateResponse(response, schema) {
    var errors = [];

    // The response must be a non-null object before any field checks make sense
    if (typeof response !== 'object' || response === null) {
      return {
        isValid: false,
        errors: ['Response must be an object']
      };
    }

    // Check required fields
    if (schema.required && Array.isArray(schema.required)) {
      for (var i = 0; i < schema.required.length; i++) {
        var field = schema.required[i];
        if (!(field in response)) {
          errors.push('Missing required field: ' + field);
        }
      }
    }

    // Check property types
    if (schema.properties) {
      var properties = Object.keys(schema.properties);
      for (var j = 0; j < properties.length; j++) {
        var propName = properties[j];
        var propSchema = schema.properties[propName];

        // Skip if property not present (absence of required fields is caught above)
        if (!(propName in response)) {
          continue;
        }

        var value = response[propName];

        // Check type
        if (propSchema.type) {
          var expectedType = propSchema.type;
          var actualType = typeof value;

          // Handle array type specially: typeof [] is 'object'
          if (expectedType === 'array' && !Array.isArray(value)) {
            errors.push(propName + ' must be an array');
          } else if (expectedType === 'integer' || expectedType === 'number') {
            if (actualType !== 'number') {
              errors.push(propName + ' must be a number');
            } else if (expectedType === 'integer' && !Number.isInteger(value)) {
              // JSON Schema 'integer' rejects fractional numbers like 3.5
              errors.push(propName + ' must be an integer');
            }
          } else if (expectedType !== 'array' && actualType !== expectedType) {
            errors.push(propName + ' must be ' + expectedType + ', got ' + actualType);
          }
        }

        // Check enum
        if (propSchema.enum && Array.isArray(propSchema.enum)) {
          if (propSchema.enum.indexOf(value) === -1) {
            errors.push(propName + ' must be one of: ' + propSchema.enum.join(', '));
          }
        }

        // Check minimum/maximum for numbers
        if (typeof value === 'number') {
          if (propSchema.minimum !== undefined && value < propSchema.minimum) {
            errors.push(propName + ' must be >= ' + propSchema.minimum);
          }
          if (propSchema.maximum !== undefined && value > propSchema.maximum) {
            errors.push(propName + ' must be <= ' + propSchema.maximum);
          }
        }
      }
    }

    return {
      isValid: errors.length === 0,
      errors: errors
    };
  }
}
|
|
216
|
+
|
|
217
|
+
// Export the class so concrete providers can extend it via require('./base')
module.exports = BaseProvider;
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NodeLlamaCppProvider - Inference provider using node-llama-cpp
|
|
3
|
+
*
|
|
4
|
+
* This provider loads GGUF model files directly and uses node-llama-cpp
|
|
5
|
+
* for inference. It supports JSON schema grammar enforcement for
|
|
6
|
+
* guaranteed valid structured output.
|
|
7
|
+
*
|
|
8
|
+
* Benefits:
|
|
9
|
+
* - Air-gapped operation (no network required after download)
|
|
10
|
+
* - Grammar enforcement ensures valid JSON output
|
|
11
|
+
* - Native performance via llama.cpp
|
|
12
|
+
*
|
|
13
|
+
* Requirements:
|
|
14
|
+
* - GGUF model file in the models/ directory
|
|
15
|
+
* - node-llama-cpp installed (npm install)
|
|
16
|
+
* - Xcode Command Line Tools (macOS)
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
var path = require('path');
|
|
20
|
+
var fs = require('fs');
|
|
21
|
+
var BaseProvider = require('./base');
|
|
22
|
+
|
|
23
|
+
/**
 * Provider implementation for node-llama-cpp
 *
 * Loads a GGUF model file directly and runs inference locally, with
 * optional JSON-schema grammar enforcement for structured output.
 *
 * @class NodeLlamaCppProvider
 * @extends BaseProvider
 */
class NodeLlamaCppProvider extends BaseProvider {
  /**
   * Create a new node-llama-cpp provider
   *
   * @param {Object} config - Runtime configuration
   * @param {Object} modelConfig - Model configuration
   * @param {string} basePath - Base path for resolving relative paths
   */
  constructor(config, modelConfig, basePath) {
    super(config, modelConfig);

    this.providerName = 'node-llama-cpp';
    this.basePath = basePath;

    // These will be set during initialization
    this.llama = null;
    this.model = null;
    this.context = null;
    this.session = null;
  }

  /**
   * Initialize the model
   *
   * Loads the GGUF file and creates a chat session.
   * This can take several seconds on first run.
   *
   * @returns {Promise<void>}
   * @throws {Error} If model file not found or initialization fails
   */
  async initialize() {
    this.log('Initializing...');

    // Build path to model file
    var modelsDir = this.config.paths ? this.config.paths.modelsDir : 'models';
    var modelPath = path.join(this.basePath, modelsDir, this.modelConfig.filename);

    // Check if model file exists; fail early with the download instructions
    this.log('Looking for model: ' + this.modelConfig.filename);
    if (!fs.existsSync(modelPath)) {
      var errorMessage = 'Model file not found: ' + modelPath + '\n\n';
      errorMessage += 'Download the model with:\n';
      errorMessage += '  ' + this.modelConfig.downloadCommand;
      throw new Error(errorMessage);
    }

    var stats = fs.statSync(modelPath);
    var sizeMB = (stats.size / (1024 * 1024)).toFixed(2);
    this.log('Model file found (' + sizeMB + ' MB)');

    // Import node-llama-cpp (ESM module requires dynamic import)
    this.log('Loading node-llama-cpp...');
    var llamaModule = await import('node-llama-cpp');
    var getLlama = llamaModule.getLlama;

    // Store the chat session class for later use
    this.LlamaChatSession = llamaModule.LlamaChatSession;

    // Get the llama instance
    this.log('Getting llama instance...');
    this.llama = await getLlama();

    // Load the model
    this.log('Loading model (this may take a moment)...');
    this.model = await this.llama.loadModel({
      modelPath: modelPath
    });

    // Create a context
    this.log('Creating context...');
    this.context = await this.model.createContext();

    // Create a chat session
    this.log('Creating chat session...');
    this.session = new this.LlamaChatSession({
      contextSequence: this.context.getSequence()
    });

    this.isInitialized = true;
    this.log('Ready');
  }

  /**
   * Generate a response with optional JSON schema enforcement
   *
   * @param {string} prompt - The prompt to send to the model
   * @param {Object} [schema] - JSON schema for grammar enforcement
   * @returns {Promise<Object>} Parsed JSON response
   * @throws {Error} If not initialized, generation fails, or the response
   *                 is invalid JSON / fails schema validation
   */
  async generate(prompt, schema) {
    // Check initialization
    if (!this.isInitialized) {
      throw new Error('Provider not initialized. Call initialize() first.');
    }

    // Get inference settings from config. Fall back to the documented
    // defaults per-field, so a config.inference block that omits one
    // setting doesn't silently pass undefined to the library.
    var inference = this.config.inference || {};
    var temperature = inference.temperature !== undefined ? inference.temperature : 0.1;
    var maxTokens = inference.maxTokens !== undefined ? inference.maxTokens : 1024;

    // Build generation options
    var options = {
      maxTokens: maxTokens,
      temperature: temperature
    };

    // If schema provided, create grammar for enforcement
    if (schema) {
      this.log('Creating grammar from JSON schema...');
      options.grammar = await this.llama.createGrammarForJsonSchema(schema);
    }

    // Generate response
    this.log('Generating response...');
    var rawResponse = await this.session.prompt(prompt, options);

    // Parse the JSON response
    this.log('Parsing response...');
    var parsed;
    try {
      parsed = JSON.parse(rawResponse);
    } catch (parseError) {
      this.logError('Failed to parse JSON response', parseError);
      this.log('Raw response: ' + rawResponse);
      throw new Error('Model returned invalid JSON: ' + parseError.message);
    }

    // Validate against schema if provided (belt-and-braces on top of grammar)
    if (schema) {
      var validation = this.validateResponse(parsed, schema);
      if (!validation.isValid) {
        this.logError('Response failed schema validation');
        for (var i = 0; i < validation.errors.length; i++) {
          this.log('  - ' + validation.errors[i]);
        }
        throw new Error('Schema validation failed: ' + validation.errors.join('; '));
      }
    }

    this.log('Generation complete');
    return parsed;
  }

  /**
   * Clean up resources
   *
   * Releases model memory and closes the context.
   *
   * @returns {Promise<void>}
   */
  async destroy() {
    this.log('Cleaning up...');

    // Explicitly dispose native llama.cpp resources when the API exposes
    // dispose(); relying on GC alone can leave native memory mapped.
    // Calls are guarded so older library versions still clean up via GC.
    if (this.session && typeof this.session.dispose === 'function') {
      await this.session.dispose();
    }
    if (this.context && typeof this.context.dispose === 'function') {
      await this.context.dispose();
    }
    if (this.model && typeof this.model.dispose === 'function') {
      await this.model.dispose();
    }

    // Clear references so remaining objects become collectable
    this.session = null;
    this.context = null;
    this.model = null;
    this.llama = null;

    this.isInitialized = false;
    this.log('Cleanup complete');
  }
}
|
|
194
|
+
|
|
195
|
+
// Export the class for use by the provider factory
module.exports = NodeLlamaCppProvider;
|