objectivist-ner 0.0.4 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +536 -0
- package/dist/index.js.map +1 -0
- package/package.json +22 -8
- package/index.ts +0 -635
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
import { program } from "commander";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import os from "os";
|
|
4
|
+
import fs from "fs";
|
|
5
|
+
import { getLlama, LlamaChatSession, resolveModelFile } from "node-llama-cpp";
|
|
6
|
+
// === MODELS ===
// Model tiers selectable via --fast / --balanced / --best, expressed as
// node-llama-cpp "hf:<repo>:<file>" URIs that resolveModelFile() downloads
// and caches under ~/.fastner/models.
// NOTE(review): the "Qwen3.5" repo/file names look unusual — confirm these
// exact Hugging Face URIs exist before release.
const MODELS = {
    fast: "hf:unsloth/Qwen3.5-0.8B-GGUF:Qwen3.5-0.8B-Q8_0.gguf",
    balanced: "hf:unsloth/Qwen3.5-2B-GGUF:Qwen3.5-2B-Q8_0.gguf",
    best: "hf:unsloth/Qwen3.5-4B-GGUF:Qwen3.5-4B-Q8_0.gguf",
};
|
|
12
|
+
// === SYSTEM PROMPTS ===
// Base instruction set sent as the system prompt on every request.
// Feature-specific output rules (assertion / confidence / entity_id fields)
// are appended later by buildSystemPrompt() depending on enabled flags.
const SYSTEM_PROMPT = `You are a named entity recognition (NER) system. Your task is to extract entities from text.

Rules:
- "text" must be the EXACT substring from the input that refers to the entity. Do NOT paraphrase or include extra words.
- "class" is the entity type (e.g. person, animal, location, organization).
- "attributes" are properties of the entity found in context.
- Return one object per distinct entity mention.
- If no entities are found, return an empty array [].`;
// Few-shot demonstrations appended after the rules; they show the expected
// JSON shape (one object per mention, exact substrings, attribute maps).
const FEW_SHOT_EXAMPLES = `Example 1:
Input: "the cat is blue and is feeling sad"
Output: [{"class":"animal","text":"cat","attributes":{"color":"blue","emotional_state":"sad"}}]

Example 2:
Input: "John Smith lives in New York City and works at Google"
Output: [{"class":"person","text":"John Smith","attributes":{"location":"New York City","employer":"Google"}},{"class":"location","text":"New York City","attributes":{}},{"class":"organization","text":"Google","attributes":{}}]

Example 3:
Input: "The quick brown fox jumps over the lazy dog near the river"
Output: [{"class":"animal","text":"fox","attributes":{"color":"brown","speed":"quick"}},{"class":"animal","text":"dog","attributes":{"temperament":"lazy"}},{"class":"location","text":"the river","attributes":{}}]

Example 4:
Input: "Researchers at MIT found that the drug Riluzole slows progression of ALS in a trial last March"
Output: [{"class":"organization","text":"MIT","attributes":{}},{"class":"drug","text":"Riluzole","attributes":{}},{"class":"disease","text":"ALS","attributes":{}},{"class":"event","text":"trial last March","attributes":{"date":"last March"}}]`;
|
|
36
|
+
// === TAXONOMY HELPERS ===
// Taxonomy format: {"organism": ["person", {"animal": ["dog", "cat"]}], "idea": ["dream", "principle"]}
// Arrays contain leaf nodes, objects contain nested taxonomies
/**
 * Collect every leaf class name in a taxonomy, in document order.
 * Strings inside arrays are leaves; nested objects are sub-taxonomies.
 */
function getLeafNodes(taxonomy) {
    const collect = (node) => {
        if (Array.isArray(node)) {
            return node.flatMap((entry) => {
                if (typeof entry === "string")
                    return [entry];
                if (typeof entry === "object" && entry !== null)
                    return collect(entry);
                return [];
            });
        }
        if (typeof node === "object" && node !== null) {
            return Object.values(node).flatMap(collect);
        }
        return [];
    };
    return collect(taxonomy);
}
|
|
61
|
+
/**
 * Find the root-to-leaf path of a class name inside the taxonomy,
 * e.g. "cat" -> ["organism", "animal", "cat"]. Falls back to [leaf]
 * when the class does not appear anywhere in the hierarchy.
 */
function getTaxonomyPath(leaf, taxonomy) {
    const search = (node, trail) => {
        if (Array.isArray(node)) {
            for (const entry of node) {
                if (typeof entry === "string" && entry === leaf)
                    return [...trail, entry];
                if (typeof entry === "object" && entry !== null) {
                    const found = search(entry, trail);
                    if (found)
                        return found;
                }
            }
            return null;
        }
        if (typeof node === "object" && node !== null) {
            for (const [branch, children] of Object.entries(node)) {
                const found = search(children, [...trail, branch]);
                if (found)
                    return found;
            }
        }
        return null;
    };
    // Try each root node
    for (const [root, children] of Object.entries(taxonomy)) {
        const found = search(children, [root]);
        if (found)
            return found;
    }
    return [leaf];
}
|
|
92
|
+
/**
 * Render the class taxonomy as an indented bullet list suitable for the
 * prompt, preceded by an instruction to classify at leaf level.
 */
function taxonomyToPrompt(taxonomy) {
    const render = (node, indent) => {
        if (Array.isArray(node)) {
            return node.flatMap((entry) => {
                if (typeof entry === "string")
                    return [`${indent}- ${entry}`];
                if (typeof entry === "object" && entry !== null)
                    return render(entry, indent);
                return [];
            });
        }
        if (typeof node === "object" && node !== null) {
            return Object.entries(node).flatMap(([label, children]) => [
                `${indent}- ${label}`,
                ...render(children, indent + " "),
            ]);
        }
        return [];
    };
    const out = [
        "Use the following class hierarchy. Classify at the most specific (leaf) level:",
    ];
    for (const [root, children] of Object.entries(taxonomy)) {
        out.push(`- ${root}`, ...render(children, " "));
    }
    return out.join("\n");
}
|
|
122
|
+
// === CHUNKING ===
/**
 * Split `text` into chunks of at most `maxChars` characters, breaking only
 * at sentence boundaries (whitespace following `.`, `!`, or `?`).
 *
 * A single sentence longer than `maxChars` is emitted as one oversized
 * chunk — sentences are never split internally.
 */
function chunkText(text, maxChars) {
    if (text.length <= maxChars)
        return [text];
    const chunks = [];
    const sentences = text.split(/(?<=[.!?])\s+/);
    let current = "";
    for (const sentence of sentences) {
        // Fix: the +1 accounts for the space that joins `sentence` onto
        // `current` below; without it a chunk could end up one character
        // over the `maxChars` limit.
        if (current.length > 0 && current.length + 1 + sentence.length > maxChars) {
            chunks.push(current.trim());
            current = "";
        }
        current += (current ? " " : "") + sentence;
    }
    if (current.trim())
        chunks.push(current.trim());
    return chunks;
}
|
|
140
|
+
// === CLI ===
// Declarative CLI definition (commander). Option names map to camelCase
// keys on `opts` (e.g. --attr-values -> opts.attrValues). parse() reads
// process.argv; positional text lands in program.args[0].
program
    .name("ner")
    .description("Objectivist-inspired named entity recognition with grammar constraints")
    .argument("[text]", "Text to extract entities from (omit to read from stdin)")
    .option("-c, --classes <list>", "Comma-separated allowed entity classes")
    .option("-a, --attributes <list>", "Comma-separated allowed attribute keys")
    .option("--attr-values <json>", 'JSON enum map for attributes e.g. {"color":["blue","red"]}')
    .option("--taxonomy <json>", 'Class hierarchy JSON e.g. {"organism":["animal","plant"]}')
    .option("--relations", "Extract relations between entities")
    .option("--relation-classes <list>", "Comma-separated allowed relation classes (e.g. employment,location,causal)")
    .option("--resolve", "Resolve coreferences (group mentions of same entity)")
    .option("--include-confidence", "Include confidence scores per entity")
    .option("--detect-negation", "Detect negated/hypothetical entities")
    .option("--schema <path>", "Load entity schema definition from a JSON file")
    .option("--file <path>", "Read input from a file (with chunking for long docs)")
    .option("--batch <path>", "Process JSONL file (one text per line) or directory of .txt files")
    .option("--system-prompt <string>", "Replace the built-in system prompt entirely")
    .option("--system-prompt-append <string>", "Append to the built-in system prompt")
    .option("-m, --model <uri>", "Model URI or path to GGUF file")
    .option("--fast", "Use smallest model (0.8B) -- quick, simple text only")
    .option("--balanced", "Use mid-size model (2B) -- good accuracy/speed tradeoff")
    .option("--best", "Use largest model (4B) -- best accuracy (default)")
    .option("--compact", "Output compact JSON (also auto-enabled for non-TTY)")
    .addHelpText("after", `
Examples:
  ner "the cat is blue"
  ner "John works at Google" --classes person,organization
  ner "sky is blue" --attr-values '{"color":["blue","red"]}'
  ner --relations "Dr. Chen works at MIT"
  ner --resolve "Dr. Chen published a paper. She won an award."
  ner --detect-negation "The patient does not have cancer"
  ner --schema schema.json "complex text"
  ner --file document.txt
  ner --batch inputs.jsonl
  echo "the cat is blue" | ner
`)
    .parse();
// Parsed option bag used by everything below.
const opts = program.opts();
|
|
179
|
+
// === VALIDATIONS ===
// Model-tier flags pick one entry from MODELS; more than one is ambiguous.
const tierFlags = [opts.fast, opts.balanced, opts.best].filter(Boolean).length;
if (tierFlags > 1) {
    console.error("Error: --fast, --balanced, and --best are mutually exclusive.");
    process.exit(1);
}
// Replacing and appending to the system prompt at the same time is contradictory.
if (opts.systemPrompt && opts.systemPromptAppend) {
    console.error("Error: --system-prompt and --system-prompt-append are mutually exclusive.");
    process.exit(1);
}
// === LOAD SCHEMA FILE ===
// Optional JSON schema file supplying defaults for classes/attributes/
// attrValues/taxonomy/relations; individual CLI flags override it below.
let schemaFile;
if (opts.schema) {
    try {
        const raw = fs.readFileSync(opts.schema, "utf-8");
        schemaFile = JSON.parse(raw);
    }
    catch (e) {
        // Covers both read failures and malformed JSON.
        console.error(`Error: Failed to load schema file: ${e.message}`);
        process.exit(1);
    }
}
|
|
201
|
+
// === MERGE OPTIONS (CLI flags override schema file) ===
// Allowed entity classes: CLI comma list wins over the schema file's list.
const allowedClasses = opts.classes
    ? opts.classes.split(",").map((s) => s.trim())
    : schemaFile?.classes;
// Allowed attribute keys, same precedence.
const allowedAttrs = opts.attributes
    ? opts.attributes.split(",").map((s) => s.trim())
    : schemaFile?.attributes;
// Enum map restricting attribute values, e.g. {"color":["blue","red"]}.
let attrValuesMap;
if (opts.attrValues) {
    try {
        attrValuesMap = JSON.parse(opts.attrValues);
    }
    catch (e) {
        console.error(`Error: Invalid JSON for --attr-values: ${e.message}`);
        process.exit(1);
    }
}
else if (schemaFile?.attrValues) {
    attrValuesMap = schemaFile.attrValues;
}
// Class hierarchy (see taxonomy helpers above).
let taxonomy;
if (opts.taxonomy) {
    try {
        taxonomy = JSON.parse(opts.taxonomy);
    }
    catch (e) {
        console.error(`Error: Invalid JSON for --taxonomy: ${e.message}`);
        process.exit(1);
    }
}
else if (schemaFile?.taxonomy) {
    taxonomy = schemaFile.taxonomy;
}
// The schema file's "relations" entry both enables relation extraction and
// supplies the allowed relation types.
// NOTE(review): assumes schemaFile.relations is an array of type names when
// present — confirm against the documented schema-file format.
const enableRelations = opts.relations || !!schemaFile?.relations;
const relationTypes = schemaFile?.relations || undefined;
const relationClasses = opts.relationClasses
    ? opts.relationClasses.split(",").map((s) => s.trim())
    : schemaFile?.relationClasses;
// Feature toggles come only from CLI flags (no schema-file equivalent here).
const enableResolve = !!opts.resolve;
const enableConfidence = !!opts.includeConfidence;
const enableNegation = !!opts.detectNegation;
|
|
242
|
+
// === READ INPUT ===
// Populate inputTexts from (in priority order): --batch, --file, the
// positional argument, or stdin. Each element is processed independently.
let inputTexts = [];
if (opts.batch) {
    const batchPath = opts.batch;
    const stat = fs.statSync(batchPath);
    if (stat.isDirectory()) {
        // Directory mode: every .txt file becomes one input.
        const files = fs
            .readdirSync(batchPath)
            .filter((f) => f.endsWith(".txt"));
        inputTexts = files.map((f) => fs.readFileSync(path.join(batchPath, f), "utf-8").trim());
    }
    else {
        // JSONL mode: each line is a JSON string, an object with a "text"
        // field, or (fallback) raw text taken verbatim.
        const content = fs.readFileSync(batchPath, "utf-8").trim();
        inputTexts = content.split("\n").map((line) => {
            try {
                const parsed = JSON.parse(line);
                return typeof parsed === "string" ? parsed : parsed.text || line;
            }
            catch {
                return line;
            }
        });
    }
}
else if (opts.file) {
    // Long documents are split on sentence boundaries into ~2000-char chunks.
    const content = fs.readFileSync(opts.file, "utf-8").trim();
    inputTexts = chunkText(content, 2000);
}
else {
    let text = program.args[0];
    if (!text) {
        // No positional argument: read all of stdin (top-level await — this
        // file is an ES module).
        const chunks = [];
        for await (const chunk of process.stdin) {
            chunks.push(chunk);
        }
        text = Buffer.concat(chunks).toString().trim();
    }
    if (!text) {
        console.error("Error: No input text provided. Pass as argument, --file, --batch, or stdin.");
        process.exit(1);
    }
    inputTexts = [text];
}
|
|
285
|
+
// === RESOLVE MODEL ===
// Explicit --model wins; otherwise the tier flags select from MODELS,
// defaulting to the largest ("best") model.
const modelUri = opts.model
    ? opts.model
    : opts.fast
        ? MODELS.fast
        : opts.balanced
            ? MODELS.balanced
            : MODELS.best;
// Models are downloaded/cached under ~/.fastner/models by resolveModelFile.
const modelsDir = path.join(os.homedir(), ".fastner", "models");
const modelPath = await resolveModelFile(modelUri, modelsDir);
// Load the runtime and the model once; a single context is reused for all
// inputs (sessions are created per input in the main loop below).
const llama = await getLlama();
const model = await llama.loadModel({ modelPath });
const context = await model.createContext();
|
|
298
|
+
// === BUILD SYSTEM PROMPT ===
/**
 * Assemble the system prompt: base rules, plus one extra rule per enabled
 * feature (negation, confidence, coreference), then the few-shot examples,
 * then any user-supplied appendix. --system-prompt replaces everything.
 */
function buildSystemPrompt() {
    if (opts.systemPrompt)
        return opts.systemPrompt;
    let base = SYSTEM_PROMPT;
    if (enableNegation) {
        base += `\n- Every entity has a top-level "assertion" field: "present", "negated", or "hypothetical". "negated" means the text explicitly denies it (e.g. "does not have"). "hypothetical" means it is speculative (e.g. "might develop").`;
    }
    if (enableConfidence) {
        base += `\n- Every entity has a top-level "confidence" field: "low", "medium", or "high".`;
    }
    if (enableResolve) {
        base += `\n- Every entity has a top-level "entity_id" field. If multiple text spans refer to the same real-world entity (e.g. "Dr. Chen" and "she"), they share the same entity_id. Use short IDs like "e1", "e2".`;
        base += `\n- When multiple mentions share an entity_id, exactly ONE of them must have "is_canonical": true (the most specific reference like a proper name). The others must have "is_canonical": false.`;
    }
    // Few-shot examples always follow the rules so the model sees the
    // expected JSON shape last.
    let prompt = `${base}\n\n${FEW_SHOT_EXAMPLES}`;
    if (opts.systemPromptAppend) {
        prompt += `\n\n${opts.systemPromptAppend}`;
    }
    return prompt;
}
|
|
319
|
+
// === BUILD GRAMMAR SCHEMA ===
/**
 * Build the JSON schema the llama grammar is generated from: an array of
 * entity objects. With a taxonomy configured, only its leaf classes are
 * legal "class" values, forcing classification at the most specific level.
 */
function buildGrammarSchema() {
    const classEnum = taxonomy ? getLeafNodes(taxonomy) : allowedClasses;
    const entityProps = {
        class: classEnum
            ? { type: "string", enum: classEnum }
            : { type: "string" },
        text: { type: "string" },
        attributes: {
            type: "object",
            additionalProperties: { type: "string" },
        },
    };
    const mandatory = ["class", "text"];
    // Feature-gated fields live at the top level of each entity (not inside
    // "attributes") so the grammar can require them on every entity.
    if (enableNegation) {
        entityProps.assertion = {
            type: "string",
            enum: ["present", "negated", "hypothetical"],
        };
        mandatory.push("assertion");
    }
    if (enableConfidence) {
        entityProps.confidence = {
            type: "string",
            enum: ["low", "medium", "high"],
        };
        mandatory.push("confidence");
    }
    if (enableResolve) {
        entityProps.entity_id = { type: "string" };
        entityProps.is_canonical = { type: "boolean" };
        mandatory.push("entity_id", "is_canonical");
    }
    return {
        type: "array",
        items: {
            type: "object",
            properties: entityProps,
            required: mandatory,
            additionalProperties: false,
        },
    };
}
|
|
371
|
+
// === BUILD RELATIONS SCHEMA ===
/**
 * Wrap the entity schema in an envelope that also carries a "relations"
 * array of {source, target, relation[, class]} objects for --relations mode.
 */
function buildRelationsSchema() {
    const relProps = {
        source: { type: "string" },
        target: { type: "string" },
        relation: relationTypes
            ? { type: "string", enum: relationTypes }
            : { type: "string" },
    };
    const relRequired = ["source", "target", "relation"];
    // The per-relation "class" field exists (and is required) only when
    // relation classes were configured.
    if (relationClasses) {
        relProps.class = {
            type: "string",
            enum: relationClasses,
        };
        relRequired.push("class");
    }
    return {
        type: "object",
        properties: {
            entities: buildGrammarSchema(),
            relations: {
                type: "array",
                items: {
                    type: "object",
                    properties: relProps,
                    required: relRequired,
                    additionalProperties: false,
                },
            },
        },
        required: ["entities", "relations"],
        additionalProperties: false,
    };
}
|
|
409
|
+
// === BUILD PROMPT CONSTRAINTS ===
/**
 * Build the natural-language restatement of the active constraints that is
 * prepended to each user prompt. The grammar (buildGrammarSchema) enforces
 * structure mechanically; this text tells the model the same rules so it
 * chooses sensible values within that structure.
 */
function buildConstraints() {
    let constraints = "";
    // Taxonomy and flat class list are alternatives; taxonomy wins.
    if (taxonomy) {
        constraints += `\n${taxonomyToPrompt(taxonomy)}`;
    }
    else if (allowedClasses) {
        constraints += `\nAllowed entity classes: ${allowedClasses.join(", ")}. Only use these classes.`;
    }
    // Value-level enum map supersedes the plain key allow-list.
    if (attrValuesMap) {
        const desc = Object.entries(attrValuesMap)
            .map(([k, v]) => `${k}: ${v.join(", ")}`)
            .join("; ");
        constraints += `\nOnly use these attribute keys and values: ${desc}. Omit attributes that don't apply to an entity.`;
    }
    else if (allowedAttrs) {
        constraints += `\nOnly use these attribute keys: ${allowedAttrs.join(", ")}. Omit attributes that don't apply to an entity.`;
    }
    if (enableNegation) {
        constraints += `\nEvery entity has an "assertion" field (not in attributes). Example: [{"class":"disease","text":"diabetes","assertion":"present","attributes":{}},{"class":"disease","text":"cancer","assertion":"negated","attributes":{}}]`;
    }
    if (enableConfidence) {
        constraints += `\nEvery entity has a "confidence" field (not in attributes). Example: [{"class":"person","text":"John","confidence":"high","attributes":{}}]`;
    }
    if (enableResolve) {
        constraints += `\nEvery entity has "entity_id" and "is_canonical" fields. Coreferent mentions share entity_id; exactly one per group has is_canonical:true (the most specific reference). Example: [{"class":"person","text":"Dr. Chen","entity_id":"e1","is_canonical":true,"attributes":{}},{"class":"person","text":"She","entity_id":"e1","is_canonical":false,"attributes":{}}]`;
    }
    if (enableRelations) {
        let relDesc = `\nAlso extract relations between entities.`;
        if (relationClasses) {
            relDesc += ` Each relation must have a "class" field categorizing the relation type.`;
        }
        // Describes the envelope shape produced by buildRelationsSchema().
        relDesc += ` Return {"entities": [...], "relations": [{"source": "entity text", "target": "entity text", "relation": "relation type"${relationClasses ? ', "class": "relation class"' : ""}}]}.`;
        constraints += relDesc;
        if (relationTypes) {
            constraints += ` Allowed relation types: ${relationTypes.join(", ")}.`;
        }
        if (relationClasses) {
            constraints += ` Allowed relation classes: ${relationClasses.join(", ")}.`;
        }
    }
    return constraints;
}
|
|
452
|
+
// === PROCESS A SINGLE TEXT ===
/**
 * Run one extraction over `inputText` using `session`.
 *
 * Generates with a grammar built from the active schema so output is
 * structurally valid JSON; falls back to plain JSON.parse and then to an
 * empty result (with a warning) if parsing still fails. Returns either an
 * entity array or, in --relations mode, {entities, relations}.
 */
async function processText(inputText, session) {
    const constraints = buildConstraints();
    const prompt = `Extract all named entities from the following text.${constraints}\n\nText: ${inputText}`;
    const schema = enableRelations
        ? buildRelationsSchema()
        : buildGrammarSchema();
    const grammar = await llama.createGrammarForJsonSchema(schema);
    const res = await session.prompt(prompt, { grammar });
    let parsed;
    try {
        parsed = grammar.parse(res);
    }
    catch {
        // Grammar parse can reject output it nonetheless produced
        // (e.g. truncation); try a plain JSON parse before giving up.
        try {
            parsed = JSON.parse(res.trim());
        }
        catch {
            console.error("Warning: Failed to parse model output. Raw response:", res);
            parsed = enableRelations ? { entities: [], relations: [] } : [];
        }
    }
    // If taxonomy is used, add taxonomyPath showing full hierarchy
    // (entity-array mode only; the relations envelope is left untouched).
    if (taxonomy && !enableRelations && Array.isArray(parsed)) {
        for (const entity of parsed) {
            if (entity.class && typeof entity.class === "string") {
                entity.taxonomyPath = getTaxonomyPath(entity.class, taxonomy);
            }
        }
    }
    return parsed;
}
|
|
484
|
+
// === MAIN ===
const systemPrompt = buildSystemPrompt();
// Piped output defaults to compact JSON; pretty-print only on a terminal.
const compact = opts.compact || !process.stdout.isTTY;
const contextSequence = context.getSequence();
if (inputTexts.length === 1) {
    // Single input: one session, one result, one JSON document out.
    const session = new LlamaChatSession({
        contextSequence,
        systemPrompt,
    });
    const result = await processText(inputTexts[0], session);
    console.log(JSON.stringify(result, null, compact ? 0 : 2));
}
else {
    // Batch / chunked: process each text, collect results
    const allResults = [];
    for (const inputText of inputTexts) {
        // Erase context and create fresh session for each input
        // so earlier inputs cannot leak into later extractions.
        await contextSequence.eraseContextTokenRanges([
            { start: 0, end: contextSequence.nextTokenIndex },
        ]);
        const session = new LlamaChatSession({
            contextSequence,
            systemPrompt,
        });
        const result = await processText(inputText, session);
        allResults.push(result);
    }
    if (opts.file) {
        // Merge chunked results into one
        if (enableRelations) {
            const merged = {
                entities: allResults.flatMap((r) => r.entities || []),
                relations: allResults.flatMap((r) => r.relations || []),
            };
            console.log(JSON.stringify(merged, null, compact ? 0 : 2));
        }
        else {
            const merged = allResults.flat();
            console.log(JSON.stringify(merged, null, compact ? 0 : 2));
        }
    }
    else {
        // Batch: output one result per line (JSONL)
        for (const result of allResults) {
            console.log(JSON.stringify(result));
        }
    }
}
// === CLEANUP ===
// Bun segfaults if process.exit() triggers synchronous native addon unloading.
// Setting exitCode lets the event loop drain naturally, avoiding the crash.
process.exitCode = 0;
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,QAAQ,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAE9E,iBAAiB;AACjB,MAAM,MAAM,GAAG;IACb,IAAI,EAAE,qDAAqD;IAC3D,QAAQ,EAAE,iDAAiD;IAC3D,IAAI,EAAE,iDAAiD;CAC/C,CAAC;AAEX,yBAAyB;AACzB,MAAM,aAAa,GAAG;;;;;;;sDAOgC,CAAC;AAEvD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;mPAcyN,CAAC;AAYpP,2BAA2B;AAC3B,wGAAwG;AACxG,+DAA+D;AAE/D,SAAS,YAAY,CAAC,QAA6B;IACjD,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,SAAS,QAAQ,CAAC,IAAS;QACzB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;gBACxB,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YACrD,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7C,QAAQ,CAAC,KAAK,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;IACH,CAAC;IAED,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACnB,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,eAAe,CACtB,IAAY,EACZ,QAA6B;IAE7B,SAAS,QAAQ,CACf,IAAS,EACT,MAAc,EACd,WAAqB;QAErB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;gBACxB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;oBAChD,OAAO,CAAC,GAAG,WAAW,EAAE,IAAI,CAAC,CAAC;gBAChC,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;oBACnD,IAAI,MAAM;wBAAE,OAAO,MAAM,CAAC;gBAC5B,CAAC;YACH,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YACrD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,GAAG,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC;gBAC9D,IAAI,MAAM;oBAAE,OAAO,MAAM,CAAC;YAC5B,CAAC;QACH,CAA
C;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,qBAAqB;IACrB,KAAK,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,GAAG,QAAQ,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;QAClD,IAAI,IAAI;YAAE,OAAO,IAAI,CAAC;IACxB,CAAC;IAED,OAAO,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,gBAAgB,CAAC,QAA6B;IACrD,SAAS,UAAU,CAAC,IAAS,EAAE,MAAc;QAC3C,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;gBACxB,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;gBACnC,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YACrD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAChD,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,KAAK,GAAG,EAAE,CAAC,CAAC;gBAChC,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAa;QACtB,gFAAgF;KACjF,CAAC;IAEF,KAAK,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC5D,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,EAAE,CAAC,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,mBAAmB;AACnB,SAAS,SAAS,CAAC,IAAY,EAAE,QAAgB;IAC/C,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IAC9C,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,QAAQ,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5B,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC;IAC7C,CAAC;IACD,IAAI,OAAO,CAAC,I
AAI,EAAE;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAChD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,cAAc;AACd,OAAO;KACJ,IAAI,CAAC,KAAK,CAAC;KACX,WAAW,CACV,wEAAwE,CACzE;KACA,QAAQ,CAAC,QAAQ,EAAE,yDAAyD,CAAC;KAC7E,MAAM,CAAC,sBAAsB,EAAE,wCAAwC,CAAC;KACxE,MAAM,CAAC,yBAAyB,EAAE,wCAAwC,CAAC;KAC3E,MAAM,CACL,sBAAsB,EACtB,4DAA4D,CAC7D;KACA,MAAM,CACL,mBAAmB,EACnB,2DAA2D,CAC5D;KACA,MAAM,CAAC,aAAa,EAAE,oCAAoC,CAAC;KAC3D,MAAM,CACL,2BAA2B,EAC3B,4EAA4E,CAC7E;KACA,MAAM,CAAC,WAAW,EAAE,sDAAsD,CAAC;KAC3E,MAAM,CAAC,sBAAsB,EAAE,sCAAsC,CAAC;KACtE,MAAM,CAAC,mBAAmB,EAAE,sCAAsC,CAAC;KACnE,MAAM,CAAC,iBAAiB,EAAE,gDAAgD,CAAC;KAC3E,MAAM,CACL,eAAe,EACf,sDAAsD,CACvD;KACA,MAAM,CACL,gBAAgB,EAChB,mEAAmE,CACpE;KACA,MAAM,CACL,0BAA0B,EAC1B,6CAA6C,CAC9C;KACA,MAAM,CACL,iCAAiC,EACjC,sCAAsC,CACvC;KACA,MAAM,CAAC,mBAAmB,EAAE,gCAAgC,CAAC;KAC7D,MAAM,CAAC,QAAQ,EAAE,sDAAsD,CAAC;KACxE,MAAM,CACL,YAAY,EACZ,yDAAyD,CAC1D;KACA,MAAM,CAAC,QAAQ,EAAE,mDAAmD,CAAC;KACrE,MAAM,CAAC,WAAW,EAAE,qDAAqD,CAAC;KAC1E,WAAW,CACV,OAAO,EACP;;;;;;;;;;;;CAYH,CACE;KACA,KAAK,EAAE,CAAC;AAEX,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;AAE5B,sBAAsB;AACtB,MAAM,SAAS,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAC/E,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;IAClB,OAAO,CAAC,KAAK,CACX,+DAA+D,CAChE,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;IACjD,OAAO,CAAC,KAAK,CACX,2EAA2E,CAC5E,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,2BAA2B;AAC3B,IAAI,UAAkC,CAAC;AACvC,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAClD,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,sCAAuC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,yDAAyD;AACzD,MAAM,cAAc,GAAyB,IAAI,CAAC,OAAO;IACvD,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,E
AAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,CAAC,CAAC,UAAU,EAAE,OAAO,CAAC;AAExB,MAAM,YAAY,GAAyB,IAAI,CAAC,UAAU;IACxD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzD,CAAC,CAAC,UAAU,EAAE,UAAU,CAAC;AAE3B,IAAI,aAAmD,CAAC;AACxD,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;IACpB,IAAI,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC9C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CACX,0CAA2C,CAAW,CAAC,OAAO,EAAE,CACjE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;KAAM,IAAI,UAAU,EAAE,UAAU,EAAE,CAAC;IAClC,aAAa,GAAG,UAAU,CAAC,UAAU,CAAC;AACxC,CAAC;AAED,IAAI,QAA8C,CAAC;AACnD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;IAClB,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CACX,uCAAwC,CAAW,CAAC,OAAO,EAAE,CAC9D,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;KAAM,IAAI,UAAU,EAAE,QAAQ,EAAE,CAAC;IAChC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC;AACjC,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,SAAS,IAAI,CAAC,CAAC,UAAU,EAAE,SAAS,CAAC;AAClE,MAAM,aAAa,GAAyB,UAAU,EAAE,SAAS,IAAI,SAAS,CAAC;AAC/E,MAAM,eAAe,GAAyB,IAAI,CAAC,eAAe;IAChE,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9D,CAAC,CAAC,UAAU,EAAE,eAAe,CAAC;AAChC,MAAM,aAAa,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AACrC,MAAM,gBAAgB,GAAG,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC;AAClD,MAAM,cAAc,GAAG,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC;AAE7C,qBAAqB;AACrB,IAAI,UAAU,GAAa,EAAE,CAAC;AAE9B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,MAAM,SAAS,GAAG,IAAI,CAAC,KAAe,CAAC;IACvC,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IACpC,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,EAAE;aACb,WAAW,CAAC,SAAS,CAAC;aACtB,MAAM,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QAC7C,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CACnC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CACzD,CAAC;
IACJ,CAAC;SAAM,CAAC;QACN,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QAC3D,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE;YACpD,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAChC,OAAO,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,IAAI,IAAI,CAAC;YACnE,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;AACH,CAAC;KAAM,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;IACrB,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3D,UAAU,GAAG,SAAS,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACxC,CAAC;KAAM,CAAC;IACN,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC3B,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,KAAe,CAAC,CAAC;QAC/B,CAAC;QACD,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;IACjD,CAAC;IACD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,CAAC,KAAK,CACX,6EAA6E,CAC9E,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,UAAU,GAAG,CAAC,IAAI,CAAC,CAAC;AACtB,CAAC;AAED,wBAAwB;AACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK;IACzB,CAAC,CAAC,IAAI,CAAC,KAAK;IACZ,CAAC,CAAC,IAAI,CAAC,IAAI;QACT,CAAC,CAAC,MAAM,CAAC,IAAI;QACb,CAAC,CAAC,IAAI,CAAC,QAAQ;YACb,CAAC,CAAC,MAAM,CAAC,QAAQ;YACjB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC;AAEpB,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AAChE,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;AAE9D,MAAM,KAAK,GAAG,MAAM,QAAQ,EAAE,CAAC;AAC/B,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC;AACnD,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,aAAa,EAAE,CAAC;AAE5C,8BAA8B;AAC9B,SAAS,iBAAiB;IACxB,IAAI,IAAI,CAAC,YAAY;QAAE,OAAO,IAAI,CAAC,YAAY,CAAC;IAEhD,IAAI,IAAI,GAAG,aAAa,CAAC;IAEzB,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,IAAI,mOAAmO,CAAC;IAC9O,CAAC;IACD,IAAI,gBAAgB,EAAE,CAAC;QACrB,IAAI,IAAI,kFAAkF,CAAC;IAC7F,CAAC;IACD,IAAI,aAAa,EAAE,CAAC;QAClB,IAAI,IAAI,2MAA2M,CAAC;QACpN,IAAI,
IAAI,iMAAiM,CAAC;IAC5M,CAAC;IAED,IAAI,MAAM,GAAG,GAAG,IAAI,OAAO,iBAAiB,EAAE,CAAC;IAE/C,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC5B,MAAM,IAAI,OAAO,IAAI,CAAC,kBAAkB,EAAE,CAAC;IAC7C,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,+BAA+B;AAC/B,SAAS,kBAAkB;IACzB,+DAA+D;IAC/D,gEAAgE;IAChE,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC;IAErE,MAAM,gBAAgB,GAAQ;QAC5B,IAAI,EAAE,QAAQ;QACd,oBAAoB,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;KACzC,CAAC;IAEF,MAAM,UAAU,GAAQ;QACtB,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,GAAG,CAAC,SAAS,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;SACtC;QACD,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;QACxB,UAAU,EAAE,gBAAgB;KAC7B,CAAC;IACF,MAAM,QAAQ,GAAa,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAE7C,gDAAgD;IAChD,gEAAgE;IAChE,+DAA+D;IAC/D,IAAI,cAAc,EAAE,CAAC;QACnB,UAAU,CAAC,SAAS,GAAG;YACrB,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,CAAC,SAAS,EAAE,SAAS,EAAE,cAAc,CAAC;SAC7C,CAAC;QACF,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7B,CAAC;IACD,IAAI,gBAAgB,EAAE,CAAC;QACrB,UAAU,CAAC,UAAU,GAAG;YACtB,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,MAAM,CAAC;SAChC,CAAC;QACF,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC9B,CAAC;IACD,IAAI,aAAa,EAAE,CAAC;QAClB,UAAU,CAAC,SAAS,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;QAC1C,UAAU,CAAC,YAAY,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC9C,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3B,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAChC,CAAC;IAED,MAAM,MAAM,GAAQ;QAClB,IAAI,EAAE,OAAO;QACb,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,UAAU;YACV,QAAQ;YACR,oBAAoB,EAAE,KAAK;SAC5B;KACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,iCAAiC;AACjC,SAAS,oBAAoB;IAC3B,MAAM,kBAAkB,GAAQ;QAC9B,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;QAC1B,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;QAC1B,QAAQ,EAAE;YACR,IAAI,EAAE,QAAQ;YACd,GAAG,CAAC,aAAa,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC;SAC9C;KACF,CAAC;IAEF,kDAAkD;IAClD,IAAI,eAAe,EAAE,CAAC;QACpB,kBAAkB,CAAC,KAAK,GAAG;YACzB,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,eAAe;SACtB,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAQ;QACrB,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,QAAQ,EAAE,kBAAkB,EAAE;YAC9B,SAAS,EAAE;gBACT,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE;o
BACL,IAAI,EAAE,QAAQ;oBACd,UAAU,EAAE,kBAAkB;oBAC9B,QAAQ,EAAE,eAAe;wBACvB,CAAC,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC;wBAC3C,CAAC,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,UAAU,CAAC;oBACpC,oBAAoB,EAAE,KAAK;iBAC5B;aACF;SACF;QACD,QAAQ,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC;QACnC,oBAAoB,EAAE,KAAK;KAC5B,CAAC;IACF,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,mCAAmC;AACnC,SAAS,gBAAgB;IACvB,IAAI,WAAW,GAAG,EAAE,CAAC;IAErB,IAAI,QAAQ,EAAE,CAAC;QACb,WAAW,IAAI,KAAK,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;IACnD,CAAC;SAAM,IAAI,cAAc,EAAE,CAAC;QAC1B,WAAW,IAAI,6BAA6B,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,2BAA2B,CAAC;IACnG,CAAC;IAED,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC;aACvC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;aACxC,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,WAAW,IAAI,+CAA+C,IAAI,kDAAkD,CAAC;IACvH,CAAC;SAAM,IAAI,YAAY,EAAE,CAAC;QACxB,WAAW,IAAI,oCAAoC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,kDAAkD,CAAC;IAC/H,CAAC;IAED,IAAI,cAAc,EAAE,CAAC;QACnB,WAAW,IAAI,+NAA+N,CAAC;IACjP,CAAC;IACD,IAAI,gBAAgB,EAAE,CAAC;QACrB,WAAW,IAAI,8IAA8I,CAAC;IAChK,CAAC;IACD,IAAI,aAAa,EAAE,CAAC;QAClB,WAAW,IAAI,sWAAsW,CAAC;IACxX,CAAC;IACD,IAAI,eAAe,EAAE,CAAC;QACpB,IAAI,OAAO,GAAG,4CAA4C,CAAC;QAC3D,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,IAAI,0EAA0E,CAAC;QACxF,CAAC;QACD,OAAO,IAAI,2HAA2H,eAAe,CAAC,CAAC,CAAC,6BAA6B,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC;QACjM,WAAW,IAAI,OAAO,CAAC;QACvB,IAAI,aAAa,EAAE,CAAC;YAClB,WAAW,IAAI,4BAA4B,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;QACzE,CAAC;QACD,IAAI,eAAe,EAAE,CAAC;YACpB,WAAW,IAAI,8BAA8B,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;QAC7E,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,gCAAgC;AAChC,KAAK,UAAU,WAAW,CACxB,SAAiB,EACjB,OAAyB;IAEzB,MAAM,WAAW,GAAG,gBAAgB,EAAE,CAAC;IACvC,MAAM,MAAM,GAAG,sDAAsD,WAAW,aAAa,SAAS,EAAE,CAAC;IAEzG,MAAM,MAAM,GAAG,eAAe;QAC5B,CAAC,CAAC,oBAAoB,EAAE;QACxB,CAAC,CAAC,kBAAkB,EAAE,CAAC;IACzB,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,0BAA0B,CAAC,MAAM,CAAC,CAAC;IAE/D,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;IAEtD,IAA
I,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,KAAK,CACX,sDAAsD,EACtD,GAAG,CACJ,CAAC;YACF,MAAM,GAAG,eAAe,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAClE,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,QAAQ,IAAI,CAAC,eAAe,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1D,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;YAC5B,IAAI,MAAM,CAAC,KAAK,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBACrD,MAAM,CAAC,YAAY,GAAG,eAAe,CAAC,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,eAAe;AACf,MAAM,YAAY,GAAG,iBAAiB,EAAE,CAAC;AACzC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC;AAEtD,MAAM,eAAe,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;AAE9C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAG,IAAI,gBAAgB,CAAC;QACnC,eAAe;QACf,YAAY;KACb,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC,CAAE,EAAE,OAAO,CAAC,CAAC;IAC1D,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7D,CAAC;KAAM,CAAC;IACN,sDAAsD;IACtD,MAAM,UAAU,GAAU,EAAE,CAAC;IAC7B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,wDAAwD;QACxD,MAAM,eAAe,CAAC,uBAAuB,CAAC;YAC5C,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,eAAe,CAAC,cAAc,EAAE;SAClD,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,gBAAgB,CAAC;YACnC,eAAe;YACf,YAAY;SACb,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACrD,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1B,CAAC;IAED,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACd,iCAAiC;QACjC,IAAI,eAAe,EAAE,CAAC;YACpB,MAAM,MAAM,GAAG;gBACb,QAAQ,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC;gBACrD,SAAS,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC;aACxD,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,C
AAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,4CAA4C;QAC5C,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;AACH,CAAC;AAED,kBAAkB;AAClB,+EAA+E;AAC/E,4EAA4E;AAC5E,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,21 +1,35 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "objectivist-ner",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.9",
|
|
4
4
|
"description": "Objectivist-inspired Named Entity Recognition with grammar-constrained LLM output",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
5
7
|
"bin": {
|
|
6
|
-
"ner": "index.
|
|
8
|
+
"ner": "dist/index.js"
|
|
7
9
|
},
|
|
8
|
-
"module": "index.ts",
|
|
9
10
|
"type": "module",
|
|
10
11
|
"license": "MIT",
|
|
11
12
|
"author": "Richard Anaya",
|
|
12
13
|
"repository": "git@github.com:richardanaya/objectivist-ner.git",
|
|
13
|
-
"
|
|
14
|
-
"
|
|
15
|
-
"
|
|
14
|
+
"files": [
|
|
15
|
+
"dist/",
|
|
16
|
+
"LICENSE",
|
|
17
|
+
"README.md"
|
|
18
|
+
],
|
|
19
|
+
"scripts": {
|
|
20
|
+
"dev": "tsx index.ts",
|
|
21
|
+
"build": "tsc",
|
|
22
|
+
"prepublishOnly": "npm run build",
|
|
23
|
+
"postinstall": "node-llama-cpp download || true"
|
|
24
|
+
},
|
|
25
|
+
"engines": {
|
|
26
|
+
"node": ">=18"
|
|
16
27
|
},
|
|
17
|
-
"
|
|
18
|
-
"
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"@types/node": "^20.0.0",
|
|
30
|
+
"prettier": "^3.8.1",
|
|
31
|
+
"tsx": "^4.0.0",
|
|
32
|
+
"typescript": "^5.0.0"
|
|
19
33
|
},
|
|
20
34
|
"dependencies": {
|
|
21
35
|
"commander": "^14.0.3",
|
package/index.ts
DELETED
|
@@ -1,635 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
import { program } from "commander";
|
|
3
|
-
import path from "path";
|
|
4
|
-
import os from "os";
|
|
5
|
-
import fs from "fs";
|
|
6
|
-
import { getLlama, LlamaChatSession, resolveModelFile } from "node-llama-cpp";
|
|
7
|
-
|
|
8
|
-
// === MODELS ===
|
|
9
|
-
const MODELS = {
|
|
10
|
-
fast: "hf:unsloth/Qwen3.5-0.8B-GGUF:Qwen3.5-0.8B-Q8_0.gguf",
|
|
11
|
-
balanced: "hf:unsloth/Qwen3.5-2B-GGUF:Qwen3.5-2B-Q8_0.gguf",
|
|
12
|
-
best: "hf:unsloth/Qwen3.5-4B-GGUF:Qwen3.5-4B-Q8_0.gguf",
|
|
13
|
-
} as const;
|
|
14
|
-
|
|
15
|
-
// === SYSTEM PROMPTS ===
|
|
16
|
-
const SYSTEM_PROMPT = `You are a named entity recognition (NER) system. Your task is to extract entities from text.
|
|
17
|
-
|
|
18
|
-
Rules:
|
|
19
|
-
- "text" must be the EXACT substring from the input that refers to the entity. Do NOT paraphrase or include extra words.
|
|
20
|
-
- "class" is the entity type (e.g. person, animal, location, organization).
|
|
21
|
-
- "attributes" are properties of the entity found in context.
|
|
22
|
-
- Return one object per distinct entity mention.
|
|
23
|
-
- If no entities are found, return an empty array [].`;
|
|
24
|
-
|
|
25
|
-
const FEW_SHOT_EXAMPLES = `Example 1:
|
|
26
|
-
Input: "the cat is blue and is feeling sad"
|
|
27
|
-
Output: [{"class":"animal","text":"cat","attributes":{"color":"blue","emotional_state":"sad"}}]
|
|
28
|
-
|
|
29
|
-
Example 2:
|
|
30
|
-
Input: "John Smith lives in New York City and works at Google"
|
|
31
|
-
Output: [{"class":"person","text":"John Smith","attributes":{"location":"New York City","employer":"Google"}},{"class":"location","text":"New York City","attributes":{}},{"class":"organization","text":"Google","attributes":{}}]
|
|
32
|
-
|
|
33
|
-
Example 3:
|
|
34
|
-
Input: "The quick brown fox jumps over the lazy dog near the river"
|
|
35
|
-
Output: [{"class":"animal","text":"fox","attributes":{"color":"brown","speed":"quick"}},{"class":"animal","text":"dog","attributes":{"temperament":"lazy"}},{"class":"location","text":"the river","attributes":{}}]
|
|
36
|
-
|
|
37
|
-
Example 4:
|
|
38
|
-
Input: "Researchers at MIT found that the drug Riluzole slows progression of ALS in a trial last March"
|
|
39
|
-
Output: [{"class":"organization","text":"MIT","attributes":{}},{"class":"drug","text":"Riluzole","attributes":{}},{"class":"disease","text":"ALS","attributes":{}},{"class":"event","text":"trial last March","attributes":{"date":"last March"}}]`;
|
|
40
|
-
|
|
41
|
-
// === SCHEMA FILE TYPES ===
|
|
42
|
-
interface SchemaFile {
|
|
43
|
-
taxonomy?: Record<string, string[]>;
|
|
44
|
-
classes?: string[];
|
|
45
|
-
attributes?: string[];
|
|
46
|
-
attrValues?: Record<string, string[]>;
|
|
47
|
-
relations?: string[];
|
|
48
|
-
relationClasses?: string[];
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
// === TAXONOMY HELPERS ===
|
|
52
|
-
// Taxonomy format: {"organism": ["person", {"animal": ["dog", "cat"]}], "idea": ["dream", "principle"]}
|
|
53
|
-
// Arrays contain leaf nodes, objects contain nested taxonomies
|
|
54
|
-
|
|
55
|
-
function getLeafNodes(taxonomy: Record<string, any>): string[] {
|
|
56
|
-
const leaves: string[] = [];
|
|
57
|
-
|
|
58
|
-
function traverse(node: any) {
|
|
59
|
-
if (Array.isArray(node)) {
|
|
60
|
-
for (const item of node) {
|
|
61
|
-
if (typeof item === "string") {
|
|
62
|
-
leaves.push(item);
|
|
63
|
-
} else if (typeof item === "object" && item !== null) {
|
|
64
|
-
traverse(item);
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
} else if (typeof node === "object" && node !== null) {
|
|
68
|
-
for (const [, value] of Object.entries(node)) {
|
|
69
|
-
traverse(value);
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
traverse(taxonomy);
|
|
75
|
-
return leaves;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
function getTaxonomyPath(
|
|
79
|
-
leaf: string,
|
|
80
|
-
taxonomy: Record<string, any>,
|
|
81
|
-
): string[] {
|
|
82
|
-
function findPath(
|
|
83
|
-
node: any,
|
|
84
|
-
target: string,
|
|
85
|
-
currentPath: string[],
|
|
86
|
-
): string[] | null {
|
|
87
|
-
if (Array.isArray(node)) {
|
|
88
|
-
for (const item of node) {
|
|
89
|
-
if (typeof item === "string" && item === target) {
|
|
90
|
-
return [...currentPath, item];
|
|
91
|
-
} else if (typeof item === "object" && item !== null) {
|
|
92
|
-
const result = findPath(item, target, currentPath);
|
|
93
|
-
if (result) return result;
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
} else if (typeof node === "object" && node !== null) {
|
|
97
|
-
for (const [key, value] of Object.entries(node)) {
|
|
98
|
-
const result = findPath(value, target, [...currentPath, key]);
|
|
99
|
-
if (result) return result;
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
return null;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
// Try each root node
|
|
106
|
-
for (const [rootKey, rootValue] of Object.entries(taxonomy)) {
|
|
107
|
-
const path = findPath(rootValue, leaf, [rootKey]);
|
|
108
|
-
if (path) return path;
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
return [leaf];
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
function taxonomyToPrompt(taxonomy: Record<string, any>): string {
|
|
115
|
-
function formatNode(node: any, indent: string): string[] {
|
|
116
|
-
const lines: string[] = [];
|
|
117
|
-
|
|
118
|
-
if (Array.isArray(node)) {
|
|
119
|
-
for (const item of node) {
|
|
120
|
-
if (typeof item === "string") {
|
|
121
|
-
lines.push(`${indent}- ${item}`);
|
|
122
|
-
} else if (typeof item === "object" && item !== null) {
|
|
123
|
-
lines.push(...formatNode(item, indent));
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
} else if (typeof node === "object" && node !== null) {
|
|
127
|
-
for (const [key, value] of Object.entries(node)) {
|
|
128
|
-
lines.push(`${indent}- ${key}`);
|
|
129
|
-
lines.push(...formatNode(value, indent + " "));
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
return lines;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
const lines: string[] = [
|
|
137
|
-
"Use the following class hierarchy. Classify at the most specific (leaf) level:",
|
|
138
|
-
];
|
|
139
|
-
|
|
140
|
-
for (const [rootKey, rootValue] of Object.entries(taxonomy)) {
|
|
141
|
-
lines.push(`- ${rootKey}`);
|
|
142
|
-
lines.push(...formatNode(rootValue, " "));
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
return lines.join("\n");
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
// === CHUNKING ===
|
|
149
|
-
function chunkText(text: string, maxChars: number): string[] {
|
|
150
|
-
if (text.length <= maxChars) return [text];
|
|
151
|
-
const chunks: string[] = [];
|
|
152
|
-
const sentences = text.split(/(?<=[.!?])\s+/);
|
|
153
|
-
let current = "";
|
|
154
|
-
for (const sentence of sentences) {
|
|
155
|
-
if (current.length + sentence.length > maxChars && current.length > 0) {
|
|
156
|
-
chunks.push(current.trim());
|
|
157
|
-
current = "";
|
|
158
|
-
}
|
|
159
|
-
current += (current ? " " : "") + sentence;
|
|
160
|
-
}
|
|
161
|
-
if (current.trim()) chunks.push(current.trim());
|
|
162
|
-
return chunks;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// === CLI ===
|
|
166
|
-
program
|
|
167
|
-
.name("ner")
|
|
168
|
-
.description(
|
|
169
|
-
"Objectivist-inspired named entity recognition with grammar constraints",
|
|
170
|
-
)
|
|
171
|
-
.argument("[text]", "Text to extract entities from (omit to read from stdin)")
|
|
172
|
-
.option("-c, --classes <list>", "Comma-separated allowed entity classes")
|
|
173
|
-
.option("-a, --attributes <list>", "Comma-separated allowed attribute keys")
|
|
174
|
-
.option(
|
|
175
|
-
"--attr-values <json>",
|
|
176
|
-
'JSON enum map for attributes e.g. {"color":["blue","red"]}',
|
|
177
|
-
)
|
|
178
|
-
.option(
|
|
179
|
-
"--taxonomy <json>",
|
|
180
|
-
'Class hierarchy JSON e.g. {"organism":["animal","plant"]}',
|
|
181
|
-
)
|
|
182
|
-
.option("--relations", "Extract relations between entities")
|
|
183
|
-
.option(
|
|
184
|
-
"--relation-classes <list>",
|
|
185
|
-
"Comma-separated allowed relation classes (e.g. employment,location,causal)",
|
|
186
|
-
)
|
|
187
|
-
.option("--resolve", "Resolve coreferences (group mentions of same entity)")
|
|
188
|
-
.option("--include-confidence", "Include confidence scores per entity")
|
|
189
|
-
.option("--detect-negation", "Detect negated/hypothetical entities")
|
|
190
|
-
.option("--schema <path>", "Load entity schema definition from a JSON file")
|
|
191
|
-
.option(
|
|
192
|
-
"--file <path>",
|
|
193
|
-
"Read input from a file (with chunking for long docs)",
|
|
194
|
-
)
|
|
195
|
-
.option(
|
|
196
|
-
"--batch <path>",
|
|
197
|
-
"Process JSONL file (one text per line) or directory of .txt files",
|
|
198
|
-
)
|
|
199
|
-
.option(
|
|
200
|
-
"--system-prompt <string>",
|
|
201
|
-
"Replace the built-in system prompt entirely",
|
|
202
|
-
)
|
|
203
|
-
.option(
|
|
204
|
-
"--system-prompt-append <string>",
|
|
205
|
-
"Append to the built-in system prompt",
|
|
206
|
-
)
|
|
207
|
-
.option("-m, --model <uri>", "Model URI or path to GGUF file")
|
|
208
|
-
.option("--fast", "Use smallest model (0.8B) -- quick, simple text only")
|
|
209
|
-
.option(
|
|
210
|
-
"--balanced",
|
|
211
|
-
"Use mid-size model (2B) -- good accuracy/speed tradeoff",
|
|
212
|
-
)
|
|
213
|
-
.option("--best", "Use largest model (4B) -- best accuracy (default)")
|
|
214
|
-
.option("--compact", "Output compact JSON (also auto-enabled for non-TTY)")
|
|
215
|
-
.addHelpText(
|
|
216
|
-
"after",
|
|
217
|
-
`
|
|
218
|
-
Examples:
|
|
219
|
-
ner "the cat is blue"
|
|
220
|
-
ner "John works at Google" --classes person,organization
|
|
221
|
-
ner "sky is blue" --attr-values '{"color":["blue","red"]}'
|
|
222
|
-
ner --relations "Dr. Chen works at MIT"
|
|
223
|
-
ner --resolve "Dr. Chen published a paper. She won an award."
|
|
224
|
-
ner --detect-negation "The patient does not have cancer"
|
|
225
|
-
ner --schema schema.json "complex text"
|
|
226
|
-
ner --file document.txt
|
|
227
|
-
ner --batch inputs.jsonl
|
|
228
|
-
echo "the cat is blue" | ner
|
|
229
|
-
`,
|
|
230
|
-
)
|
|
231
|
-
.parse();
|
|
232
|
-
|
|
233
|
-
const opts = program.opts();
|
|
234
|
-
|
|
235
|
-
// === VALIDATIONS ===
|
|
236
|
-
const tierFlags = [opts.fast, opts.balanced, opts.best].filter(Boolean).length;
|
|
237
|
-
if (tierFlags > 1) {
|
|
238
|
-
console.error(
|
|
239
|
-
"Error: --fast, --balanced, and --best are mutually exclusive.",
|
|
240
|
-
);
|
|
241
|
-
process.exit(1);
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
if (opts.systemPrompt && opts.systemPromptAppend) {
|
|
245
|
-
console.error(
|
|
246
|
-
"Error: --system-prompt and --system-prompt-append are mutually exclusive.",
|
|
247
|
-
);
|
|
248
|
-
process.exit(1);
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
// === LOAD SCHEMA FILE ===
|
|
252
|
-
let schemaFile: SchemaFile | undefined;
|
|
253
|
-
if (opts.schema) {
|
|
254
|
-
try {
|
|
255
|
-
const raw = fs.readFileSync(opts.schema, "utf-8");
|
|
256
|
-
schemaFile = JSON.parse(raw);
|
|
257
|
-
} catch (e) {
|
|
258
|
-
console.error(`Error: Failed to load schema file: ${(e as Error).message}`);
|
|
259
|
-
process.exit(1);
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
// === MERGE OPTIONS (CLI flags override schema file) ===
|
|
264
|
-
const allowedClasses: string[] | undefined = opts.classes
|
|
265
|
-
? opts.classes.split(",").map((s: string) => s.trim())
|
|
266
|
-
: schemaFile?.classes;
|
|
267
|
-
|
|
268
|
-
const allowedAttrs: string[] | undefined = opts.attributes
|
|
269
|
-
? opts.attributes.split(",").map((s: string) => s.trim())
|
|
270
|
-
: schemaFile?.attributes;
|
|
271
|
-
|
|
272
|
-
let attrValuesMap: Record<string, string[]> | undefined;
|
|
273
|
-
if (opts.attrValues) {
|
|
274
|
-
try {
|
|
275
|
-
attrValuesMap = JSON.parse(opts.attrValues);
|
|
276
|
-
} catch (e) {
|
|
277
|
-
console.error(
|
|
278
|
-
`Error: Invalid JSON for --attr-values: ${(e as Error).message}`,
|
|
279
|
-
);
|
|
280
|
-
process.exit(1);
|
|
281
|
-
}
|
|
282
|
-
} else if (schemaFile?.attrValues) {
|
|
283
|
-
attrValuesMap = schemaFile.attrValues;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
let taxonomy: Record<string, string[]> | undefined;
|
|
287
|
-
if (opts.taxonomy) {
|
|
288
|
-
try {
|
|
289
|
-
taxonomy = JSON.parse(opts.taxonomy);
|
|
290
|
-
} catch (e) {
|
|
291
|
-
console.error(
|
|
292
|
-
`Error: Invalid JSON for --taxonomy: ${(e as Error).message}`,
|
|
293
|
-
);
|
|
294
|
-
process.exit(1);
|
|
295
|
-
}
|
|
296
|
-
} else if (schemaFile?.taxonomy) {
|
|
297
|
-
taxonomy = schemaFile.taxonomy;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
const enableRelations = opts.relations || !!schemaFile?.relations;
|
|
301
|
-
const relationTypes: string[] | undefined = schemaFile?.relations || undefined;
|
|
302
|
-
const relationClasses: string[] | undefined = opts.relationClasses
|
|
303
|
-
? opts.relationClasses.split(",").map((s: string) => s.trim())
|
|
304
|
-
: schemaFile?.relationClasses;
|
|
305
|
-
const enableResolve = !!opts.resolve;
|
|
306
|
-
const enableConfidence = !!opts.includeConfidence;
|
|
307
|
-
const enableNegation = !!opts.detectNegation;
|
|
308
|
-
|
|
309
|
-
// === READ INPUT ===
|
|
310
|
-
let inputTexts: string[] = [];
|
|
311
|
-
|
|
312
|
-
if (opts.batch) {
|
|
313
|
-
const batchPath = opts.batch as string;
|
|
314
|
-
const stat = fs.statSync(batchPath);
|
|
315
|
-
if (stat.isDirectory()) {
|
|
316
|
-
const files = fs
|
|
317
|
-
.readdirSync(batchPath)
|
|
318
|
-
.filter((f: string) => f.endsWith(".txt"));
|
|
319
|
-
inputTexts = files.map((f: string) =>
|
|
320
|
-
fs.readFileSync(path.join(batchPath, f), "utf-8").trim(),
|
|
321
|
-
);
|
|
322
|
-
} else {
|
|
323
|
-
const content = fs.readFileSync(batchPath, "utf-8").trim();
|
|
324
|
-
inputTexts = content.split("\n").map((line: string) => {
|
|
325
|
-
try {
|
|
326
|
-
const parsed = JSON.parse(line);
|
|
327
|
-
return typeof parsed === "string" ? parsed : parsed.text || line;
|
|
328
|
-
} catch {
|
|
329
|
-
return line;
|
|
330
|
-
}
|
|
331
|
-
});
|
|
332
|
-
}
|
|
333
|
-
} else if (opts.file) {
|
|
334
|
-
const content = fs.readFileSync(opts.file, "utf-8").trim();
|
|
335
|
-
inputTexts = chunkText(content, 2000);
|
|
336
|
-
} else {
|
|
337
|
-
let text = program.args[0];
|
|
338
|
-
if (!text) {
|
|
339
|
-
const chunks: Buffer[] = [];
|
|
340
|
-
for await (const chunk of process.stdin) {
|
|
341
|
-
chunks.push(chunk as Buffer);
|
|
342
|
-
}
|
|
343
|
-
text = Buffer.concat(chunks).toString().trim();
|
|
344
|
-
}
|
|
345
|
-
if (!text) {
|
|
346
|
-
console.error(
|
|
347
|
-
"Error: No input text provided. Pass as argument, --file, --batch, or stdin.",
|
|
348
|
-
);
|
|
349
|
-
process.exit(1);
|
|
350
|
-
}
|
|
351
|
-
inputTexts = [text];
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
// === RESOLVE MODEL ===
|
|
355
|
-
const modelUri = opts.model
|
|
356
|
-
? opts.model
|
|
357
|
-
: opts.fast
|
|
358
|
-
? MODELS.fast
|
|
359
|
-
: opts.balanced
|
|
360
|
-
? MODELS.balanced
|
|
361
|
-
: MODELS.best;
|
|
362
|
-
|
|
363
|
-
const modelsDir = path.join(os.homedir(), ".fastner", "models");
|
|
364
|
-
const modelPath = await resolveModelFile(modelUri, modelsDir);
|
|
365
|
-
|
|
366
|
-
const llama = await getLlama();
|
|
367
|
-
const model = await llama.loadModel({ modelPath });
|
|
368
|
-
const context = await model.createContext();
|
|
369
|
-
|
|
370
|
-
// === BUILD SYSTEM PROMPT ===
|
|
371
|
-
function buildSystemPrompt(): string {
|
|
372
|
-
if (opts.systemPrompt) return opts.systemPrompt;
|
|
373
|
-
|
|
374
|
-
let base = SYSTEM_PROMPT;
|
|
375
|
-
|
|
376
|
-
if (enableNegation) {
|
|
377
|
-
base += `\n- Every entity has a top-level "assertion" field: "present", "negated", or "hypothetical". "negated" means the text explicitly denies it (e.g. "does not have"). "hypothetical" means it is speculative (e.g. "might develop").`;
|
|
378
|
-
}
|
|
379
|
-
if (enableConfidence) {
|
|
380
|
-
base += `\n- Every entity has a top-level "confidence" field: "low", "medium", or "high".`;
|
|
381
|
-
}
|
|
382
|
-
if (enableResolve) {
|
|
383
|
-
base += `\n- Every entity has a top-level "entity_id" field. If multiple text spans refer to the same real-world entity (e.g. "Dr. Chen" and "she"), they share the same entity_id. Use short IDs like "e1", "e2".`;
|
|
384
|
-
base += `\n- When multiple mentions share an entity_id, exactly ONE of them must have "is_canonical": true (the most specific reference like a proper name). The others must have "is_canonical": false.`;
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
let prompt = `${base}\n\n${FEW_SHOT_EXAMPLES}`;
|
|
388
|
-
|
|
389
|
-
if (opts.systemPromptAppend) {
|
|
390
|
-
prompt += `\n\n${opts.systemPromptAppend}`;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
return prompt;
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
// === BUILD GRAMMAR SCHEMA ===
|
|
397
|
-
function buildGrammarSchema() {
|
|
398
|
-
// When using taxonomy, only allow leaf nodes as valid classes.
|
|
399
|
-
// This forces the model to classify at the most specific level.
|
|
400
|
-
const classEnum = taxonomy ? getLeafNodes(taxonomy) : allowedClasses;
|
|
401
|
-
|
|
402
|
-
const attributesSchema: any = {
|
|
403
|
-
type: "object",
|
|
404
|
-
additionalProperties: { type: "string" },
|
|
405
|
-
};
|
|
406
|
-
|
|
407
|
-
const properties: any = {
|
|
408
|
-
class: {
|
|
409
|
-
type: "string",
|
|
410
|
-
...(classEnum && { enum: classEnum }),
|
|
411
|
-
},
|
|
412
|
-
text: { type: "string" },
|
|
413
|
-
attributes: attributesSchema,
|
|
414
|
-
};
|
|
415
|
-
const required: string[] = ["class", "text"];
|
|
416
|
-
|
|
417
|
-
// Grammar-enforced fields for enabled features.
|
|
418
|
-
// These are top-level entity properties (not inside attributes)
|
|
419
|
-
// so the grammar can enforce them as required on every entity.
|
|
420
|
-
if (enableNegation) {
|
|
421
|
-
properties.assertion = {
|
|
422
|
-
type: "string",
|
|
423
|
-
enum: ["present", "negated", "hypothetical"],
|
|
424
|
-
};
|
|
425
|
-
required.push("assertion");
|
|
426
|
-
}
|
|
427
|
-
if (enableConfidence) {
|
|
428
|
-
properties.confidence = {
|
|
429
|
-
type: "string",
|
|
430
|
-
enum: ["low", "medium", "high"],
|
|
431
|
-
};
|
|
432
|
-
required.push("confidence");
|
|
433
|
-
}
|
|
434
|
-
if (enableResolve) {
|
|
435
|
-
properties.entity_id = { type: "string" };
|
|
436
|
-
properties.is_canonical = { type: "boolean" };
|
|
437
|
-
required.push("entity_id");
|
|
438
|
-
required.push("is_canonical");
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
const schema: any = {
|
|
442
|
-
type: "array",
|
|
443
|
-
items: {
|
|
444
|
-
type: "object",
|
|
445
|
-
properties,
|
|
446
|
-
required,
|
|
447
|
-
additionalProperties: false,
|
|
448
|
-
},
|
|
449
|
-
};
|
|
450
|
-
|
|
451
|
-
return schema;
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
// === BUILD RELATIONS SCHEMA ===
|
|
455
|
-
function buildRelationsSchema() {
|
|
456
|
-
const relationProperties: any = {
|
|
457
|
-
source: { type: "string" },
|
|
458
|
-
target: { type: "string" },
|
|
459
|
-
relation: {
|
|
460
|
-
type: "string",
|
|
461
|
-
...(relationTypes && { enum: relationTypes }),
|
|
462
|
-
},
|
|
463
|
-
};
|
|
464
|
-
|
|
465
|
-
// Add class field if relationClasses is specified
|
|
466
|
-
if (relationClasses) {
|
|
467
|
-
relationProperties.class = {
|
|
468
|
-
type: "string",
|
|
469
|
-
enum: relationClasses,
|
|
470
|
-
};
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
const relSchema: any = {
|
|
474
|
-
type: "object",
|
|
475
|
-
properties: {
|
|
476
|
-
entities: buildGrammarSchema(),
|
|
477
|
-
relations: {
|
|
478
|
-
type: "array",
|
|
479
|
-
items: {
|
|
480
|
-
type: "object",
|
|
481
|
-
properties: relationProperties,
|
|
482
|
-
required: relationClasses
|
|
483
|
-
? ["source", "target", "relation", "class"]
|
|
484
|
-
: ["source", "target", "relation"],
|
|
485
|
-
additionalProperties: false,
|
|
486
|
-
},
|
|
487
|
-
},
|
|
488
|
-
},
|
|
489
|
-
required: ["entities", "relations"],
|
|
490
|
-
additionalProperties: false,
|
|
491
|
-
};
|
|
492
|
-
return relSchema;
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
// === BUILD PROMPT CONSTRAINTS ===
|
|
496
|
-
function buildConstraints(): string {
|
|
497
|
-
let constraints = "";
|
|
498
|
-
|
|
499
|
-
if (taxonomy) {
|
|
500
|
-
constraints += `\n${taxonomyToPrompt(taxonomy)}`;
|
|
501
|
-
} else if (allowedClasses) {
|
|
502
|
-
constraints += `\nAllowed entity classes: ${allowedClasses.join(", ")}. Only use these classes.`;
|
|
503
|
-
}
|
|
504
|
-
|
|
505
|
-
if (attrValuesMap) {
|
|
506
|
-
const desc = Object.entries(attrValuesMap)
|
|
507
|
-
.map(([k, v]) => `${k}: ${v.join(", ")}`)
|
|
508
|
-
.join("; ");
|
|
509
|
-
constraints += `\nOnly use these attribute keys and values: ${desc}. Omit attributes that don't apply to an entity.`;
|
|
510
|
-
} else if (allowedAttrs) {
|
|
511
|
-
constraints += `\nOnly use these attribute keys: ${allowedAttrs.join(", ")}. Omit attributes that don't apply to an entity.`;
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
if (enableNegation) {
|
|
515
|
-
constraints += `\nEvery entity has an "assertion" field (not in attributes). Example: [{"class":"disease","text":"diabetes","assertion":"present","attributes":{}},{"class":"disease","text":"cancer","assertion":"negated","attributes":{}}]`;
|
|
516
|
-
}
|
|
517
|
-
if (enableConfidence) {
|
|
518
|
-
constraints += `\nEvery entity has a "confidence" field (not in attributes). Example: [{"class":"person","text":"John","confidence":"high","attributes":{}}]`;
|
|
519
|
-
}
|
|
520
|
-
if (enableResolve) {
|
|
521
|
-
constraints += `\nEvery entity has "entity_id" and "is_canonical" fields. Coreferent mentions share entity_id; exactly one per group has is_canonical:true (the most specific reference). Example: [{"class":"person","text":"Dr. Chen","entity_id":"e1","is_canonical":true,"attributes":{}},{"class":"person","text":"She","entity_id":"e1","is_canonical":false,"attributes":{}}]`;
|
|
522
|
-
}
|
|
523
|
-
if (enableRelations) {
|
|
524
|
-
let relDesc = `\nAlso extract relations between entities.`;
|
|
525
|
-
if (relationClasses) {
|
|
526
|
-
relDesc += ` Each relation must have a "class" field categorizing the relation type.`;
|
|
527
|
-
}
|
|
528
|
-
relDesc += ` Return {"entities": [...], "relations": [{"source": "entity text", "target": "entity text", "relation": "relation type"${relationClasses ? ', "class": "relation class"' : ""}}]}.`;
|
|
529
|
-
constraints += relDesc;
|
|
530
|
-
if (relationTypes) {
|
|
531
|
-
constraints += ` Allowed relation types: ${relationTypes.join(", ")}.`;
|
|
532
|
-
}
|
|
533
|
-
if (relationClasses) {
|
|
534
|
-
constraints += ` Allowed relation classes: ${relationClasses.join(", ")}.`;
|
|
535
|
-
}
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
return constraints;
|
|
539
|
-
}
|
|
540
|
-
|
|
541
|
-
// === PROCESS A SINGLE TEXT ===
|
|
542
|
-
async function processText(
|
|
543
|
-
inputText: string,
|
|
544
|
-
session: LlamaChatSession,
|
|
545
|
-
): Promise<any> {
|
|
546
|
-
const constraints = buildConstraints();
|
|
547
|
-
const prompt = `Extract all named entities from the following text.${constraints}\n\nText: ${inputText}`;
|
|
548
|
-
|
|
549
|
-
const schema = enableRelations
|
|
550
|
-
? buildRelationsSchema()
|
|
551
|
-
: buildGrammarSchema();
|
|
552
|
-
const grammar = await llama.createGrammarForJsonSchema(schema);
|
|
553
|
-
|
|
554
|
-
const res = await session.prompt(prompt, { grammar });
|
|
555
|
-
|
|
556
|
-
let parsed: any;
|
|
557
|
-
try {
|
|
558
|
-
parsed = grammar.parse(res);
|
|
559
|
-
} catch {
|
|
560
|
-
try {
|
|
561
|
-
parsed = JSON.parse(res.trim());
|
|
562
|
-
} catch {
|
|
563
|
-
console.error(
|
|
564
|
-
"Warning: Failed to parse model output. Raw response:",
|
|
565
|
-
res,
|
|
566
|
-
);
|
|
567
|
-
parsed = enableRelations ? { entities: [], relations: [] } : [];
|
|
568
|
-
}
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
// If taxonomy is used, add taxonomyPath showing full hierarchy
|
|
572
|
-
if (taxonomy && !enableRelations && Array.isArray(parsed)) {
|
|
573
|
-
for (const entity of parsed) {
|
|
574
|
-
if (entity.class && typeof entity.class === "string") {
|
|
575
|
-
entity.taxonomyPath = getTaxonomyPath(entity.class, taxonomy);
|
|
576
|
-
}
|
|
577
|
-
}
|
|
578
|
-
}
|
|
579
|
-
|
|
580
|
-
return parsed;
|
|
581
|
-
}
|
|
582
|
-
|
|
583
|
-
// === MAIN ===
|
|
584
|
-
const systemPrompt = buildSystemPrompt();
|
|
585
|
-
const compact = opts.compact || !process.stdout.isTTY;
|
|
586
|
-
|
|
587
|
-
const contextSequence = context.getSequence();
|
|
588
|
-
|
|
589
|
-
if (inputTexts.length === 1) {
|
|
590
|
-
const session = new LlamaChatSession({
|
|
591
|
-
contextSequence,
|
|
592
|
-
systemPrompt,
|
|
593
|
-
});
|
|
594
|
-
const result = await processText(inputTexts[0]!, session);
|
|
595
|
-
console.log(JSON.stringify(result, null, compact ? 0 : 2));
|
|
596
|
-
} else {
|
|
597
|
-
// Batch / chunked: process each text, collect results
|
|
598
|
-
const allResults: any[] = [];
|
|
599
|
-
for (const inputText of inputTexts) {
|
|
600
|
-
// Erase context and create fresh session for each input
|
|
601
|
-
await contextSequence.eraseContextTokenRanges([
|
|
602
|
-
{ start: 0, end: contextSequence.nextTokenIndex },
|
|
603
|
-
]);
|
|
604
|
-
const session = new LlamaChatSession({
|
|
605
|
-
contextSequence,
|
|
606
|
-
systemPrompt,
|
|
607
|
-
});
|
|
608
|
-
const result = await processText(inputText, session);
|
|
609
|
-
allResults.push(result);
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
if (opts.file) {
|
|
613
|
-
// Merge chunked results into one
|
|
614
|
-
if (enableRelations) {
|
|
615
|
-
const merged = {
|
|
616
|
-
entities: allResults.flatMap((r) => r.entities || []),
|
|
617
|
-
relations: allResults.flatMap((r) => r.relations || []),
|
|
618
|
-
};
|
|
619
|
-
console.log(JSON.stringify(merged, null, compact ? 0 : 2));
|
|
620
|
-
} else {
|
|
621
|
-
const merged = allResults.flat();
|
|
622
|
-
console.log(JSON.stringify(merged, null, compact ? 0 : 2));
|
|
623
|
-
}
|
|
624
|
-
} else {
|
|
625
|
-
// Batch: output one result per line (JSONL)
|
|
626
|
-
for (const result of allResults) {
|
|
627
|
-
console.log(JSON.stringify(result));
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
|
-
}
|
|
631
|
-
|
|
632
|
-
// === CLEANUP ===
// Bun segfaults if process.exit() triggers synchronous native addon unloading.
// Setting exitCode lets the event loop drain naturally, avoiding the crash.
// (Any earlier fatal path is expected to have set a non-zero code itself.)
process.exitCode = 0;
|