objectivist-ner 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +226 -241
- package/index.ts +117 -25
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,83 +1,139 @@
|
|
|
1
1
|
# objectivist-ner
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Most Named Entity Recognition tools treat language as a bag of words to be statistically tagged.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
This tool takes a different approach.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
It is built on the Objectivist recognition that concepts are not arbitrary labels — they are integrations of observed reality, formed by identifying essential characteristics and omitting measurements. A valid concept must be grounded in percepts, organized hierarchically, and maintain identity across contexts.
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
That is why `objectivist-ner` emphasizes:
|
|
10
10
|
|
|
11
|
-
- Exact
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
- Coreference resolution (group mentions of the same entity)
|
|
17
|
-
- Negation and modality detection
|
|
18
|
-
- Confidence scores
|
|
19
|
-
- Schema definition files for reusable ontologies
|
|
20
|
-
- Long document chunking with `--file`
|
|
21
|
-
- Batch processing with `--batch`
|
|
22
|
-
- Three built-in model tiers: `--fast`, `--balanced`, `--best`
|
|
23
|
-
- Reads from argument, file, or stdin
|
|
24
|
-
- Compact JSON output for non-TTY / piping
|
|
11
|
+
- **Exact entity spans** — because a concept must refer to something specific in reality
|
|
12
|
+
- **Hierarchical classification** — because proper concept formation requires understanding genus and differentia
|
|
13
|
+
- **Negation detection** — because the relationship of a concept to existence is epistemologically essential
|
|
14
|
+
- **Coreference resolution** — because the law of identity demands we recognize the same existent across multiple descriptions
|
|
15
|
+
- **Relations** — because concepts do not exist in isolation, they integrate into propositions
|
|
25
16
|
|
|
26
|
-
|
|
17
|
+
It runs completely locally using a small language model. No API keys. No data leaves your machine.
|
|
18
|
+
|
|
19
|
+
## What Makes This Different
|
|
20
|
+
|
|
21
|
+
### 1. Assertion vs Negation vs Hypothetical
|
|
22
|
+
|
|
23
|
+
> "The patient has diabetes but does not have cancer. He might develop hypertension."
|
|
24
|
+
|
|
25
|
+
**Typical NER** sees three diseases. **objectivist-ner** sees three different relationships to reality:
|
|
27
26
|
|
|
28
27
|
```bash
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
ner --detect-negation "The patient has diabetes but does not have cancer. He might develop hypertension."
|
|
29
|
+
```
|
|
31
30
|
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
```json
|
|
32
|
+
[
|
|
33
|
+
{ "class": "disease", "text": "diabetes", "assertion": "present" },
|
|
34
|
+
{ "class": "disease", "text": "cancer", "assertion": "negated" },
|
|
35
|
+
{ "class": "disease", "text": "hypertension", "assertion": "hypothetical" }
|
|
36
|
+
]
|
|
34
37
|
```
|
|
35
38
|
|
|
36
|
-
|
|
39
|
+
The `assertion` field tells you whether the text claims something is **present**, **negated**, or **hypothetical**.
|
|
40
|
+
|
|
41
|
+
### 2. Identity Across References
|
|
42
|
+
|
|
43
|
+
> "Dr. Chen published a paper. She later won the Nobel Prize. The neurologist was celebrated."
|
|
37
44
|
|
|
38
|
-
|
|
45
|
+
**Typical NER** sees three separate people. **objectivist-ner** knows they are the same person:
|
|
39
46
|
|
|
40
47
|
```bash
|
|
41
|
-
|
|
42
|
-
|
|
48
|
+
ner --resolve "Dr. Chen published a paper. She later won the Nobel Prize. The neurologist was celebrated."
|
|
49
|
+
```
|
|
43
50
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
51
|
+
```json
|
|
52
|
+
[
|
|
53
|
+
{
|
|
54
|
+
"class": "person",
|
|
55
|
+
"text": "Dr. Chen",
|
|
56
|
+
"entity_id": "e1",
|
|
57
|
+
"is_canonical": true
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"class": "person",
|
|
61
|
+
"text": "She",
|
|
62
|
+
"entity_id": "e1",
|
|
63
|
+
"is_canonical": false
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
"class": "person",
|
|
67
|
+
"text": "The neurologist",
|
|
68
|
+
"entity_id": "e1",
|
|
69
|
+
"is_canonical": false
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"class": "event",
|
|
73
|
+
"text": "the Nobel Prize",
|
|
74
|
+
"entity_id": "e2",
|
|
75
|
+
"is_canonical": true
|
|
76
|
+
}
|
|
77
|
+
]
|
|
48
78
|
```
|
|
49
79
|
|
|
50
|
-
|
|
80
|
+
`entity_id` groups coreferent mentions. `is_canonical` marks the most specific reference.
|
|
51
81
|
|
|
52
|
-
|
|
53
|
-
# Restrict entity classes
|
|
54
|
-
ner "John works at Google" --classes person,organization
|
|
82
|
+
### 3. Hierarchical Classification
|
|
55
83
|
|
|
56
|
-
|
|
57
|
-
ner "Alice is sad in Paris" --attributes emotional_state,location
|
|
84
|
+
Define your ontology as a tree with mixed arrays (leaf nodes) and objects (nested hierarchies):
|
|
58
85
|
|
|
59
|
-
|
|
60
|
-
|
|
86
|
+
```
|
|
87
|
+
organism
|
|
88
|
+
├── person
|
|
89
|
+
└── animal
|
|
90
|
+
├── dog
|
|
91
|
+
└── cat
|
|
92
|
+
|
|
93
|
+
idea
|
|
94
|
+
├── dream
|
|
95
|
+
└── principle
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
ner --taxonomy '{"organism":["person",{"animal":["dog","cat"]}],"idea":["dream","principle"]}' \
|
|
100
|
+
"The child recounted a vivid dream about the golden retriever."
|
|
101
|
+
```
|
|
61
102
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
103
|
+
```json
|
|
104
|
+
[
|
|
105
|
+
{
|
|
106
|
+
"class": "person",
|
|
107
|
+
"text": "The child",
|
|
108
|
+
"taxonomyPath": ["organism", "person"]
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"class": "dream",
|
|
112
|
+
"text": "a vivid dream",
|
|
113
|
+
"taxonomyPath": ["idea", "dream"]
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"class": "dog",
|
|
117
|
+
"text": "the golden retriever",
|
|
118
|
+
"taxonomyPath": ["organism", "animal", "dog"]
|
|
119
|
+
}
|
|
120
|
+
]
|
|
65
121
|
```
|
|
66
122
|
|
|
67
|
-
|
|
123
|
+
The model classifies at the most specific (leaf) level, and `taxonomyPath` preserves the full hierarchy.
|
|
124
|
+
|
|
125
|
+
### 4. Conceptual Integration (Relations)
|
|
68
126
|
|
|
69
127
|
```bash
|
|
70
128
|
ner --relations "Dr. Chen works at MIT and collaborates with Prof. Wright"
|
|
71
129
|
```
|
|
72
130
|
|
|
73
|
-
Output:
|
|
74
|
-
|
|
75
131
|
```json
|
|
76
132
|
{
|
|
77
133
|
"entities": [
|
|
78
|
-
{ "class": "person", "text": "Dr. Chen"
|
|
79
|
-
{ "class": "organization", "text": "MIT"
|
|
80
|
-
{ "class": "person", "text": "Prof. Wright"
|
|
134
|
+
{ "class": "person", "text": "Dr. Chen" },
|
|
135
|
+
{ "class": "organization", "text": "MIT" },
|
|
136
|
+
{ "class": "person", "text": "Prof. Wright" }
|
|
81
137
|
],
|
|
82
138
|
"relations": [
|
|
83
139
|
{ "source": "Dr. Chen", "target": "MIT", "relation": "works at" },
|
|
@@ -90,266 +146,195 @@ Output:
|
|
|
90
146
|
}
|
|
91
147
|
```
|
|
92
148
|
|
|
93
|
-
|
|
149
|
+
Relations show how entities connect — extracting the "connective tissue" between concepts.
|
|
150
|
+
|
|
151
|
+
## Installation
|
|
94
152
|
|
|
95
153
|
```bash
|
|
96
|
-
|
|
154
|
+
bun install -g objectivist-ner
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Quick Start
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# Basic extraction
|
|
161
|
+
ner "the cat is blue and is feeling sad"
|
|
162
|
+
|
|
163
|
+
# Choose quality vs speed
|
|
164
|
+
ner --fast "simple text"
|
|
165
|
+
ner --balanced "moderate text"
|
|
166
|
+
ner --best "complex text"
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Usage Examples
|
|
170
|
+
|
|
171
|
+
### Constrain entity classes
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
ner "John works at Google" --classes person,organization
|
|
97
175
|
```
|
|
98
176
|
|
|
99
|
-
|
|
177
|
+
```json
|
|
178
|
+
[
|
|
179
|
+
{ "class": "person", "text": "John" },
|
|
180
|
+
{ "class": "organization", "text": "Google" }
|
|
181
|
+
]
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Constrain attribute keys
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
ner "Alice is sad in Paris" --attributes emotional_state,location
|
|
188
|
+
```
|
|
100
189
|
|
|
101
190
|
```json
|
|
102
191
|
[
|
|
103
192
|
{
|
|
104
193
|
"class": "person",
|
|
105
|
-
"text": "
|
|
106
|
-
"attributes": {}
|
|
107
|
-
"entity_id": "e1"
|
|
108
|
-
},
|
|
109
|
-
{ "class": "person", "text": "She", "attributes": {}, "entity_id": "e1" },
|
|
110
|
-
{
|
|
111
|
-
"class": "person",
|
|
112
|
-
"text": "The neurologist",
|
|
113
|
-
"attributes": {},
|
|
114
|
-
"entity_id": "e1"
|
|
115
|
-
},
|
|
116
|
-
{
|
|
117
|
-
"class": "event",
|
|
118
|
-
"text": "the Nobel Prize",
|
|
119
|
-
"attributes": {},
|
|
120
|
-
"entity_id": "e2"
|
|
194
|
+
"text": "Alice",
|
|
195
|
+
"attributes": { "emotional_state": "sad", "location": "Paris" }
|
|
121
196
|
}
|
|
122
197
|
]
|
|
123
198
|
```
|
|
124
199
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
### Negation detection
|
|
200
|
+
### Constrain attribute values
|
|
128
201
|
|
|
129
202
|
```bash
|
|
130
|
-
ner
|
|
203
|
+
ner "The sky is blue" --attr-values '{"color":["blue","red","green"]}'
|
|
131
204
|
```
|
|
132
205
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
206
|
+
```json
|
|
207
|
+
[
|
|
208
|
+
{
|
|
209
|
+
"class": "object",
|
|
210
|
+
"text": "sky",
|
|
211
|
+
"attributes": { "color": "blue" }
|
|
212
|
+
}
|
|
213
|
+
]
|
|
137
214
|
```
|
|
138
215
|
|
|
139
|
-
### Schema
|
|
216
|
+
### Schema files
|
|
140
217
|
|
|
141
|
-
Define your ontology
|
|
218
|
+
Define your ontology once and reuse it:
|
|
142
219
|
|
|
143
220
|
```json
|
|
144
221
|
{
|
|
145
222
|
"taxonomy": {
|
|
146
223
|
"organism": ["person", "animal"],
|
|
147
|
-
"
|
|
148
|
-
"institution": ["company", "university", "government_agency"]
|
|
224
|
+
"animal": ["dog", "cat"]
|
|
149
225
|
},
|
|
150
|
-
"attributes": ["role", "
|
|
151
|
-
"relations": ["works_at", "
|
|
226
|
+
"attributes": ["role", "location"],
|
|
227
|
+
"relations": ["works_at", "collaborates_with"]
|
|
152
228
|
}
|
|
153
229
|
```
|
|
154
230
|
|
|
155
231
|
```bash
|
|
156
|
-
ner --schema
|
|
232
|
+
ner --schema ontology.json "Dr. Chen works at MIT"
|
|
157
233
|
```
|
|
158
234
|
|
|
159
|
-
Schema files support `taxonomy`, `classes`, `attributes`, `attrValues`, and `relations`. CLI flags override schema file values.
|
|
160
|
-
|
|
161
235
|
### File and batch processing
|
|
162
236
|
|
|
163
237
|
```bash
|
|
164
238
|
# Process a long document (auto-chunked)
|
|
165
239
|
ner --file document.txt
|
|
166
240
|
|
|
167
|
-
# Process a JSONL file
|
|
241
|
+
# Process a JSONL file
|
|
168
242
|
ner --batch inputs.jsonl
|
|
169
243
|
|
|
170
244
|
# Process a directory of .txt files
|
|
171
245
|
ner --batch ./documents/
|
|
172
246
|
```
|
|
173
247
|
|
|
174
|
-
###
|
|
248
|
+
### Read from stdin
|
|
175
249
|
|
|
176
250
|
```bash
|
|
177
|
-
# Append to the built-in system prompt
|
|
178
|
-
ner "text" --system-prompt-append "Focus only on emotions"
|
|
179
|
-
|
|
180
|
-
# Replace the system prompt entirely
|
|
181
|
-
ner "text" --system-prompt "You are a custom extractor."
|
|
182
|
-
|
|
183
|
-
# Read from stdin
|
|
184
251
|
echo "the cat is blue" | ner
|
|
185
|
-
|
|
186
|
-
# Compact JSON output
|
|
187
|
-
ner "the cat is blue" --compact
|
|
252
|
+
cat article.txt | ner --detect-negation
|
|
188
253
|
```
|
|
189
254
|
|
|
190
|
-
##
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
|
195
|
-
|
|
|
196
|
-
| `--
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
|
205
|
-
|
|
|
206
|
-
| `--
|
|
207
|
-
|
|
|
208
|
-
|
|
|
209
|
-
|
|
|
210
|
-
|
|
|
211
|
-
| `--
|
|
212
|
-
| `--
|
|
213
|
-
| `--
|
|
214
|
-
| `--
|
|
215
|
-
| `--
|
|
216
|
-
| `--
|
|
217
|
-
| `--
|
|
218
|
-
| `--
|
|
219
|
-
| `--
|
|
220
|
-
| `--
|
|
221
|
-
|
|
|
222
|
-
| `--compact` | Output compact JSON (auto-enabled for non-TTY) |
|
|
223
|
-
| `-m, --model <uri>` | Use any GGUF model (see below) |
|
|
255
|
+
## Model Tiers
|
|
256
|
+
|
|
257
|
+
| Flag | Size | Download | Best for |
|
|
258
|
+
| ------------ | ------ | -------- | ------------------------------- |
|
|
259
|
+
| `--fast` | Small | ~0.9 GB | Simple text, single entities |
|
|
260
|
+
| `--balanced` | Medium | ~2.3 GB | Moderate complexity, most tasks |
|
|
261
|
+
| `--best` | Large | ~4.5 GB | Dense text, rare entity types |
|
|
262
|
+
|
|
263
|
+
`--best` is the default. See [Benchmarks](#benchmarks).
|
|
264
|
+
|
|
265
|
+
## Options Reference
|
|
266
|
+
|
|
267
|
+
| Flag | Description |
|
|
268
|
+
| --------------------------------- | -------------------------------------------------- |
|
|
269
|
+
| `--fast` | Use smallest model |
|
|
270
|
+
| `--balanced` | Use mid-size model |
|
|
271
|
+
| `--best` | Use largest model (default) |
|
|
272
|
+
| `-c, --classes <list>` | Allowed entity classes |
|
|
273
|
+
| `-a, --attributes <list>` | Allowed attribute keys |
|
|
274
|
+
| `--attr-values <json>` | Enum map for attribute values |
|
|
275
|
+
| `--taxonomy <json>` | Class hierarchy (parent → children) |
|
|
276
|
+
| `--relations` | Extract relations between entities |
|
|
277
|
+
| `--resolve` | Resolve coreferences (adds entity_id) |
|
|
278
|
+
| `--detect-negation` | Add assertion field (present/negated/hypothetical) |
|
|
279
|
+
| `--include-confidence` | Add confidence field (low/medium/high) |
|
|
280
|
+
| `--schema <path>` | Load schema from JSON file |
|
|
281
|
+
| `--file <path>` | Read from file (with chunking) |
|
|
282
|
+
| `--batch <path>` | Process JSONL file or directory |
|
|
283
|
+
| `--system-prompt <string>` | Replace system prompt |
|
|
284
|
+
| `--system-prompt-append <string>` | Append to system prompt |
|
|
285
|
+
| `--compact` | Compact JSON output |
|
|
286
|
+
| `-m, --model <uri>` | Use custom GGUF model |
|
|
224
287
|
|
|
225
288
|
## Benchmarks
|
|
226
289
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
> "Dr. Maria Chen, a 42-year-old neurologist at Massachusetts General Hospital in Boston, published a groundbreaking paper with her colleague Prof. James Wright from Oxford University about a rare genetic mutation called BRCA3-delta found in 12 patients from rural Bangladesh, while simultaneously consulting for Pfizer on their new drug Nexavion priced at 450 dollars per dose, which the WHO classified as a Category A essential medicine last Tuesday during their Geneva summit"
|
|
230
|
-
|
|
231
|
-
| Entity | `--fast` | `--balanced` | `--best` (default) |
|
|
232
|
-
| ------------------ | ---------- | ------------ | ------------------------ |
|
|
233
|
-
| Dr. Maria Chen | person | person | person |
|
|
234
|
-
| Prof. James Wright | person | person | person, role: colleague |
|
|
235
|
-
| MGH | - | org | org |
|
|
236
|
-
| Oxford University | - | org | org |
|
|
237
|
-
| BRCA3-delta | - | disease | disease |
|
|
238
|
-
| Bangladesh | - | - | location |
|
|
239
|
-
| Pfizer | - | org | org |
|
|
240
|
-
| Nexavion | - | drug | drug, price: 450 dollars |
|
|
241
|
-
| WHO | - | - | org, category: Cat A |
|
|
242
|
-
| Geneva summit | - | event | location |
|
|
243
|
-
| Boston | location | - | location |
|
|
244
|
-
| **Entities found** | **3 / 11** | **8 / 11** | **11 / 11** |
|
|
245
|
-
|
|
246
|
-
All three tiers produce zero hallucinations with the current prompt design.
|
|
247
|
-
|
|
248
|
-
## Epistemological design
|
|
249
|
-
|
|
250
|
-
fastner's feature set is informed by Objectivist epistemology -- the theory that concepts are formed by abstracting essential characteristics from concretes, organized into hierarchical structures, and held in a specific relationship to reality.
|
|
251
|
-
|
|
252
|
-
### Identity: A is A (`--resolve`)
|
|
253
|
-
|
|
254
|
-
The law of identity demands that we track _what a thing is_ across all its references. When a text says "Dr. Chen", "she", and "the neurologist", these are three linguistic expressions of one entity. Without coreference resolution, an NER system treats them as three unrelated extractions -- a failure to maintain identity. `--resolve` enforces that A remains A regardless of how it is named.
|
|
255
|
-
|
|
256
|
-
### Hierarchical concept formation (`--taxonomy`)
|
|
257
|
-
|
|
258
|
-
Objectivist epistemology holds that concepts are organized hierarchically through a process of abstraction. "Cat" is subsumed under "animal", which is subsumed under "organism". Each level retains the essential characteristics of its parent while adding differentia. The `--taxonomy` flag mirrors this structure directly -- you define genus-species relationships between entity classes, and the model classifies at the most specific level it can justify. This isn't just organization; it's how valid concepts are formed.
|
|
259
|
-
|
|
260
|
-
### Distinguishing existence from assertion (`--detect-negation`)
|
|
261
|
-
|
|
262
|
-
A concept must be connected to reality. "The patient has diabetes" and "the patient does not have diabetes" both contain the entity "diabetes", but their relationship to existence is opposite. Naive NER systems that extract "diabetes" from both sentences without distinguishing assertion from negation commit a fundamental error -- they detach the concept from its existential status. `--detect-negation` forces every entity to declare its relationship to reality: present, negated, or hypothetical.
|
|
263
|
-
|
|
264
|
-
### Certainty and the hierarchy of evidence (`--include-confidence`)
|
|
265
|
-
|
|
266
|
-
Knowledge exists on a spectrum from certain to speculative. "Dr. Maria Chen" appearing with a full name and title is a high-confidence extraction. "Someone named Bob" is low-confidence. Objectivism rejects both dogmatism (asserting certainty where none exists) and skepticism (denying certainty where it does). `--include-confidence` makes the epistemic status of each extraction explicit, letting downstream systems apply appropriate thresholds.
|
|
267
|
-
|
|
268
|
-
### Relations as conceptual integration (`--relations`)
|
|
269
|
-
|
|
270
|
-
Entities don't exist in isolation. The relationship "Dr. Chen works at MIT" is not a property of Chen or of MIT alone -- it's a fact about reality that connects two existents. Extracting entities without their relations is like forming concepts without integrating them into propositions. `--relations` extracts the connective tissue between entities, producing a knowledge graph rather than an isolated list.
|
|
271
|
-
|
|
272
|
-
### Schema files as objective definitions (`--schema`)
|
|
290
|
+
Tested on a complex input with 11 entities across 6 classes:
|
|
273
291
|
|
|
274
|
-
|
|
292
|
+
| Entity | `--fast` | `--balanced` | `--best` |
|
|
293
|
+
| ------------------ | -------- | ------------ | --------- |
|
|
294
|
+
| Dr. Maria Chen | person | person | person |
|
|
295
|
+
| Prof. James Wright | person | person | person |
|
|
296
|
+
| MGH | — | org | org |
|
|
297
|
+
| Oxford University | — | org | org |
|
|
298
|
+
| BRCA3-delta | — | disease | disease |
|
|
299
|
+
| Bangladesh | — | — | location |
|
|
300
|
+
| Pfizer | — | org | org |
|
|
301
|
+
| Nexavion | — | drug | drug |
|
|
302
|
+
| WHO | — | — | org |
|
|
303
|
+
| Geneva summit | — | event | location |
|
|
304
|
+
| Boston | location | — | location |
|
|
305
|
+
| **Found** | **3/11** | **8/11** | **11/11** |
|
|
275
306
|
|
|
276
|
-
|
|
307
|
+
## Integration with objectivist-lattice
|
|
277
308
|
|
|
278
|
-
|
|
309
|
+
This tool is designed to work with **[objectivist-lattice](https://github.com/richardanaya/objectivist-lattice)** — a knowledge management system that enforces the Objectivist hierarchy: percepts → concepts → principles → actions.
|
|
279
310
|
|
|
280
|
-
|
|
311
|
+
**objectivist-ner** extracts the percepts and concepts. **objectivist-lattice** validates and organizes them into principles you can act on.
|
|
281
312
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
### The Epistemological Pipeline
|
|
285
|
-
|
|
286
|
-
Objectivism holds that all knowledge begins with **percepts** (raw sensory data), which are integrated into **concepts**, which are organized into **principles** (general truths), which are finally applied as **actions** in specific contexts.
|
|
287
|
-
|
|
288
|
-
`objectivist-lattice` enforces this hierarchy strictly on a filesystem of Markdown files with validation rules:
|
|
289
|
-
|
|
290
|
-
- **Axioms** and **percepts** are bedrock — they have no `reduces_to` links
|
|
291
|
-
- **Principles** must reduce to axioms or percepts
|
|
292
|
-
- **Applications** must reduce to principles
|
|
293
|
-
- Promotion from `Tentative/Hypothesis` to `Integrated/Validated` can only happen bottom-up
|
|
294
|
-
|
|
295
|
-
### How NER Helps Build the Lattice
|
|
296
|
-
|
|
297
|
-
fastner acts as the **percept-to-concept extraction layer** for this system:
|
|
298
|
-
|
|
299
|
-
1. **Percept Extraction** (`--detect-negation`)
|
|
300
|
-
- Identifies concrete entities from source material (books, articles, personal observations)
|
|
301
|
-
- Distinguishes what is asserted as present, negated, or hypothetical
|
|
302
|
-
- Feeds raw perceptual data into the `02-Percepts/` directory
|
|
303
|
-
|
|
304
|
-
2. **Concept Formation** (`--classes`, `--taxonomy`, `--resolve`)
|
|
305
|
-
- Groups multiple mentions of the same entity (`entity_id`)
|
|
306
|
-
- Classifies entities into hierarchical taxonomies (`organism > person > neurologist`)
|
|
307
|
-
- Maintains identity across contexts — "Dr. Chen", "she", and "the neurologist" are recognized as the same existent
|
|
308
|
-
|
|
309
|
-
3. **Principle Discovery** (`--relations`, `--schema`)
|
|
310
|
-
- Extracts relations between entities ("works at", "causes", "implies")
|
|
311
|
-
- Uses schema files to enforce your ontological commitments
|
|
312
|
-
- Surfaces potential principles by showing what consistently reduces to what
|
|
313
|
-
|
|
314
|
-
4. **Action Guidance** (`--include-confidence`)
|
|
315
|
-
- Rates confidence in each extraction
|
|
316
|
-
- Helps distinguish high-certainty principles (suitable for action) from speculative ones (still tentative)
|
|
317
|
-
|
|
318
|
-
### Practical Workflow
|
|
313
|
+
### Workflow
|
|
319
314
|
|
|
320
315
|
```bash
|
|
321
|
-
# Extract
|
|
322
|
-
ner --file chapter1.txt --detect-negation --resolve
|
|
316
|
+
# Extract structured observations from text
|
|
317
|
+
ner --file chapter1.txt --detect-negation --resolve > percepts.json
|
|
323
318
|
|
|
324
|
-
#
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
# Later, when forming principles
|
|
328
|
-
ner --relations --schema ontology.json "text from multiple chapters" > principles.json
|
|
319
|
+
# Import into your knowledge lattice
|
|
320
|
+
# (See objectivist-lattice documentation for details)
|
|
329
321
|
```
|
|
330
322
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
**Percepts → Concepts → Principles → Validated Knowledge → Action**
|
|
323
|
+
## Epistemological Design
|
|
334
324
|
|
|
335
|
-
|
|
325
|
+
Each feature maps to an Objectivist principle:
|
|
336
326
|
|
|
337
|
-
|
|
327
|
+
- **`--resolve`** — The law of identity (A is A)
|
|
328
|
+
- **`--taxonomy`** — Hierarchical concept formation (genus and differentia)
|
|
329
|
+
- **`--detect-negation`** — Grounding concepts in reality (existence vs non-existence)
|
|
330
|
+
- **`--relations`** — Conceptual integration (concepts form connected propositions)
|
|
331
|
+
- **Grammar enforcement** — Non-contradiction (structure prevents invalid values)
|
|
338
332
|
|
|
339
|
-
|
|
333
|
+
## Custom Models
|
|
340
334
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
If the built-in tiers don't fit your needs, you can pass any GGUF model with `--model`. This overrides `--fast`/`--balanced`/`--best`.
|
|
335
|
+
Use any GGUF model:
|
|
344
336
|
|
|
345
337
|
```bash
|
|
346
|
-
# HuggingFace URI
|
|
347
338
|
ner "text" --model "hf:unsloth/Qwen3-8B-GGUF:Qwen3-8B-Q4_K_M.gguf"
|
|
348
|
-
|
|
349
|
-
# Local file
|
|
350
|
-
ner "text" --model ./my-custom-model.gguf
|
|
339
|
+
ner "text" --model ./my-model.gguf
|
|
351
340
|
```
|
|
352
|
-
|
|
353
|
-
## License
|
|
354
|
-
|
|
355
|
-
MIT © Richard Anaya
|
package/index.ts
CHANGED
|
@@ -47,20 +47,100 @@ interface SchemaFile {
|
|
|
47
47
|
}
|
|
48
48
|
|
|
49
49
|
// === TAXONOMY HELPERS ===
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
50
|
+
// Taxonomy format: {"organism": ["person", {"animal": ["dog", "cat"]}], "idea": ["dream", "principle"]}
|
|
51
|
+
// Arrays contain leaf nodes, objects contain nested taxonomies
|
|
52
|
+
|
|
53
|
+
/**
 * Collects the leaf class names of a taxonomy tree.
 *
 * Arrays hold leaf names (strings) and/or nested taxonomy objects; objects map
 * a parent class name to its children. Parent keys are never reported, only
 * the strings found inside arrays.
 *
 * @param taxonomy - Root taxonomy object, e.g.
 *   `{"organism": ["person", {"animal": ["dog", "cat"]}]}`.
 * @returns Leaf names in depth-first, first-seen order with duplicates removed.
 */
function getLeafNodes(taxonomy: Record<string, any>): string[] {
  // A Set preserves insertion order while dropping repeats, so a leaf that is
  // listed under several parents appears only once in the resulting class list.
  const leaves = new Set<string>();

  function traverse(node: unknown): void {
    if (Array.isArray(node)) {
      for (const item of node) {
        if (typeof item === "string") {
          leaves.add(item);
        } else {
          // Nested taxonomy objects are walked; other values are ignored below.
          traverse(item);
        }
      }
    } else if (typeof node === "object" && node !== null) {
      // Keys are parent class names; only their children can contribute leaves.
      for (const value of Object.values(node)) {
        traverse(value);
      }
    }
  }

  traverse(taxonomy);
  return Array.from(leaves);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Resolves the chain of class names leading from a taxonomy root to `leaf`.
 *
 * The taxonomy is searched depth-first, root by root, and the first array
 * entry equal to `leaf` wins. Only strings inside arrays are matched; parent
 * keys are not.
 *
 * @param leaf - Class name to locate.
 * @param taxonomy - Root taxonomy object (parent name → children).
 * @returns The path from the root key down to `leaf`, or `[leaf]` when no
 *   array in the taxonomy contains it.
 */
function getTaxonomyPath(
  leaf: string,
  taxonomy: Record<string, any>,
): string[] {
  // `trail` holds the parent keys that lead to `subtree`.
  const locate = (subtree: any, trail: string[]): string[] | null => {
    if (subtree === null || typeof subtree !== "object") {
      return null;
    }

    if (!Array.isArray(subtree)) {
      // Object node: each key is a parent class that extends the trail.
      for (const childKey of Object.keys(subtree)) {
        const found = locate(subtree[childKey], trail.concat(childKey));
        if (found !== null) return found;
      }
      return null;
    }

    // Array node: strings are leaves, objects are nested taxonomies that sit
    // at the same depth as their sibling leaves.
    for (const member of subtree) {
      if (member === leaf) {
        return trail.concat(member);
      }
      if (typeof member === "object" && member !== null) {
        const found = locate(member, trail);
        if (found !== null) return found;
      }
    }
    return null;
  };

  for (const rootKey of Object.keys(taxonomy)) {
    const hit = locate(taxonomy[rootKey], [rootKey]);
    if (hit !== null) return hit;
  }

  // Not present in any array: report the class on its own.
  return [leaf];
}
|
|
58
111
|
|
|
59
|
-
function taxonomyToPrompt(taxonomy: Record<string,
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
112
|
+
/**
 * Renders a taxonomy as an indented bullet list for inclusion in the prompt.
 *
 * The first line is a fixed instruction; each root key follows as a top-level
 * bullet, with its children nested one indent step deeper per level.
 *
 * @param taxonomy - Root taxonomy object (parent name → children).
 * @returns The instruction line and bullets joined with newlines.
 */
function taxonomyToPrompt(taxonomy: Record<string, any>): string {
  const output: string[] = [
    "Use the following class hierarchy. Classify at the most specific (leaf) level:",
  ];

  // Appends the bullets for `node` at indentation `pad`, in document order.
  // NOTE(review): the indent step is the single space visible in this view —
  // confirm against the repository file, whitespace may have been collapsed.
  const emit = (node: any, pad: string): void => {
    if (node === null || typeof node !== "object") {
      return;
    }

    if (Array.isArray(node)) {
      for (const entry of node) {
        if (typeof entry === "string") {
          output.push(`${pad}- ${entry}`);
        } else if (typeof entry === "object" && entry !== null) {
          // A nested taxonomy shares the indentation of its sibling leaves.
          emit(entry, pad);
        }
      }
      return;
    }

    for (const name of Object.keys(node)) {
      output.push(`${pad}- ${name}`);
      emit(node[name], pad + " ");
    }
  };

  for (const rootName of Object.keys(taxonomy)) {
    output.push(`- ${rootName}`);
    emit(taxonomy[rootName], " ");
  }

  return output.join("\n");
}
|
|
65
145
|
|
|
66
146
|
// === CHUNKING ===
|
|
@@ -130,17 +210,16 @@ program
|
|
|
130
210
|
"after",
|
|
131
211
|
`
|
|
132
212
|
Examples:
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
echo "the cat is blue" | fastner
|
|
213
|
+
ner "the cat is blue"
|
|
214
|
+
ner "John works at Google" --classes person,organization
|
|
215
|
+
ner "sky is blue" --attr-values '{"color":["blue","red"]}'
|
|
216
|
+
ner --relations "Dr. Chen works at MIT"
|
|
217
|
+
ner --resolve "Dr. Chen published a paper. She won an award."
|
|
218
|
+
ner --detect-negation "The patient does not have cancer"
|
|
219
|
+
ner --schema schema.json "complex text"
|
|
220
|
+
ner --file document.txt
|
|
221
|
+
ner --batch inputs.jsonl
|
|
222
|
+
echo "the cat is blue" | ner
|
|
144
223
|
`,
|
|
145
224
|
)
|
|
146
225
|
.parse();
|
|
@@ -293,6 +372,7 @@ function buildSystemPrompt(): string {
|
|
|
293
372
|
}
|
|
294
373
|
if (enableResolve) {
|
|
295
374
|
base += `\n- Every entity has a top-level "entity_id" field. If multiple text spans refer to the same real-world entity (e.g. "Dr. Chen" and "she"), they share the same entity_id. Use short IDs like "e1", "e2".`;
|
|
375
|
+
base += `\n- When multiple mentions share an entity_id, exactly ONE of them must have "is_canonical": true (the most specific reference like a proper name). The others must have "is_canonical": false.`;
|
|
296
376
|
}
|
|
297
377
|
|
|
298
378
|
let prompt = `${base}\n\n${FEW_SHOT_EXAMPLES}`;
|
|
@@ -306,8 +386,9 @@ function buildSystemPrompt(): string {
|
|
|
306
386
|
|
|
307
387
|
// === BUILD GRAMMAR SCHEMA ===
|
|
308
388
|
function buildGrammarSchema() {
|
|
309
|
-
//
|
|
310
|
-
|
|
389
|
+
// When using taxonomy, only allow leaf nodes as valid classes.
|
|
390
|
+
// This forces the model to classify at the most specific level.
|
|
391
|
+
const classEnum = taxonomy ? getLeafNodes(taxonomy) : allowedClasses;
|
|
311
392
|
|
|
312
393
|
const attributesSchema: any = {
|
|
313
394
|
type: "object",
|
|
@@ -343,7 +424,9 @@ function buildGrammarSchema() {
|
|
|
343
424
|
}
|
|
344
425
|
if (enableResolve) {
|
|
345
426
|
properties.entity_id = { type: "string" };
|
|
427
|
+
properties.is_canonical = { type: "boolean" };
|
|
346
428
|
required.push("entity_id");
|
|
429
|
+
required.push("is_canonical");
|
|
347
430
|
}
|
|
348
431
|
|
|
349
432
|
const schema: any = {
|
|
@@ -414,7 +497,7 @@ function buildConstraints(): string {
|
|
|
414
497
|
constraints += `\nEvery entity has a "confidence" field (not in attributes). Example: [{"class":"person","text":"John","confidence":"high","attributes":{}}]`;
|
|
415
498
|
}
|
|
416
499
|
if (enableResolve) {
|
|
417
|
-
constraints += `\nEvery entity has
|
|
500
|
+
constraints += `\nEvery entity has "entity_id" and "is_canonical" fields. Coreferent mentions share entity_id; exactly one per group has is_canonical:true (the most specific reference). Example: [{"class":"person","text":"Dr. Chen","entity_id":"e1","is_canonical":true,"attributes":{}},{"class":"person","text":"She","entity_id":"e1","is_canonical":false,"attributes":{}}]`;
|
|
418
501
|
}
|
|
419
502
|
if (enableRelations) {
|
|
420
503
|
constraints += `\nAlso extract relations between entities. Return {"entities": [...], "relations": [{"source": "entity text", "target": "entity text", "relation": "relation type"}]}.`;
|
|
@@ -456,6 +539,15 @@ async function processText(
|
|
|
456
539
|
}
|
|
457
540
|
}
|
|
458
541
|
|
|
542
|
+
// If taxonomy is used, add taxonomyPath showing full hierarchy
|
|
543
|
+
if (taxonomy && !enableRelations && Array.isArray(parsed)) {
|
|
544
|
+
for (const entity of parsed) {
|
|
545
|
+
if (entity.class && typeof entity.class === "string") {
|
|
546
|
+
entity.taxonomyPath = getTaxonomyPath(entity.class, taxonomy);
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
}
|
|
550
|
+
|
|
459
551
|
return parsed;
|
|
460
552
|
}
|
|
461
553
|
|