@o-lang/legal-extractor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -0
- package/capability.js +395 -0
- package/index.js +35 -0
- package/package.json +25 -0
- package/resolver.js +52 -0
package/README.md
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# @o-lang/legal-extractor
|
|
2
|
+
|
|
3
|
+
O-Lang resolver for structured legal document extraction.
|
|
4
|
+
|
|
5
|
+
**Extract-only** — never provides legal advice, never predicts outcomes,
|
|
6
|
+
never opines on validity or enforceability.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
npm install @o-lang/legal-extractor
|
|
11
|
+
|
|
12
|
+
## Usage in .ol workflow
|
|
13
|
+
|
|
14
|
+
Workflow "Legal Document Review" with document_ref, jurisdiction, doc_type, document_text
|
|
15
|
+
|
|
16
|
+
Allow resolvers:
|
|
17
|
+
- legal-extractor
|
|
18
|
+
- llm-groq
|
|
19
|
+
|
|
20
|
+
Step 1: Ask legal-extractor "{document_ref}" "{jurisdiction}" "{doc_type}" "{document_text}"
|
|
21
|
+
Save as extracted
|
|
22
|
+
|
|
23
|
+
Step 2: Ask llm-groq "Summarise the key findings from this {doc_type} under {jurisdiction} law.
|
|
24
|
+
Reference: {document_ref}. Extracted data: {extracted.summary}.
|
|
25
|
+
NEVER provide legal advice, speculate on outcomes, or make liability claims."
|
|
26
|
+
Save as response
|
|
27
|
+
|
|
28
|
+
Return response
|
|
29
|
+
|
|
30
|
+
## What it extracts
|
|
31
|
+
|
|
32
|
+
| Field | Description |
|
|
33
|
+
|--------------|--------------------------------------------------|
|
|
34
|
+
| summary | Concise factual summary of the document |
|
|
35
|
+
| parties | Named parties identified in the document |
|
|
36
|
+
| clauses | Key clause types with excerpt |
|
|
37
|
+
| dates | All dates found in the document |
|
|
38
|
+
| obligations | Sentences containing obligation language |
|
|
39
|
+
| risks | Risk flags with severity: high / medium / low |
|
|
40
|
+
| jurisdiction | Resolved jurisdiction label |
|
|
41
|
+
| doc_type | Resolved document type label |
|
|
42
|
+
| word_count | Total word count |
|
|
43
|
+
|
|
44
|
+
## Supported jurisdictions
|
|
45
|
+
|
|
46
|
+
ng, gh, uk, us, za, ke, ug, tz, et, rw, eu, au, ca, in, general
|
|
47
|
+
|
|
48
|
+
## Supported document types
|
|
49
|
+
|
|
50
|
+
nda, contract, agreement, mou, loi, sla, employment, lease, tenancy,
|
|
51
|
+
shareholder, partnership, joint venture, terms, privacy, dpa, ip,
|
|
52
|
+
loan, service, consultancy, distribution, franchise, supply, settlement
|
|
53
|
+
|
|
54
|
+
## Clause types detected (23 total)
|
|
55
|
+
|
|
56
|
+
Confidentiality, Intellectual Property, Non-Compete, Non-Solicitation,
|
|
57
|
+
Indemnification, Limitation of Liability, Governing Law, Dispute Resolution,
|
|
58
|
+
Termination, Force Majeure, Automatic Renewal, Payment Terms,
|
|
59
|
+
Liquidated Damages, Warranty, Warranty Disclaimer, Data Protection,
|
|
60
|
+
Entire Agreement, Severability, Assignment, Exclusivity, Notice,
|
|
61
|
+
Confidential Information, Permitted Disclosure
|
|
62
|
+
|
|
63
|
+
## Risk severity levels
|
|
64
|
+
|
|
65
|
+
- **high** — Non-compete, unlimited liability, IP assignment, liquidated damages
|
|
66
|
+
- **medium** — Indemnification, liability cap, auto-renewal, data protection
|
|
67
|
+
- **low** — Arbitration, governing law, force majeure, confidentiality
|
|
68
|
+
|
|
69
|
+
## O-Lang kernel compliance
|
|
70
|
+
|
|
71
|
+
- resolverName: legal-extractor
|
|
72
|
+
- deterministic: true
|
|
73
|
+
- sideEffects: false
|
|
74
|
+
- requiresNetwork: false
|
|
75
|
+
- extract_only: true
|
|
76
|
+
|
|
77
|
+
## Legal notice
|
|
78
|
+
|
|
79
|
+
This resolver performs automated text extraction only. It does not
|
|
80
|
+
provide legal advice. Always consult a qualified legal professional
|
|
81
|
+
for advice on specific legal documents.
|
|
82
|
+
|
|
83
|
+
## License
|
|
84
|
+
|
|
85
|
+
MIT — [Your Organisation Name]
|
package/capability.js
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
// capability.js
|
|
2
|
+
//
|
|
3
|
+
// O-Lang Legal Extractor Resolver — capability.js v1.0.0
|
|
4
|
+
//
|
|
5
|
+
// Deterministic, zero-dependency structured extraction from legal documents.
|
|
6
|
+
// Identifies parties, clauses, dates, obligations, and risk flags.
|
|
7
|
+
//
|
|
8
|
+
// EXTRACT-ONLY. This resolver:
|
|
9
|
+
// ✓ Extracts and classifies what IS in the document
|
|
10
|
+
// ✗ Never provides legal advice
|
|
11
|
+
// ✗ Never predicts legal outcomes
|
|
12
|
+
// ✗ Never opines on validity or enforceability
|
|
13
|
+
|
|
14
|
+
'use strict';
|
|
15
|
+
|
|
16
|
+
// ── Jurisdiction map ──────────────────────────────────────────────────────────
|
|
17
|
+
/**
 * Lookup table from a lower-cased jurisdiction code or name to its display label.
 *
 * The table has a null prototype so that arbitrary user-supplied keys such as
 * "constructor" or "toString" can never resolve to an inherited
 * Object.prototype member, and it is frozen because it is shared module state.
 * Every jurisdiction that has an ISO-style code also accepts its full name.
 */
const JURISDICTIONS = Object.freeze(Object.assign(Object.create(null), {
  'ng': 'Nigeria', 'nigeria': 'Nigeria',
  'gh': 'Ghana', 'ghana': 'Ghana',
  'uk': 'United Kingdom', 'england': 'United Kingdom', 'gb': 'United Kingdom',
  'united kingdom': 'United Kingdom', 'great britain': 'United Kingdom',
  'us': 'United States', 'usa': 'United States', 'united states': 'United States',
  'za': 'South Africa', 'south africa': 'South Africa',
  'ke': 'Kenya', 'kenya': 'Kenya',
  'ug': 'Uganda', 'uganda': 'Uganda',
  'tz': 'Tanzania', 'tanzania': 'Tanzania',
  'et': 'Ethiopia', 'ethiopia': 'Ethiopia',
  'rw': 'Rwanda', 'rwanda': 'Rwanda',
  'eu': 'European Union', 'european union': 'European Union',
  'au': 'Australia', 'australia': 'Australia',
  'ca': 'Canada', 'canada': 'Canada',
  'in': 'India', 'india': 'India',
  'general': 'General / Unspecified',
}));
|
|
34
|
+
|
|
35
|
+
// ── Document type map ─────────────────────────────────────────────────────────
|
|
36
|
+
// Maps a lower-cased document-type keyword (as supplied by the workflow) to
// the human-readable label used in the summary and in the `doc_type` output.
// Several keywords intentionally share one label (e.g. 'nda' / 'non-disclosure').
// Keys are matched exactly after trim + lower-casing; there is no partial matching.
const DOC_TYPES = {
  // Confidentiality
  'nda': 'Non-Disclosure Agreement',
  'non-disclosure': 'Non-Disclosure Agreement',
  'confidentiality': 'Confidentiality Agreement',
  // Generic instruments
  'contract': 'Contract',
  'agreement': 'Agreement',
  'mou': 'Memorandum of Understanding',
  'memorandum': 'Memorandum of Understanding',
  'loi': 'Letter of Intent',
  'letter of intent': 'Letter of Intent',
  'sla': 'Service Level Agreement',
  'service level': 'Service Level Agreement',
  // Employment
  'employment': 'Employment Agreement',
  'offer letter': 'Employment Offer Letter',
  // Property
  'lease': 'Lease Agreement',
  'tenancy': 'Tenancy Agreement',
  'rental': 'Rental Agreement',
  // Corporate
  'shareholder': 'Shareholder Agreement',
  'partnership': 'Partnership Agreement',
  'joint venture': 'Joint Venture Agreement',
  // Terms, privacy and data
  'terms': 'Terms and Conditions',
  'terms of service': 'Terms of Service',
  'privacy': 'Privacy Policy',
  'data processing': 'Data Processing Agreement',
  'dpa': 'Data Processing Agreement',
  // Intellectual property
  'ip': 'Intellectual Property Agreement',
  'assignment': 'Assignment Agreement',
  // Finance
  'loan': 'Loan Agreement',
  'facility': 'Facility Agreement',
  // Commercial
  'service': 'Service Agreement',
  'consultancy': 'Consultancy Agreement',
  'distribution': 'Distribution Agreement',
  'reseller': 'Reseller Agreement',
  'franchise': 'Franchise Agreement',
  'supply': 'Supply Agreement',
  'purchase': 'Purchase Agreement',
  'sale': 'Sale Agreement',
  'settlement': 'Settlement Agreement',
  // Fallback label
  'general': 'General Legal Document',
};
|
|
76
|
+
|
|
77
|
+
// ── Clause type patterns ──────────────────────────────────────────────────────
|
|
78
|
+
/**
 * Clause detectors: one entry per clause type, each with a case-insensitive
 * pattern that signals the clause is present.
 *
 * Short alternatives are anchored with `\b` so they cannot fire inside longer
 * words: "as is" must not match "was issued", "ip ownership" must not match
 * "partnership ownership", and "net 30" must not match "internet 30".
 *
 * The patterns carry the `g` flag, so `test`/`exec` are stateful; callers must
 * reset `lastIndex` between uses.
 */
const CLAUSE_PATTERNS = [
  { type: 'Confidentiality', pattern: /confidential(ity)?|non[- ]disclosure|trade\s+secret/gi },
  { type: 'Intellectual Property', pattern: /intellectual\s+property|\bip\s+ownership|copyright|trademark|patent|assign(s|ed)?\s+(all\s+)?rights/gi },
  { type: 'Non-Compete', pattern: /non[- ]compete|restraint\s+of\s+trade|not\s+to\s+compete/gi },
  { type: 'Non-Solicitation', pattern: /non[- ]solicit(ation)?|not\s+to\s+solicit/gi },
  { type: 'Indemnification', pattern: /indemnif(y|ies|ied|ication)|hold\s+harmless/gi },
  { type: 'Limitation of Liability', pattern: /limitation\s+of\s+liability|limit(ed)?\s+liability|liability\s+cap|not\s+.*\s+liable/gi },
  { type: 'Governing Law', pattern: /governing\s+law|choice\s+of\s+law|shall\s+be\s+governed\s+by/gi },
  { type: 'Dispute Resolution', pattern: /arbitration|mediation|dispute\s+resolution|escalation\s+procedure/gi },
  { type: 'Termination', pattern: /terminat(e|ion|ing)|cancell(ation|ing)|rescission/gi },
  { type: 'Force Majeure', pattern: /force\s+majeure|act\s+of\s+god|circumstances\s+beyond/gi },
  { type: 'Automatic Renewal', pattern: /automatic(ally)?\s+renew(s|al|ed)?|evergreen\s+clause|renew\s+unless\s+notice/gi },
  { type: 'Payment Terms', pattern: /payment\s+terms?|invoice|due\s+date|\bnet\s+\d+|late\s+payment|interest\s+on\s+.*\s+overdue/gi },
  { type: 'Liquidated Damages', pattern: /liquidated\s+damages|penalty\s+clause|agreed\s+damages/gi },
  { type: 'Warranty', pattern: /warrant(y|ies|s)|representation\s+and\s+warrant/gi },
  { type: 'Warranty Disclaimer', pattern: /disclaim(s|er)?.*warrant(y|ies)|no\s+warrant(y|ies)|\bas(?:\s+|-)is\b/gi },
  { type: 'Data Protection', pattern: /personal\s+data|data\s+protection|gdpr|ndpa|popia|data\s+subject|processing\s+of\s+.*\s+data/gi },
  { type: 'Entire Agreement', pattern: /entire\s+agreement|whole\s+agreement|supersedes\s+all\s+prior/gi },
  { type: 'Severability', pattern: /severab(le|ility)|if\s+any\s+provision\s+.*\s+invalid/gi },
  { type: 'Assignment', pattern: /assign(ment|ments|able|ability)|transfer\s+(this|the)\s+agreement/gi },
  { type: 'Exclusivity', pattern: /exclusiv(e|ity)|sole\s+(right|supplier|distributor)/gi },
  { type: 'Notice', pattern: /notice\s+(period|requirement|shall\s+be\s+given)|written\s+notice/gi },
  { type: 'Confidential Information', pattern: /confidential\s+information\s+(means|includes|shall\s+mean)/gi },
  { type: 'Permitted Disclosure', pattern: /permitted\s+disclosure|may\s+disclose|permitted\s+to\s+disclose/gi },
];
|
|
103
|
+
|
|
104
|
+
// ── Risk flag patterns ────────────────────────────────────────────────────────
|
|
105
|
+
/**
 * Risk-flag detectors. Each entry pairs a case-insensitive pattern with the
 * label reported to the caller and a severity of 'high', 'medium' or 'low'.
 *
 * Two patterns are deliberately guarded against false positives:
 *  - "as is" is word-bounded so it does not match "was issued";
 *  - "exclusive" is rejected when preceded by "non-", "non " or "non", because
 *    a non-exclusive grant is the opposite of an exclusivity obligation.
 *
 * The patterns carry the `g` flag, so `test` is stateful; callers must reset
 * `lastIndex` between uses.
 */
const RISK_PATTERNS = [
  { pattern: /unlimited\s+liability/gi, label: 'Unlimited liability exposure identified', severity: 'high' },
  { pattern: /liquidated\s+damages|penalty\s+clause/gi, label: 'Liquidated damages or penalty clause present', severity: 'high' },
  { pattern: /non[- ]compete|restraint\s+of\s+trade/gi, label: 'Non-compete or restraint of trade clause present', severity: 'high' },
  { pattern: /intellectual\s+property.*assign|assign.*intellectual\s+property|assigns?\s+all\s+rights/gi, label: 'Broad IP assignment clause present', severity: 'high' },
  { pattern: /indemnif(y|ies|ied|ication)/gi, label: 'Indemnification clause present', severity: 'medium' },
  { pattern: /limitation\s+of\s+liability|liability\s+cap/gi, label: 'Limitation of liability clause present', severity: 'medium' },
  { pattern: /automatic(ally)?\s+renew|evergreen/gi, label: 'Automatic renewal clause — notice required to exit', severity: 'medium' },
  { pattern: /terminat(e|ion)\s+(for\s+cause|without\s+cause|at\s+will)/gi, label: 'Unilateral termination rights present', severity: 'medium' },
  { pattern: /(?<!non[- ]?)exclusiv(e|ity)/gi, label: 'Exclusivity obligation present', severity: 'medium' },
  { pattern: /warrant(y|ies|s)\s+(disclaim|waiv|exclud)|\bas[- ]is\b/gi, label: 'Warranty disclaimer — no guarantees given', severity: 'medium' },
  { pattern: /personal\s+data|data\s+protection|gdpr|ndpa|popia/gi, label: 'Data protection obligations present', severity: 'medium' },
  { pattern: /arbitration/gi, label: 'Disputes referred to arbitration (not courts)', severity: 'low' },
  { pattern: /governing\s+law|shall\s+be\s+governed\s+by/gi, label: 'Governing law clause present', severity: 'low' },
  { pattern: /force\s+majeure/gi, label: 'Force majeure clause present', severity: 'low' },
  { pattern: /confidential(ity)?/gi, label: 'Confidentiality obligations present', severity: 'low' },
];
|
|
122
|
+
|
|
123
|
+
// ── Obligation keywords ───────────────────────────────────────────────────────
|
|
124
|
+
// Lower-case words and phrases treated as obligation language.
// A sentence is reported as an obligation when it contains at least one of
// these (see extractObligations). Entries are compared against lower-cased
// text, so they must stay lower-case.
const OBLIGATION_KEYWORDS = [
  'shall', 'must', 'is required to', 'agrees to', 'undertakes to',
  'is obligated to', 'is liable for', 'covenants to',
  'warrants that', 'represents that', 'will be responsible for',
  'is bound to', 'hereby agrees', 'commits to',
];
|
|
130
|
+
|
|
131
|
+
// ── Date patterns ─────────────────────────────────────────────────────────────
|
|
132
|
+
// Date recognisers, applied in this order by extractDates. Every pattern has a
// single capture group holding the full date text. Dates are reported exactly
// as written; nothing here parses or validates them (e.g. 31/02/2024 matches).
const DATE_PATTERNS = [
  // Day Month Year with optional ordinal suffix, e.g. "12th January 2024"
  /\b(\d{1,2}(?:st|nd|rd|th)?\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4})\b/gi,
  // Month Day, Year with optional ordinal suffix and comma, e.g. "January 12, 2024"
  /\b((?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}(?:st|nd|rd|th)?,?\s+\d{4})\b/gi,
  // ISO-style numeric date, e.g. "2024-01-12"
  /\b(\d{4}-\d{2}-\d{2})\b/g,
  // Slash-separated numeric date with a 2-4 digit final part, e.g. "12/01/2024"
  /\b(\d{1,2}\/\d{1,2}\/\d{2,4})\b/g,
  // Hyphen-separated numeric date with a 2-4 digit final part, e.g. "12-01-2024"
  /\b(\d{1,2}-\d{1,2}-\d{2,4})\b/g,
];
|
|
139
|
+
|
|
140
|
+
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
141
|
+
|
|
142
|
+
/**
 * Turn a caller-supplied jurisdiction code or name into a display label.
 *
 * @param {*} raw - Jurisdiction as supplied by the workflow (e.g. "ng", " UK ").
 *   Non-string values are converted with String().
 * @returns {string} The mapped label when the trimmed, lower-cased value is a
 *   key of JURISDICTIONS; 'General / Unspecified' when the value is missing or
 *   blank; otherwise the trimmed input unchanged.
 */
function resolveJurisdiction(raw) {
  const fallback = 'General / Unspecified';
  if (!raw) return fallback;

  const label = String(raw).trim();
  if (label === '') return fallback;

  const key = label.toLowerCase();
  // Own-property check: a plain `JURISDICTIONS[key]` would resolve inputs such
  // as "constructor" to inherited Object.prototype members.
  return Object.hasOwn(JURISDICTIONS, key) ? JURISDICTIONS[key] : label;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Turn a caller-supplied document-type keyword into a display label.
 *
 * @param {*} raw - Document type as supplied by the workflow (e.g. "nda").
 *   Non-string values are converted with String().
 * @returns {string} The mapped label when the trimmed, lower-cased value is a
 *   key of DOC_TYPES; 'General Legal Document' when the value is missing or
 *   blank; otherwise the trimmed input unchanged.
 */
function resolveDocType(raw) {
  const fallback = 'General Legal Document';
  if (!raw) return fallback;

  const label = String(raw).trim();
  if (label === '') return fallback;

  const key = label.toLowerCase();
  // Own-property check: a plain `DOC_TYPES[key]` would resolve inputs such as
  // "constructor" to inherited Object.prototype members.
  return Object.hasOwn(DOC_TYPES, key) ? DOC_TYPES[key] : label;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Collect party names and party roles mentioned in a legal document.
 *
 * Four heuristics are applied in order, and results are de-duplicated in
 * first-seen order:
 *  1. "between X and Y" (first occurrence only);
 *  2. `X (hereinafter "Y")`, `X (hereinafter referred to as "Y")`, `X (the "Y")`;
 *  3. literal labels such as "Party A" / "Party 1";
 *  4. a fixed list of role names such as "the Employer".
 *
 * @param {string} text - Full document text.
 * @returns {string[]} Up to 10 party strings, each longer than 2 characters.
 */
function extractParties(text) {
  const parties = new Set();
  const collapse = (value) => value.replace(/\s+/g, ' ').trim();

  // "between X and Y" — most common in contracts.
  // The name quantifier is `{2,80}?` (bounded and lazy). Writing the `?` inside
  // the braces makes the engine read the whole `{...}` as literal characters,
  // so the pattern can never match real text.
  const betweenMatch = text.match(
    /between\s+([A-Z][A-Za-z\s,.()&'\u2018\u2019"\u201C\u201D\u2013-]{2,80}?)\s+and\s+([A-Z][A-Za-z\s,.()&'\u2018\u2019"\u201C\u201D\u2013-]{2,80}?)(?:\s*[(,.;])/i
  );
  if (betweenMatch) {
    parties.add(collapse(betweenMatch[1]));
    parties.add(collapse(betweenMatch[2]));
  }

  // "X (hereinafter "Y")" or "X (the "Y")"; straight and curly quotes accepted.
  // NOTE: matching is case-insensitive and takes the leftmost start, so the
  // captured name can include up to 80 characters of text before the name.
  const hereinafterPattern =
    /([A-Z][A-Za-z\s,.()&'\u2018\u2019"\u201C\u201D\u2013-]{3,80}?)\s*\((?:hereinafter(?:\s+referred\s+to\s+as)?|the)\s*["\u201C\u201D]([A-Z][A-Za-z\s]+)["\u201C\u201D]\)/gi;
  for (const m of text.matchAll(hereinafterPattern)) {
    parties.add(`${collapse(m[1])} ("${m[2].trim()}")`);
  }

  // "Party A" / "Party B" named styles (case-sensitive).
  for (const m of text.matchAll(/\b(Party\s+[A-Z\d])\b/g)) {
    parties.add(m[1]);
  }

  // Role-based: "the Employer", "the Employee", "the Disclosing Party" etc.
  const rolePattern =
    /\bthe\s+(Employer|Employee|Disclosing\s+Party|Receiving\s+Party|Licensor|Licensee|Buyer|Seller|Supplier|Contractor|Client|Consultant|Lender|Borrower|Landlord|Tenant|Franchisor|Franchisee)\b/g;
  for (const m of text.matchAll(rolePattern)) {
    parties.add(`the ${m[1]}`);
  }

  return [...parties].filter((p) => p.length > 2).slice(0, 10);
}
|
|
188
|
+
|
|
189
|
+
/**
 * List the distinct date strings found in the text.
 *
 * Patterns from DATE_PATTERNS are applied in order; within each pattern,
 * matches are taken in document order. Duplicates are dropped.
 *
 * @param {string} text - Full document text.
 * @returns {string[]} At most 20 date strings, exactly as written in the text.
 */
function extractDates(text) {
  const distinct = new Set(
    DATE_PATTERNS.flatMap((datePattern) =>
      // Prefer the capture group; fall back to the whole match.
      Array.from(text.matchAll(datePattern), (hit) => hit[1] || hit[0])
    )
  );
  return Array.from(distinct).slice(0, 20);
}
|
|
199
|
+
|
|
200
|
+
/**
 * Report which clause types from CLAUSE_PATTERNS occur in the text, with a
 * short excerpt around the first occurrence of each.
 *
 * The excerpt is a character window (80 before the match start, 160 after),
 * whitespace-collapsed, with one leading character dropped when that character
 * is not an upper-case A-Z, and capped at 200 characters. An ellipsis is
 * appended only when the cap is reached.
 *
 * @param {string} text - Full document text.
 * @returns {{type: string, excerpt: string}[]} One entry per detected type,
 *   in CLAUSE_PATTERNS order.
 */
function extractClauses(text) {
  const clauses = [];
  const reportedTypes = new Set();

  for (const { type, pattern } of CLAUSE_PATTERNS) {
    // The patterns are global, so every probe is followed by a lastIndex reset.
    const present = pattern.test(text);
    pattern.lastIndex = 0;
    if (!present) continue;

    const firstHit = pattern.exec(text);
    pattern.lastIndex = 0;
    if (!firstHit) continue;

    const windowStart = Math.max(0, firstHit.index - 80);
    const windowEnd = Math.min(text.length, firstHit.index + 160);
    const flattened = text.slice(windowStart, windowEnd).replace(/\s+/g, ' ').trim();
    let excerpt = flattened.replace(/^[^A-Z]/, '').slice(0, 200);
    if (excerpt.length === 200) {
      excerpt += '…';
    }

    // Keep only the first entry for any given type.
    if (!reportedTypes.has(type)) {
      reportedTypes.add(type);
      clauses.push({ type, excerpt });
    }
  }

  return clauses;
}
|
|
228
|
+
|
|
229
|
+
/**
 * Extract sentences that contain obligation language.
 *
 * The text is split into sentences at ".", "!" or "?" followed by whitespace
 * and an upper-case letter. Sentences of 20 characters or fewer, or 400 or
 * more, are ignored. A sentence qualifies when it contains any entry of
 * OBLIGATION_KEYWORDS as a whole word or phrase, case-insensitively, so
 * "Marshall" or "mustard" do not count as "shall" / "must".
 *
 * @param {string} text - Full document text.
 * @returns {string[]} Up to 15 whitespace-normalised sentences, in document order.
 */
function extractObligations(text) {
  if (OBLIGATION_KEYWORDS.length === 0) return [];

  // One alternation built once per call, instead of lower-casing each
  // sentence again for every keyword.
  const alternatives = OBLIGATION_KEYWORDS
    .map((kw) => kw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const obligationPattern = new RegExp(`\\b(?:${alternatives})\\b`, 'i');

  return text
    .replace(/\r\n/g, '\n')
    .split(/(?<=[.!?])\s+(?=[A-Z])/)
    .map((s) => s.replace(/\s+/g, ' ').trim())
    .filter((s) => s.length > 20 && s.length < 400)
    .filter((s) => obligationPattern.test(s))
    .slice(0, 15);
}
|
|
240
|
+
|
|
241
|
+
/**
 * Report every risk flag from RISK_PATTERNS whose pattern occurs in the text.
 *
 * @param {string} text - Full document text.
 * @returns {{flag: string, severity: string}[]} Matching flags ordered
 *   high, then medium, then low.
 */
function extractRisks(text) {
  const severityRank = { high: 0, medium: 1, low: 2 };

  const flagged = RISK_PATTERNS
    .filter(({ pattern }) => {
      const present = pattern.test(text);
      // Global regexes remember their position; rewind for the next caller.
      pattern.lastIndex = 0;
      return present;
    })
    .map(({ label, severity }) => ({ flag: label, severity }));

  flagged.sort((left, right) => severityRank[left.severity] - severityRank[right.severity]);
  return flagged;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Compose the plain-English factual summary of an extraction.
 *
 * @param {object} params
 * @param {string} params.document_ref - Caller's reference for the document.
 * @param {string} params.docTypeLabel - Resolved document-type label.
 * @param {string} params.jurisdictionLabel - Resolved jurisdiction label; the
 *   "governed under" phrase is omitted for 'General / Unspecified'.
 * @param {number} params.wordCount - Word count of the document.
 * @param {string[]} params.parties - Parties found; the first 3 are listed.
 * @param {{type: string}[]} params.clauses - Clauses found; the first 5 types are listed.
 * @param {string[]} params.dates - Dates found (only counted).
 * @param {string[]} params.obligations - Obligation sentences (only counted).
 * @param {{severity: string}[]} params.risks - Risk flags; high and medium are counted.
 * @returns {string} Summary text ending with the no-legal-advice notice.
 */
function buildSummary(params) {
  const {
    document_ref, docTypeLabel, jurisdictionLabel,
    wordCount, parties, clauses, dates, obligations, risks,
  } = params;

  const plural = (count) => (count > 1 ? 's' : '');
  const countSeverity = (level) => risks.filter((r) => r.severity === level).length;

  const highRisks = countSeverity('high');
  const medRisks = countSeverity('medium');
  const clauseList = clauses.slice(0, 5).map((c) => c.type).join(', ');
  const partyList = parties.slice(0, 3).join(', ');

  // Pick "an" before a vowel so labels such as "Employment Agreement" or
  // "Agreement" read correctly (spelling-based heuristic).
  const article = /^[aeiou]/i.test(String(docTypeLabel)) ? 'an' : 'a';

  let summary = `Document reference ${document_ref} is ${article} ${docTypeLabel}`;
  if (jurisdictionLabel !== 'General / Unspecified') {
    summary += ` governed under ${jurisdictionLabel} law`;
  }
  // Fixed locale: the default locale would vary the digit grouping per host.
  summary += `. The document contains ${wordCount.toLocaleString('en-US')} words`;
  if (parties.length > 0) summary += ` and identifies the following parties: ${partyList}`;
  summary += '.';
  if (clauses.length > 0) summary += ` Key clause types identified include: ${clauseList}.`;
  if (dates.length > 0) summary += ` ${dates.length} date reference${plural(dates.length)} found.`;
  if (obligations.length > 0) summary += ` ${obligations.length} obligation statement${plural(obligations.length)} extracted.`;
  if (highRisks > 0) summary += ` ${highRisks} high-severity risk flag${plural(highRisks)} identified.`;
  if (medRisks > 0) summary += ` ${medRisks} medium-severity risk flag${plural(medRisks)} identified.`;
  summary += ' This is a factual extraction only. No legal advice is provided.';

  return summary;
}
|
|
281
|
+
|
|
282
|
+
// ── Parse action string ───────────────────────────────────────────────────────
|
|
283
|
+
// Action format: legal-extractor "doc_ref" "jurisdiction" "doc_type" "document_text"
|
|
284
|
+
/**
 * Pull the double-quoted arguments out of an action string.
 *
 * Only text inside double quotes is returned; anything outside quotes (such
 * as the resolver name) is ignored. A backslash inside quotes escapes the
 * next character for matching purposes, and `\"` is unescaped to `"` in the
 * result. Other backslash sequences are left as written.
 *
 * @param {string} action - e.g. `legal-extractor "REF-1" "ng" "nda" "text..."`.
 * @returns {string[]} Quoted arguments in order of appearance.
 */
function parseActionArgs(action) {
  const quotedArg = /"((?:[^"\\]|\\.)*)"/g;
  return Array.from(
    String(action).matchAll(quotedArg),
    (hit) => hit[1].replace(/\\"/g, '"')
  );
}
|
|
293
|
+
|
|
294
|
+
// ── Main resolver ─────────────────────────────────────────────────────────────
|
|
295
|
+
/**
 * Resolver entry point: extract structured facts from a legal document.
 *
 * Inputs are read from the quoted arguments of `action`, in the order
 * document_ref, jurisdiction, doc_type, document_text. Any argument that is
 * missing or empty falls back to the same-named property of `context`, then
 * to a default. When `action` is not a string, only `context` is used.
 *
 * The function never throws: failures are reported through the `error`
 * property of the returned object. It logs progress to the console.
 *
 * @param {string|*} action - Action string, e.g.
 *   `legal-extractor "REF-1" "ng" "nda" "document text..."`.
 * @param {object} [context={}] - Fallback values for the four inputs.
 * @param {object} [options={}] - Accepted for interface compatibility; unused.
 * @returns {Promise<object>} On success: summary, parties, clauses, dates,
 *   obligations, risks, jurisdiction, doc_type, word_count, document_ref and
 *   extracted_at. When the text is missing or shorter than 10 characters, or
 *   when extraction throws: the same list fields empty plus `error`.
 */
async function resolve(action, context = {}, options = {}) {
  console.log('\n⚖️ LEGAL-EXTRACTOR RESOLVER CALLED');
  console.log(' action preview:', typeof action === 'string' ? action.substring(0, 120) : '[non-string]');

  try {
    // ── 1. Read arguments ────────────────────────────────────────────────
    // parseActionArgs returns only the quoted values, so the resolver name in
    // front of them needs no stripping and the positions are fixed.
    const args = typeof action === 'string' ? parseActionArgs(action) : [];
    const ctx = context ?? {};

    const document_ref = args[0] || ctx.document_ref || 'REF-UNKNOWN';
    const jurisdiction = args[1] || ctx.jurisdiction || 'general';
    const doc_type = args[2] || ctx.doc_type || 'general';
    const suppliedText = args[3] || ctx.document_text || '';
    // Anything that is not a string is treated as "no text supplied".
    const document_text = typeof suppliedText === 'string' ? suppliedText : '';

    // ── 2. Validate ──────────────────────────────────────────────────────
    if (!document_text || document_text.trim().length < 10) {
      console.warn('[legal-extractor] ⚠️ document_text is empty or too short');
      return {
        summary: 'No document text provided for extraction.',
        parties: [],
        clauses: [],
        dates: [],
        obligations: [],
        risks: [],
        jurisdiction: resolveJurisdiction(jurisdiction),
        doc_type: resolveDocType(doc_type),
        word_count: 0,
        // Echoed back, as in the success result, so callers can correlate.
        document_ref,
        error: 'document_text required',
      };
    }

    const text = document_text.trim();
    const wordCount = text.split(/\s+/).length;
    const jurisdictionLabel = resolveJurisdiction(jurisdiction);
    const docTypeLabel = resolveDocType(doc_type);

    console.log(`[legal-extractor] Processing: ref=${document_ref} | type=${docTypeLabel} | jurisdiction=${jurisdictionLabel} | words=${wordCount}`);

    // ── 3. Extract ───────────────────────────────────────────────────────
    const parties = extractParties(text);
    const dates = extractDates(text);
    const clauses = extractClauses(text);
    const obligations = extractObligations(text);
    const risks = extractRisks(text);

    // ── 4. Build summary ─────────────────────────────────────────────────
    const summary = buildSummary({
      document_ref, docTypeLabel, jurisdictionLabel,
      wordCount, parties, clauses, dates, obligations, risks,
    });

    console.log(`[legal-extractor] ✅ Extracted: ${parties.length} parties, ${clauses.length} clauses, ${dates.length} dates, ${obligations.length} obligations, ${risks.length} risk flags`);

    return {
      summary,
      parties,
      clauses,
      dates,
      obligations,
      risks,
      jurisdiction: jurisdictionLabel,
      doc_type: docTypeLabel,
      word_count: wordCount,
      document_ref,
      // Wall-clock timestamp: the one field that differs between runs.
      extracted_at: new Date().toISOString(),
    };

  } catch (err) {
    console.error('[legal-extractor] 💥 Error:', err.message);
    return {
      summary: `Extraction failed: ${err.message}`,
      parties: [],
      clauses: [],
      dates: [],
      obligations: [],
      risks: [],
      jurisdiction: 'Unknown',
      doc_type: 'Unknown',
      word_count: 0,
      error: err.message,
    };
  }
}
|
|
387
|
+
|
|
388
|
+
// ── Exports ───────────────────────────────────────────────────────────────────
|
|
389
|
+
module.exports = resolve;
|
|
390
|
+
module.exports.extractParties = extractParties;
|
|
391
|
+
module.exports.extractDates = extractDates;
|
|
392
|
+
module.exports.extractClauses = extractClauses;
|
|
393
|
+
module.exports.extractObligations = extractObligations;
|
|
394
|
+
module.exports.extractRisks = extractRisks;
|
|
395
|
+
module.exports.buildSummary = buildSummary;
|
package/index.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// index.js
|
|
2
|
+
// O-Lang resolver — kernel-compliant export
|
|
3
|
+
// Follows the same pattern as all @o-lang/* resolvers
|
|
4
|
+
|
|
5
|
+
const resolve = require('./capability');
|
|
6
|
+
const resolverMeta = require('./resolver');
|
|
7
|
+
|
|
8
|
+
// Attach kernel metadata to the resolver function
|
|
9
|
+
// Copy the kernel metadata from resolver.js onto the resolver function,
// one property per field, in this order.
const KERNEL_METADATA_FIELDS = [
  'resolverName', // 'legal-extractor'
  'version',
  'specVersion',
  'inputs',
  'outputs',
  'exampleAction',
  'properties',
  'prohibited_outputs',
  'documentationUrl',
];
for (const field of KERNEL_METADATA_FIELDS) {
  resolve[field] = resolverMeta[field];
}

// Registry / HTTP server manifest. A new object is built on every call.
resolve.manifest = () => {
  const extractorEntry = {
    name: 'legal-extractor',
    input: ['document_ref', 'jurisdiction', 'doc_type', 'document_text'],
    output: ['summary', 'parties', 'clauses', 'dates', 'obligations', 'risks'],
    deterministic: true,
    extract_only: true,
  };

  return {
    name: resolverMeta.resolverName,
    version: resolverMeta.version,
    protocol: 'local',
    resolvers: [extractorEntry],
  };
};

module.exports = resolve;
|
package/package.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@o-lang/legal-extractor",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "O-Lang resolver for structured legal document extraction. Extract-only — never provides legal advice.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"o-lang",
|
|
8
|
+
"resolver",
|
|
9
|
+
"legal",
|
|
10
|
+
"contract",
|
|
11
|
+
"nda",
|
|
12
|
+
"extract",
|
|
13
|
+
"governance"
|
|
14
|
+
],
|
|
15
|
+
"author": "[Your Organisation Name]",
|
|
16
|
+
"license": "MIT",
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "https://github.com/O-Lang-Central/resolver-legal-extractor"
|
|
20
|
+
},
|
|
21
|
+
"engines": {
|
|
22
|
+
"node": ">=18.0.0"
|
|
23
|
+
},
|
|
24
|
+
"dependencies": {}
|
|
25
|
+
}
|
package/resolver.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
// resolver.js
|
|
2
|
+
// O-Lang resolver metadata — read by the kernel and registry
|
|
3
|
+
|
|
4
|
+
// Static metadata describing the legal-extractor resolver. index.js copies
// these fields onto the exported resolver function and uses resolverName and
// version in its manifest.
module.exports = {
  resolverName: 'legal-extractor',
  version: '1.0.0',
  specVersion: 'O-Lang/1.1',

  // Inputs, in the positional order the action string supplies them.
  inputs: [
    { name: 'document_ref', type: 'string', required: true, description: 'Unique reference ID for the document' },
    { name: 'jurisdiction', type: 'string', required: false, description: 'Legal jurisdiction (e.g. ng, gh, uk, us)' },
    { name: 'doc_type', type: 'string', required: false, description: 'Document type (e.g. nda, contract, employment)' },
    { name: 'document_text', type: 'string', required: true, description: 'Full text content of the legal document' },
  ],

  // Fields of the result object.
  // NOTE(review): capability.js also returns document_ref and extracted_at on
  // success and error on failure; those are not declared here — confirm
  // whether the kernel needs them listed.
  outputs: [
    { name: 'summary', type: 'string', description: 'Concise factual summary of the document' },
    { name: 'parties', type: 'array', description: 'Named parties identified in the document' },
    { name: 'clauses', type: 'array', description: 'Key clauses identified by type' },
    { name: 'dates', type: 'array', description: 'All dates found in the document' },
    { name: 'obligations', type: 'array', description: 'Sentences containing obligation language' },
    { name: 'risks', type: 'array', description: 'Risk flags with severity levels' },
    { name: 'jurisdiction', type: 'string', description: 'Resolved jurisdiction label' },
    { name: 'doc_type', type: 'string', description: 'Resolved document type label' },
    { name: 'word_count', type: 'number', description: 'Total word count of the document' },
  ],

  exampleAction: 'legal-extractor "REF-001" "ng" "nda" "This Non-Disclosure Agreement is entered into between Acme Ltd and Beta Corp..."',

  // NOTE(review): capability.js stamps each successful result with
  // extracted_at (current time) and writes to the console on every call.
  // Confirm that this is compatible with how the kernel interprets
  // `deterministic: true` and `sideEffects: false`.
  properties: {
    deterministic: true,
    sideEffects: false,
    requiresNetwork: false,
    handlesTemplatedPrompts: true,
    skipSchemaValidation: true,
    extractOnly: true, // kernel hint: this resolver never takes actions
  },

  // Declared prohibited outputs — the kernel can use these for output validation
  // NOTE(review): the resolver's own output contains two of these phrases:
  // the summary always ends with "No legal advice is provided.", and clause
  // types / risk labels include the word "liability". If the kernel matches
  // these entries as substrings, normal results would be rejected — confirm
  // the matching rules before relying on this list.
  prohibited_outputs: [
    'legal advice',
    'you should sign',
    'you should not sign',
    'this contract is valid',
    'this contract is invalid',
    'you will win',
    'you will lose',
    'liability',
  ],

  documentationUrl: 'https://olang.cloud/registry/legal-extractor',
};
|