rust-kgdb 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +828 -1255
- package/examples/fraud-detection-agent.js +346 -0
- package/examples/underwriting-agent.js +379 -0
- package/package.json +1 -1
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fraud Detection Agent - Production Example
|
|
3
|
+
*
|
|
4
|
+
* Real-world insurance fraud detection using rust-kgdb:
|
|
5
|
+
* - Knowledge graph for relationship analysis
|
|
6
|
+
* - GraphFrames for network pattern detection
|
|
7
|
+
* - Vector embeddings for semantic similarity
|
|
8
|
+
* - Datalog rules for fraud pattern inference
|
|
9
|
+
*
|
|
10
|
+
* Fraud patterns based on NICB (National Insurance Crime Bureau) data:
|
|
11
|
+
* - Staged accidents (20% of fraud)
|
|
12
|
+
* - Provider collusion (25%)
|
|
13
|
+
* - Ring operations (40%)
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const {
|
|
17
|
+
GraphDB,
|
|
18
|
+
GraphFrame,
|
|
19
|
+
EmbeddingService,
|
|
20
|
+
DatalogProgram,
|
|
21
|
+
evaluateDatalog,
|
|
22
|
+
queryDatalog,
|
|
23
|
+
getVersion
|
|
24
|
+
} = require('../index.js')
|
|
25
|
+
|
|
26
|
+
// ============================================
|
|
27
|
+
// STEP 1: Insurance Claims Data (NICB patterns)
|
|
28
|
+
// ============================================
|
|
29
|
+
|
|
30
|
+
const INSURANCE_CLAIMS_TTL = `
|
|
31
|
+
@prefix : <http://insurance.org/> .
|
|
32
|
+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
|
|
33
|
+
|
|
34
|
+
# === CLAIMS ===
|
|
35
|
+
:CLM001 :amount "18500" ; :type "collision" ; :claimant :P001 ; :provider :PROV001 .
|
|
36
|
+
:CLM002 :amount "22300" ; :type "bodily_injury" ; :claimant :P002 ; :provider :PROV001 .
|
|
37
|
+
:CLM003 :amount "15800" ; :type "collision" ; :claimant :P003 ; :provider :PROV002 .
|
|
38
|
+
:CLM004 :amount "31200" ; :type "total_loss" ; :claimant :P001 ; :provider :PROV001 .
|
|
39
|
+
:CLM005 :amount "8500" ; :type "collision" ; :claimant :P004 ; :provider :PROV003 .
|
|
40
|
+
|
|
41
|
+
# === PAYMENT FLOWS (Fraud Ring Pattern) ===
|
|
42
|
+
:P001 :paidTo :P002 .
|
|
43
|
+
:P002 :paidTo :P003 .
|
|
44
|
+
:P003 :paidTo :P001 .
|
|
45
|
+
|
|
46
|
+
# === RELATIONSHIPS ===
|
|
47
|
+
:P001 :relatedTo :P002 ; :sharedAddress "123 Oak St Miami" .
|
|
48
|
+
:P002 :relatedTo :P003 ; :sharedPhone "305-555-0199" .
|
|
49
|
+
|
|
50
|
+
# === PROVIDERS ===
|
|
51
|
+
:PROV001 :name "Miami Auto Body LLC" ; :avgCost "18500" .
|
|
52
|
+
:PROV002 :name "South FL Collision" ; :avgCost "12200" .
|
|
53
|
+
:PROV003 :name "Sunrise Body Shop" ; :avgCost "7800" .
|
|
54
|
+
`
|
|
55
|
+
|
|
56
|
+
// ============================================
|
|
57
|
+
// STEP 2: Generate Semantic Embeddings
|
|
58
|
+
// ============================================
|
|
59
|
+
|
|
60
|
+
/**
 * Build a deterministic 384-dimensional feature vector for a claim and scale
 * it to unit length.
 *
 * Layout of the vector before normalisation:
 *   dims 0-63     claim-type weight, modulated by a sine wave
 *   dims 64-127   amount / 50000 (capped at 1), modulated by a cosine wave
 *   dims 128-191  risk score, modulated by a sine wave
 *   dims 192-383  mean of the three signals, modulated by a cosine wave
 *
 * @param {string} type - Claim type. 'collision', 'bodily_injury',
 *   'total_loss' and 'theft' have dedicated weights; anything else uses 0.5.
 * @param {number} amount - Claim amount; values above 50000 saturate at 1.
 * @param {number} riskScore - Risk score used as-is (callers in this file
 *   pass values between 0 and 1).
 * @returns {number[]} Array of 384 numbers with Euclidean norm 1 (left
 *   unscaled only if the raw magnitude is not greater than 0).
 */
function generateClaimEmbedding(type, amount, riskScore) {
  // 384-dimensional embedding representing claim characteristics
  const embedding = new Array(384).fill(0)

  // Type encoding (dims 0-63)
  const typeWeights = {
    collision: 0.3,
    bodily_injury: 0.7,
    total_loss: 0.9,
    theft: 0.8
  }
  // Own-property lookup only: a bare `typeWeights[type]` also resolves
  // inherited keys such as 'constructor' or 'toString' to functions, which
  // turned every component into NaN instead of using the 0.5 fallback.
  const typeWeight = Object.prototype.hasOwnProperty.call(typeWeights, type)
    ? typeWeights[type]
    : 0.5
  for (let i = 0; i < 64; i++) {
    embedding[i] = typeWeight * (0.8 + Math.sin(i * 0.1) * 0.2)
  }

  // Amount encoding (dims 64-127)
  const normalizedAmount = Math.min(amount / 50000, 1)
  for (let i = 64; i < 128; i++) {
    embedding[i] = normalizedAmount * (0.5 + Math.cos((i - 64) * 0.1) * 0.5)
  }

  // Risk score encoding (dims 128-191)
  for (let i = 128; i < 192; i++) {
    embedding[i] = riskScore * (0.6 + Math.sin((i - 128) * 0.15) * 0.4)
  }

  // Pattern features (dims 192-383): blend of all three signals
  const blended = (typeWeight + normalizedAmount + riskScore) / 3
  for (let i = 192; i < 384; i++) {
    embedding[i] = blended * Math.cos((i - 192) * 0.05)
  }

  // Normalize to unit vector; skipped when the magnitude is not positive so
  // we never divide by zero.
  const magnitude = Math.sqrt(embedding.reduce((sum, value) => sum + value * value, 0))
  if (magnitude > 0) {
    for (let i = 0; i < embedding.length; i++) {
      embedding[i] = embedding[i] / magnitude
    }
  }

  return embedding
}
|
|
102
|
+
|
|
103
|
+
// ============================================
|
|
104
|
+
// MAIN FRAUD DETECTION PIPELINE
|
|
105
|
+
// ============================================
|
|
106
|
+
|
|
107
|
+
/**
 * Run the four-phase fraud detection demo and print a report to stdout.
 *
 * Phases, in order:
 *   1. Load the Turtle fixture into a GraphDB and query claims and
 *      three-party payment cycles with SPARQL.
 *   2. Build a GraphFrame over claimants and providers, then print triangle
 *      count, PageRank and connected components.
 *   3. Store one synthetic embedding per claim and list the claims most
 *      similar to CLM004.
 *   4. Evaluate Datalog rules for provider collusion and two-hop relations.
 *
 * The function contains no `await`; being declared `async` means any
 * exception thrown along the way reaches the caller as a rejected promise.
 *
 * @returns {Promise<{claimsAnalyzed: number, circularPayments: number,
 *   triangles: *, collusions: number, riskLevel: string}>} Summary of the
 *   run. `triangles` is whatever `graph.triangleCount()` returns (presumably
 *   a number — confirm against the GraphFrame API).
 */
async function runFraudDetection() {
  const banner = '='.repeat(70)
  const divider = '-'.repeat(50)

  console.log(banner)
  console.log(' FRAUD DETECTION AGENT - Production Pipeline')
  console.log(' rust-kgdb v' + getVersion() + ' | Neuro-Symbolic AI Framework')
  console.log(banner)
  console.log()

  // ===== PHASE 1: Knowledge Graph =====
  console.log('[PHASE 1] Knowledge Graph Initialization')
  console.log(divider)

  const db = new GraphDB('http://insurance.org/fraud-kb')
  db.loadTtl(INSURANCE_CLAIMS_TTL, null)

  console.log(` Graph URI: ${db.getGraphUri()}`)
  console.log(` Triples: ${db.countTriples()}`)

  // Query claims
  const claims = db.querySelect(`
PREFIX : <http://insurance.org/>
SELECT ?claim ?amount ?type ?claimant WHERE {
?claim :amount ?amount ;
:type ?type ;
:claimant ?claimant .
}
`)

  console.log(` Claims found: ${claims.length}`)
  claims.forEach(c => {
    console.log(` - ${c.bindings.claim}: $${c.bindings.amount} (${c.bindings.type}) by ${c.bindings.claimant}`)
  })

  // Query circular payments: three parties that pay each other in a loop
  const circular = db.querySelect(`
PREFIX : <http://insurance.org/>
SELECT ?p1 ?p2 ?p3 WHERE {
?p1 :paidTo ?p2 .
?p2 :paidTo ?p3 .
?p3 :paidTo ?p1 .
}
`)
  console.log(` Circular payments: ${circular.length} pattern(s) detected`)
  if (circular.length > 0) {
    circular.forEach(p => {
      console.log(` - RING: ${p.bindings.p1} -> ${p.bindings.p2} -> ${p.bindings.p3} -> (cycle)`)
    })
  }
  console.log()

  // ===== PHASE 2: GraphFrame Analytics =====
  console.log('[PHASE 2] Graph Network Analysis')
  console.log(divider)

  // GraphFrame is constructed from JSON strings, not from arrays.
  const vertices = JSON.stringify([
    { id: 'P001' }, { id: 'P002' }, { id: 'P003' }, { id: 'P004' },
    { id: 'PROV001' }, { id: 'PROV002' }, { id: 'PROV003' }
  ])

  const edges = JSON.stringify([
    // Payment flows
    { src: 'P001', dst: 'P002' },
    { src: 'P002', dst: 'P003' },
    { src: 'P003', dst: 'P001' },
    // Provider relationships
    { src: 'P001', dst: 'PROV001' },
    { src: 'P002', dst: 'PROV001' },
    { src: 'P003', dst: 'PROV002' },
    { src: 'P004', dst: 'PROV003' },
    { src: 'P001', dst: 'PROV001' } // Multiple claims
  ])

  const graph = new GraphFrame(vertices, edges)
  console.log(` Vertices: ${graph.vertexCount()}`)
  console.log(` Edges: ${graph.edgeCount()}`)

  // Triangle detection (fraud ring indicator)
  const triangles = graph.triangleCount()
  console.log(` Triangles: ${triangles} (fraud ring indicator)`)

  // PageRank for central actors.
  // NOTE(review): 0.15 and 20 look like reset probability and iteration cap —
  // confirm against the GraphFrame API.
  const pr = JSON.parse(graph.pageRank(0.15, 20))
  console.log(' PageRank (central actors):')
  if (pr.ranks) {
    const sorted = Object.entries(pr.ranks).sort((a, b) => b[1] - a[1])
    sorted.slice(0, 4).forEach(([node, score]) => {
      console.log(` - ${node}: ${score.toFixed(4)}`)
    })
  }

  // Connected components: only reported as present/absent, judged by the
  // size of the serialised result.
  const cc = JSON.parse(graph.connectedComponents())
  console.log(` Connected components: ${JSON.stringify(cc).length > 10 ? 'detected' : 'none'}`)
  console.log()

  // ===== PHASE 3: Semantic Embeddings =====
  console.log('[PHASE 3] Semantic Similarity Analysis')
  console.log(divider)

  const embeddings = new EmbeddingService()
  console.log(` Service enabled: ${embeddings.isEnabled()}`)

  // Store claim embeddings
  const claimData = [
    { id: 'CLM001', type: 'collision', amount: 18500, risk: 0.75 },
    { id: 'CLM002', type: 'bodily_injury', amount: 22300, risk: 0.85 },
    { id: 'CLM003', type: 'collision', amount: 15800, risk: 0.70 },
    { id: 'CLM004', type: 'total_loss', amount: 31200, risk: 0.95 },
    { id: 'CLM005', type: 'collision', amount: 8500, risk: 0.25 }
  ]

  claimData.forEach(claim => {
    const claimVector = generateClaimEmbedding(claim.type, claim.amount, claim.risk)
    embeddings.storeVector(claim.id, claimVector)
  })

  console.log(` Embeddings stored: ${claimData.length}`)

  // Verify embedding storage
  const storedVector = embeddings.getVector('CLM004')
  console.log(` Vector dimension: ${storedVector ? storedVector.length : 'N/A'}`)

  // Rebuild index for similarity search
  embeddings.rebuildIndex()

  // Find similar claims to the suspicious high-value one
  const similar = JSON.parse(embeddings.findSimilar('CLM004', 5, 0.3))
  console.log(' Similar to CLM004 (high-risk):')
  similar.filter(s => s.entity !== 'CLM004').forEach(s => {
    const sim = typeof s.similarity === 'number' && !Number.isNaN(s.similarity)
      ? (s.similarity * 100).toFixed(1) + '%'
      : 'N/A'
    console.log(` - ${s.entity}: ${sim} similarity`)
  })
  console.log()

  // ===== PHASE 4: Datalog Reasoning =====
  console.log('[PHASE 4] Datalog Rule-Based Inference')
  console.log(divider)

  const datalog = new DatalogProgram()

  // Add facts; facts and rules are passed to the engine as JSON strings.
  const facts = [
    { predicate: 'claim', terms: ['CLM001', 'P001', 'PROV001'] },
    { predicate: 'claim', terms: ['CLM002', 'P002', 'PROV001'] },
    { predicate: 'claim', terms: ['CLM003', 'P003', 'PROV002'] },
    { predicate: 'claim', terms: ['CLM004', 'P001', 'PROV001'] },
    { predicate: 'related', terms: ['P001', 'P002'] },
    { predicate: 'related', terms: ['P002', 'P003'] }
  ]
  facts.forEach(fact => {
    datalog.addFact(JSON.stringify(fact))
  })

  console.log(` Facts: ${datalog.factCount()}`)

  // Rule: Provider collusion detection
  // collusion(P1, P2, Provider) :- claim(_, P1, Provider), claim(_, P2, Provider), related(P1, P2)
  datalog.addRule(JSON.stringify({
    head: { predicate: 'collusion', terms: ['?P1', '?P2', '?Prov'] },
    body: [
      { predicate: 'claim', terms: ['?C1', '?P1', '?Prov'] },
      { predicate: 'claim', terms: ['?C2', '?P2', '?Prov'] },
      { predicate: 'related', terms: ['?P1', '?P2'] }
    ]
  }))

  // Rule: Transitive relationship
  // connected(X, Z) :- related(X, Y), related(Y, Z)
  datalog.addRule(JSON.stringify({
    head: { predicate: 'connected', terms: ['?X', '?Z'] },
    body: [
      { predicate: 'related', terms: ['?X', '?Y'] },
      { predicate: 'related', terms: ['?Y', '?Z'] }
    ]
  }))

  console.log(` Rules: ${datalog.ruleCount()}`)

  // Evaluate
  const result = evaluateDatalog(datalog)
  const parsed = JSON.parse(result)

  // Computed once so the inference log, the summary, the risk decision and
  // the return value all agree on the same number.
  const collusionCount = parsed.collusion ? parsed.collusion.length : 0

  console.log(' Inferred facts:')
  if (collusionCount > 0) {
    console.log(` - Collusion: ${JSON.stringify(parsed.collusion)}`)
  }
  if (parsed.connected && parsed.connected.length > 0) {
    console.log(` - Connected: ${JSON.stringify(parsed.connected)}`)
  }
  console.log()

  // ===== FINAL REPORT =====
  console.log(banner)
  console.log(' FRAUD DETECTION REPORT')
  console.log(banner)
  console.log()

  // Every indicator reported as [HIGH] below must also raise the overall
  // level. Collusion was previously left out, so a run with collusion but no
  // triangle or payment cycle printed a [HIGH] line under OVERALL RISK: MEDIUM.
  const riskLevel = triangles > 0 || circular.length > 0 || collusionCount > 0
    ? 'HIGH'
    : 'MEDIUM'

  console.log(' SUMMARY:')
  console.log(` Claims analyzed: ${claims.length}`)
  console.log(` Circular payments: ${circular.length}`)
  console.log(` Network triangles: ${triangles}`)
  console.log(` Provider collusions: ${collusionCount}`)
  console.log()

  console.log(' RISK INDICATORS:')
  if (triangles > 0) {
    console.log(' [HIGH] Triangular payment pattern - classic fraud ring')
  }
  if (circular.length > 0) {
    console.log(' [HIGH] Circular payment flow - money laundering pattern')
  }
  if (collusionCount > 0) {
    console.log(' [HIGH] Provider collusion detected - coordinated fraud')
  }
  console.log(' [MEDIUM] Shared provider concentration')
  console.log()

  console.log(` OVERALL RISK: ${riskLevel}`)
  console.log(' RECOMMENDATION: Refer to SIU for investigation')
  console.log()
  console.log(banner)

  return {
    claimsAnalyzed: claims.length,
    circularPayments: circular.length,
    triangles,
    collusions: collusionCount,
    riskLevel
  }
}
|
|
335
|
+
|
|
336
|
+
// Execute
|
|
337
|
+
// Script entry point: run the pipeline once, print its summary object, and
// terminate the process explicitly with 0 on success or 1 on any failure
// (including a failure while printing the summary).
void (async () => {
  try {
    const summary = await runFraudDetection()
    console.log('\nPipeline completed successfully.')
    console.log('Output:', JSON.stringify(summary, null, 2))
    process.exit(0)
  } catch (err) {
    console.error('Pipeline failed:', err.message)
    process.exit(1)
  }
})()
|