lwazi 1.8.4 → 1.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,33 +1,33 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
2
|
+
"name": "lwazi",
|
|
3
|
+
"version": "1.8.6",
|
|
4
|
+
"description": "Lwazi is an AI assistant for Laravel. Install with one command to add an AI assistant to your Laravel app.",
|
|
5
|
+
"main": "bin/lwazi.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"lwazi": "bin/lwazi.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {},
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "https://github.com/nigelnkomo/lwazi.git"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"laravel",
|
|
16
|
+
"ai",
|
|
17
|
+
"assistant",
|
|
18
|
+
"chatbot",
|
|
19
|
+
"ollama",
|
|
20
|
+
"php"
|
|
21
|
+
],
|
|
22
|
+
"author": "Nigel Nkomo",
|
|
23
|
+
"license": "MIT",
|
|
24
|
+
"bugs": {
|
|
25
|
+
"url": "https://github.com/nigelnkomo/lwazi/issues"
|
|
26
|
+
},
|
|
27
|
+
"homepage": "https://github.com/nigelnkomo/lwazi#readme",
|
|
28
|
+
"engines": {
|
|
29
|
+
"node": ">=14.0.0"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {},
|
|
32
|
+
"devDependencies": {}
|
|
33
33
|
}
|
|
@@ -202,7 +202,7 @@ class AnalyzeProjectCommand extends Command
|
|
|
202
202
|
|
|
203
203
|
$manifest['flat'][$fullUrl] = [
|
|
204
204
|
'label' => $text ?: basename($fullUrl),
|
|
205
|
-
'segments' => array_filter(explode('/', parse_url($fullUrl,
|
|
205
|
+
'segments' => array_filter(explode('/', parse_url($fullUrl, PHP_URL_PATH) ?? '')),
|
|
206
206
|
'_path' => $fullUrl,
|
|
207
207
|
'_weight' => 1,
|
|
208
208
|
];
|
|
@@ -210,7 +210,7 @@ class SetupCommand extends Command
|
|
|
210
210
|
|
|
211
211
|
$manifest['flat'][$fullUrl] = [
|
|
212
212
|
'label' => $text ?: basename($fullUrl),
|
|
213
|
-
'segments' => array_filter(explode('/', parse_url($fullUrl,
|
|
213
|
+
'segments' => array_filter(explode('/', parse_url($fullUrl, PHP_URL_PATH) ?? '')),
|
|
214
214
|
'_path' => $fullUrl,
|
|
215
215
|
'_weight' => 1,
|
|
216
216
|
];
|
|
@@ -127,6 +127,9 @@ class NavigationCrawler
|
|
|
127
127
|
if ($weight > ($this->flatIndex[$normalized]['_weight'] ?? 0)) {
|
|
128
128
|
$this->flatIndex[$normalized]['_weight'] = $weight;
|
|
129
129
|
}
|
|
130
|
+
if (!isset($this->linkWeights[$normalized])) {
|
|
131
|
+
$this->linkWeights[$normalized] = 0;
|
|
132
|
+
}
|
|
130
133
|
$this->linkWeights[$normalized] = max($this->linkWeights[$normalized], $weight);
|
|
131
134
|
|
|
132
135
|
if (!isset($this->visited[$normalized])) {
|
|
@@ -38,6 +38,13 @@ class LwaziService
|
|
|
38
38
|
{
|
|
39
39
|
error_log('Lwazi chat: ' . $message);
|
|
40
40
|
|
|
41
|
+
// Extract context from message if user provides identifier
|
|
42
|
+
$extractedContext = $this->extractContextFromMessage($message);
|
|
43
|
+
if ($extractedContext && empty($this->currentContextId)) {
|
|
44
|
+
$this->currentContextId = $extractedContext;
|
|
45
|
+
error_log('Lwazi: extracted context: ' . $extractedContext);
|
|
46
|
+
}
|
|
47
|
+
|
|
41
48
|
// Temporarily disable agent
|
|
42
49
|
// if ($this->agent->isReady()) {
|
|
43
50
|
// return $this->agent->reply($message, $this->currentContextId);
|
|
@@ -100,16 +107,82 @@ class LwaziService
|
|
|
100
107
|
return "I couldn't find information about that. Could you try a different question?";
|
|
101
108
|
}
|
|
102
109
|
|
|
103
|
-
|
|
104
|
-
$
|
|
105
|
-
|
|
106
|
-
$
|
|
107
|
-
|
|
110
|
+
// Check for general website questions BEFORE falling back to LLM
|
|
111
|
+
if ($intent === 'general' || $this->isGeneralWebsiteQuery($message)) {
|
|
112
|
+
// Simple greeting response first
|
|
113
|
+
if (preg_match('/^(hi|hello|hey|good morning|good afternoon|good evening|how are you)\b/i', $message)) {
|
|
114
|
+
$response = "Hello! I'm here to help you navigate this website. You can ask me things like 'where can I find events' or 'what is this website about'. What would you like to know?";
|
|
115
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $response];
|
|
116
|
+
return $response;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
$summary = $this->getWebsiteSummary($message);
|
|
120
|
+
if ($summary) {
|
|
121
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $summary];
|
|
122
|
+
return $summary;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// Try content search as fallback
|
|
126
|
+
$contentResponse = $this->searchContent($message);
|
|
127
|
+
if ($contentResponse) {
|
|
128
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
129
|
+
return $contentResponse;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
108
132
|
|
|
109
|
-
$
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
133
|
+
if ($intent === null && $this->currentContextId) {
|
|
134
|
+
$dataResponse = $this->fetchRelevantData($message);
|
|
135
|
+
if ($dataResponse) {
|
|
136
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $dataResponse];
|
|
137
|
+
return $dataResponse;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Try content search as final fallback before LLM
|
|
142
|
+
$contentResponse = $this->searchContent($message);
|
|
143
|
+
if ($contentResponse) {
|
|
144
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
145
|
+
return $contentResponse;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Last resort: check if this could be a content query and show what's available
|
|
149
|
+
if (!$this->looksLikeDataQuery($message)) {
|
|
150
|
+
$navResponse = $this->getNavigationSuggestions($message);
|
|
151
|
+
if ($navResponse) {
|
|
152
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $navResponse];
|
|
153
|
+
return $navResponse;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// Try one more content search before LLM fallback
|
|
158
|
+
$contentResponse = $this->searchContent($message);
|
|
159
|
+
if ($contentResponse) {
|
|
160
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
161
|
+
return $contentResponse;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Try navigation suggestions
|
|
165
|
+
$suggestions = $this->getNavigationSuggestions($message);
|
|
166
|
+
if ($suggestions) {
|
|
167
|
+
return $suggestions;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// Last resort - try LLM but with timeout protection
|
|
171
|
+
try {
|
|
172
|
+
$prompt = $this->buildPrompt();
|
|
173
|
+
$messages = array_merge(
|
|
174
|
+
[['role' => 'system', 'content' => $prompt]],
|
|
175
|
+
$this->conversationHistory
|
|
176
|
+
);
|
|
177
|
+
|
|
178
|
+
$response = $this->callOllama($messages, 10000); // 10 second timeout
|
|
179
|
+
$clean = $this->sanitizeUserResponse($response['content'] ?? '');
|
|
180
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $clean];
|
|
181
|
+
return $clean;
|
|
182
|
+
} catch (\Throwable $e) {
|
|
183
|
+
// LLM failed, provide helpful fallback
|
|
184
|
+
return "I understand you're looking for something, but I'm having trouble processing that right now. You can try:\n- Asking about specific pages like 'events' or 'stories'\n- Using the navigation menu on the site\n- Contact support if you need immediate assistance.";
|
|
185
|
+
}
|
|
113
186
|
}
|
|
114
187
|
|
|
115
188
|
protected function selectIntentWithLLM(string $message): ?string
|
|
@@ -118,6 +191,10 @@ class LwaziService
|
|
|
118
191
|
return 'data';
|
|
119
192
|
}
|
|
120
193
|
|
|
194
|
+
if ($this->isGeneralWebsiteQuery($message)) {
|
|
195
|
+
return 'general';
|
|
196
|
+
}
|
|
197
|
+
|
|
121
198
|
if ($this->looksLikeNavigationQuery($message) && !$this->looksLikeContentQuery($message)) {
|
|
122
199
|
return 'navigation';
|
|
123
200
|
}
|
|
@@ -128,6 +205,11 @@ class LwaziService
|
|
|
128
205
|
return 'content';
|
|
129
206
|
}
|
|
130
207
|
|
|
208
|
+
// Check for greetings and casual conversation
|
|
209
|
+
if (preg_match('/^(hi|hello|hey|good morning|good afternoon|good evening|how are you|thanks|thank you)\b/i', $msgLower)) {
|
|
210
|
+
return 'general';
|
|
211
|
+
}
|
|
212
|
+
|
|
131
213
|
$prompt =
|
|
132
214
|
"Classify the user's intent as one of: navigation, content, data, general. Return JSON: {\"intent\":\"navigation|content|data|general\"}.\n\n" .
|
|
133
215
|
"- navigation: user wants to find a specific page or link (where, which page, how do i get to)\n" .
|
|
@@ -161,9 +243,11 @@ class LwaziService
|
|
|
161
243
|
$msgLower = strtolower($message);
|
|
162
244
|
|
|
163
245
|
$personalPatterns = [
|
|
164
|
-
'/\bmy\b/', '/\
|
|
246
|
+
'/\bmy\b/', '/\bmine\b/', '/\bmy own\b/',
|
|
165
247
|
'/\bmy account\b/', '/\bmy profile\b/', '/\bmy data\b/',
|
|
166
248
|
'/\bmy records\b/', '/\bmy applications\b/', '/\bmy submissions\b/',
|
|
249
|
+
'/\bmy courses?\b/', '/\bmy results?\b/', '/\bmy fees\b/',
|
|
250
|
+
'/\bmy grades?\b/', '/\bmy profile\b/', '/\bmy information\b/',
|
|
167
251
|
];
|
|
168
252
|
|
|
169
253
|
foreach ($personalPatterns as $pattern) {
|
|
@@ -175,6 +259,48 @@ class LwaziService
|
|
|
175
259
|
return false;
|
|
176
260
|
}
|
|
177
261
|
|
|
262
|
+
protected function isGeneralWebsiteQuery(string $message): bool
|
|
263
|
+
{
|
|
264
|
+
$patterns = [
|
|
265
|
+
'/\bwhat.*this (site|website|app|platform)\b/i',
|
|
266
|
+
'/\babout (this|the) (site|website|app|platform)\b/i',
|
|
267
|
+
'/\bwhat.*(do you|does this).*(offer|provide|have)\b/i',
|
|
268
|
+
'/\btell me about (this|the) (site|website|app)\b/i',
|
|
269
|
+
'/\bwhat is (this|it)\b/i',
|
|
270
|
+
'/\bwhat can (i|you) do (here|on this)\b/i',
|
|
271
|
+
'/\bhow (does|does this) (work|site work)\b/i',
|
|
272
|
+
'/\b(on|about) this (site|website)\b/i',
|
|
273
|
+
'/\bis .* on (this|the) (site|website)\b/i',
|
|
274
|
+
];
|
|
275
|
+
|
|
276
|
+
foreach ($patterns as $pattern) {
|
|
277
|
+
if (preg_match($pattern, $message)) {
|
|
278
|
+
return true;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
return false;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
protected function getWebsiteSummary(string $message): ?string
|
|
286
|
+
{
|
|
287
|
+
// For now, provide a helpful generic response based on navigation
|
|
288
|
+
$tree = $this->ragService->getNavigationTree();
|
|
289
|
+
if ($tree) {
|
|
290
|
+
$flat = $tree->getFlatIndex();
|
|
291
|
+
$pages = array_keys($flat);
|
|
292
|
+
|
|
293
|
+
if (!empty($pages)) {
|
|
294
|
+
$pageList = array_slice($pages, 0, 8);
|
|
295
|
+
$list = implode(', ', array_map(fn($p) => str_replace('-', ' ', trim($p, '/')), $pageList));
|
|
296
|
+
|
|
297
|
+
return "This appears to be a web application with pages like: {$list}. You can navigate to any of these pages, or ask me to help you find specific content. Use the menu or tell me what you're looking for!";
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
return "This is a web application. You can use the navigation menu to find pages, or ask me to help you locate specific content. What would you like to find?";
|
|
302
|
+
}
|
|
303
|
+
|
|
178
304
|
protected function looksLikeContentQuery(string $message): bool
|
|
179
305
|
{
|
|
180
306
|
return (bool) preg_match(
|
|
@@ -183,6 +309,26 @@ class LwaziService
|
|
|
183
309
|
);
|
|
184
310
|
}
|
|
185
311
|
|
|
312
|
+
protected function extractContextFromMessage(string $message): ?string
|
|
313
|
+
{
|
|
314
|
+
$contextParam = config('lwazi.context_param', 'context_id');
|
|
315
|
+
|
|
316
|
+
if (str_contains($contextParam, 'student')) {
|
|
317
|
+
if (preg_match('/\b(STU\d+)\b/i', $message, $matches)) {
|
|
318
|
+
return $matches[1];
|
|
319
|
+
}
|
|
320
|
+
if (preg_match('/\b(\d{6,})\b/', $message, $matches)) {
|
|
321
|
+
return $matches[1];
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
if (preg_match('/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i', $message, $matches)) {
|
|
326
|
+
return strtolower($matches[0]);
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
return null;
|
|
330
|
+
}
|
|
331
|
+
|
|
186
332
|
public function looksLikeNavigationQuery(string $message): bool
|
|
187
333
|
{
|
|
188
334
|
return (bool) preg_match(
|
|
@@ -304,50 +450,197 @@ class LwaziService
|
|
|
304
450
|
$response = "I found some relevant information:\n\n";
|
|
305
451
|
|
|
306
452
|
foreach ($results as $result) {
|
|
307
|
-
$
|
|
308
|
-
|
|
309
|
-
$
|
|
453
|
+
$url = $result['url'] ?? '/';
|
|
454
|
+
// Try to match with navigation for better URL
|
|
455
|
+
$navMatch = $this->getMatchingNavPath($url);
|
|
456
|
+
if ($navMatch) {
|
|
457
|
+
$url = $navMatch;
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
$title = $result['title'] ?? basename($url);
|
|
461
|
+
$snippet = $this->cleanSnippet($result['snippet'] ?? '');
|
|
462
|
+
|
|
463
|
+
$response .= "**{$title}**\n";
|
|
464
|
+
if (!empty($snippet)) {
|
|
465
|
+
$response .= "{$snippet}\n";
|
|
466
|
+
}
|
|
467
|
+
$response .= "[View page]({$url})\n\n";
|
|
310
468
|
}
|
|
311
469
|
|
|
312
470
|
return $response;
|
|
313
471
|
}
|
|
314
472
|
|
|
473
|
+
protected function cleanSnippet(string $snippet): string
|
|
474
|
+
{
|
|
475
|
+
// Remove everything that looks like code/CSS
|
|
476
|
+
$lines = preg_split('/[\n;]/', $snippet);
|
|
477
|
+
$textLines = [];
|
|
478
|
+
|
|
479
|
+
foreach ($lines as $line) {
|
|
480
|
+
$line = trim($line);
|
|
481
|
+
// Skip lines that are mostly CSS-like
|
|
482
|
+
if (preg_match('/^[.#@]?[a-z0-9_-]+\s*\{?/', $line)) continue;
|
|
483
|
+
if (preg_match('/^[a-z-]+:\s*[0-9a-z#%,]+;?$/i', $line)) continue;
|
|
484
|
+
if (preg_match('/var\(--/', $line)) continue;
|
|
485
|
+
if (strlen($line) < 10) continue;
|
|
486
|
+
|
|
487
|
+
$letters = preg_match_all('/[a-zA-Z]/', $line);
|
|
488
|
+
if ($letters > strlen($line) * 0.5) {
|
|
489
|
+
$textLines[] = $line;
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
if (empty($textLines)) {
|
|
494
|
+
return substr($snippet, 0, 80);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
// Join and truncate
|
|
498
|
+
$text = implode('. ', $textLines);
|
|
499
|
+
return substr($text, 0, 120) . (strlen($text) > 120 ? '...' : '');
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
protected function getMatchingNavPath(string $contentUrl): ?string
|
|
503
|
+
{
|
|
504
|
+
$tree = $this->ragService->getNavigationTree();
|
|
505
|
+
if (!$tree) {
|
|
506
|
+
return null;
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
$flat = $tree->getFlatIndex();
|
|
510
|
+
|
|
511
|
+
// Try exact match first
|
|
512
|
+
if (isset($flat[$contentUrl])) {
|
|
513
|
+
return $contentUrl;
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
// Try fuzzy match
|
|
517
|
+
$contentSlug = basename($contentUrl);
|
|
518
|
+
foreach ($flat as $path => $entry) {
|
|
519
|
+
$pathSlug = basename($path);
|
|
520
|
+
if (levenshtein(strtolower($contentSlug), strtolower($pathSlug)) <= 3) {
|
|
521
|
+
return $path;
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
return null;
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
protected function getNavigationSuggestions(string $message): ?string
|
|
529
|
+
{
|
|
530
|
+
$tree = $this->ragService->getNavigationTree();
|
|
531
|
+
if (!$tree) {
|
|
532
|
+
return null;
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
$flat = $tree->getFlatIndex();
|
|
536
|
+
if (empty($flat)) {
|
|
537
|
+
return null;
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
// Try to find partial matches based on message keywords
|
|
541
|
+
$msgLower = strtolower($message);
|
|
542
|
+
$keywords = preg_split('/\s+/', $msgLower);
|
|
543
|
+
$keywords = array_filter($keywords, fn($w) => strlen($w) >= 3);
|
|
544
|
+
|
|
545
|
+
$matched = [];
|
|
546
|
+
foreach ($flat as $path => $entry) {
|
|
547
|
+
$label = strtolower($flat[$path]['label'] ?? '');
|
|
548
|
+
foreach ($keywords as $kw) {
|
|
549
|
+
if (str_contains($label, $kw) || str_contains($path, $kw)) {
|
|
550
|
+
$matched[] = ['path' => $path, 'label' => $flat[$path]['label'] ?? basename($path)];
|
|
551
|
+
break;
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
// If we found matches, use those
|
|
557
|
+
if (!empty($matched)) {
|
|
558
|
+
$list = [];
|
|
559
|
+
foreach (array_slice($matched, 0, 5) as $m) {
|
|
560
|
+
$list[] = "- {$m['label']}: {$m['path']}";
|
|
561
|
+
}
|
|
562
|
+
return "I couldn't find an exact match, but here are some related pages:\n" . implode("\n", $list);
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
// Otherwise show general suggestions
|
|
566
|
+
$pages = array_keys($flat);
|
|
567
|
+
$suggestions = array_slice($pages, 0, 5);
|
|
568
|
+
|
|
569
|
+
if (empty($suggestions)) {
|
|
570
|
+
return null;
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
$list = [];
|
|
574
|
+
foreach ($suggestions as $path) {
|
|
575
|
+
$label = $flat[$path]['label'] ?? basename($path);
|
|
576
|
+
$list[] = "- {$label}: {$path}";
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
return "Here are some pages on this site:\n" . implode("\n", $list) . "\n\nYou can navigate to any of these, or ask me to help you find something specific.";
|
|
580
|
+
}
|
|
581
|
+
|
|
315
582
|
protected function extractSearchTerms(string $message): array
|
|
316
583
|
{
|
|
317
|
-
|
|
318
|
-
$topic = $matches[2] ?? '';
|
|
584
|
+
$msgLower = strtolower($message);
|
|
319
585
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
586
|
+
// Extract topic from common question patterns
|
|
587
|
+
$patterns = [
|
|
588
|
+
'/\babout\s+(\w+)/i',
|
|
589
|
+
'/\b(?:is there|are there|can i find)\s+(?:any\s+)?(\w+)/i',
|
|
590
|
+
'/\binfo(?:rmation)?\s+about\s+(\w+)/i',
|
|
591
|
+
'/\btell me (?:about|the)\s+(\w+)/i',
|
|
592
|
+
];
|
|
593
|
+
|
|
594
|
+
$topics = [];
|
|
595
|
+
foreach ($patterns as $pattern) {
|
|
596
|
+
if (preg_match($pattern, $msgLower, $matches)) {
|
|
597
|
+
if (strlen($matches[1]) >= 3) {
|
|
598
|
+
$topics[] = $matches[1];
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
if (empty($topics)) {
|
|
604
|
+
// Look for known keywords in the message
|
|
605
|
+
$keywords = ['alumni', 'event', 'events', 'news', 'scholarship', 'scholarships', 'certificate', 'certificates', 'profile', 'profiles', 'course', 'courses', 'result', 'results', 'fee', 'fees', 'benefit', 'benefits', 'story', 'stories'];
|
|
606
|
+
foreach ($keywords as $kw) {
|
|
607
|
+
if (str_contains($msgLower, $kw)) {
|
|
608
|
+
$topics[] = $kw;
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
if (empty($topics)) {
|
|
614
|
+
// Fall back to extracting significant words
|
|
615
|
+
$words = preg_split('/\s+/', $msgLower);
|
|
616
|
+
$stopWords = ['where', 'can', 'find', 'get', 'look', 'show', 'list', 'tell', 'know', 'want', 'need', 'help', 'give', 'me', 'i', 'is', 'are', 'was', 'were', 'has', 'have', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'what', 'how', 'why', 'there', 'here', 'this', 'that', 'with', 'from', 'your', 'you', 'for', 'about', 'any'];
|
|
323
617
|
foreach ($words as $w) {
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
618
|
+
$clean = preg_replace('/[^a-z]/', '', $w);
|
|
619
|
+
if (strlen($clean) >= 4 && !in_array($clean, $stopWords)) {
|
|
620
|
+
$topics[] = $clean;
|
|
621
|
+
if (count($topics) >= 2) break;
|
|
327
622
|
}
|
|
328
623
|
}
|
|
329
624
|
}
|
|
330
625
|
|
|
331
|
-
if (empty($
|
|
626
|
+
if (empty($topics)) {
|
|
332
627
|
return [];
|
|
333
628
|
}
|
|
334
|
-
|
|
335
|
-
$prompt = "List 5 synonyms for: {$topic}. Return ONLY JSON array like: [\"word1\",\"word2\"]";
|
|
336
|
-
|
|
337
|
-
$response = $this->callOllama([
|
|
338
|
-
['role' => 'system', 'content' => 'Return only valid JSON.'],
|
|
339
|
-
['role' => 'user', 'content' => $prompt],
|
|
340
|
-
]);
|
|
341
|
-
|
|
342
|
-
$json = $this->extractJson($response['content'] ?? '');
|
|
343
|
-
$terms = is_array($json) ? array_values($json) : [];
|
|
344
629
|
|
|
345
|
-
$
|
|
346
|
-
|
|
347
|
-
|
|
630
|
+
$topic = $topics[0];
|
|
631
|
+
|
|
632
|
+
// Build search terms including the original topic
|
|
633
|
+
$terms = array_filter([strtolower($topic)], fn($t) => strlen($t) >= 3);
|
|
634
|
+
|
|
635
|
+
// Add common variations
|
|
636
|
+
$terms[] = strtolower($topic);
|
|
637
|
+
if (str_ends_with($topic, 's')) {
|
|
638
|
+
$terms[] = rtrim($topic, 's');
|
|
639
|
+
} else {
|
|
640
|
+
$terms[] = $topic . 's';
|
|
348
641
|
}
|
|
349
|
-
|
|
350
|
-
return $terms;
|
|
642
|
+
|
|
643
|
+
return array_values(array_unique($terms));
|
|
351
644
|
}
|
|
352
645
|
|
|
353
646
|
protected function findBestRouteWithTerms(array $routes, array $terms): ?string
|
|
@@ -872,10 +1165,10 @@ Remember: You understand this project's structure from the analysis done during
|
|
|
872
1165
|
PROMPT;
|
|
873
1166
|
}
|
|
874
1167
|
|
|
875
|
-
protected function callOllama(array $messages): array
|
|
1168
|
+
protected function callOllama(array $messages, int $timeout = 120): array
|
|
876
1169
|
{
|
|
877
1170
|
try {
|
|
878
|
-
$response = Http::timeout(
|
|
1171
|
+
$response = Http::timeout($timeout)->post("{$this->ollamaUrl}/api/chat", [
|
|
879
1172
|
'model' => $this->model,
|
|
880
1173
|
'messages' => $messages,
|
|
881
1174
|
'stream' => false,
|
|
@@ -887,7 +1180,7 @@ PROMPT;
|
|
|
887
1180
|
|
|
888
1181
|
return ['content' => $response->json()['message']['content'] ?? 'No response'];
|
|
889
1182
|
} catch (\Exception $e) {
|
|
890
|
-
|
|
1183
|
+
throw new \Exception('AI service unavailable: ' . $e->getMessage());
|
|
891
1184
|
}
|
|
892
1185
|
}
|
|
893
1186
|
|
|
@@ -2,17 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
namespace Lwazi\Core\Services;
|
|
4
4
|
|
|
5
|
+
use Illuminate\Support\Facades\Http;
|
|
6
|
+
use Illuminate\Support\Facades\Config;
|
|
7
|
+
|
|
5
8
|
class NavigationTree
|
|
6
9
|
{
|
|
7
10
|
protected array $tree = [];
|
|
8
|
-
protected array $synonyms = [];
|
|
9
11
|
protected array $flatIndex = [];
|
|
10
|
-
protected array $synonymUsage = [];
|
|
11
12
|
protected ?TokenizerService $tokenizer = null;
|
|
12
13
|
protected array $stemmedIndex = [];
|
|
14
|
+
protected string $ollamaUrl;
|
|
15
|
+
protected string $model;
|
|
13
16
|
|
|
14
17
|
public function __construct()
|
|
15
18
|
{
|
|
19
|
+
$this->ollamaUrl = config('lwazi.ollama_url', 'http://localhost:11434');
|
|
20
|
+
$this->model = config('lwazi.model', 'llama3.2:1b');
|
|
16
21
|
}
|
|
17
22
|
|
|
18
23
|
protected function getTokenizer(): TokenizerService
|
|
@@ -23,47 +28,6 @@ class NavigationTree
|
|
|
23
28
|
return $this->tokenizer;
|
|
24
29
|
}
|
|
25
30
|
|
|
26
|
-
protected function loadDefaultSynonyms(): void
|
|
27
|
-
{
|
|
28
|
-
$this->synonyms = [
|
|
29
|
-
'jobs' => [
|
|
30
|
-
'job',
|
|
31
|
-
'job posting',
|
|
32
|
-
'vacancy',
|
|
33
|
-
'vacancies',
|
|
34
|
-
'employment',
|
|
35
|
-
'career',
|
|
36
|
-
'careers',
|
|
37
|
-
'position',
|
|
38
|
-
'positions',
|
|
39
|
-
'work',
|
|
40
|
-
],
|
|
41
|
-
'about' => ['about us', 'information', 'who we are', 'our story'],
|
|
42
|
-
'contact' => ['contact us', 'reach us', 'get in touch', 'support', 'help'],
|
|
43
|
-
'home' => ['homepage', 'main page', 'landing', 'start'],
|
|
44
|
-
'login' => ['sign in', 'log in', 'sign on', 'authenticate'],
|
|
45
|
-
'register' => ['sign up', 'sign on', 'create account', 'join'],
|
|
46
|
-
'profile' => ['account', 'my account', 'settings', 'preferences'],
|
|
47
|
-
'dashboard' => ['panel', 'control panel', 'home'],
|
|
48
|
-
'search' => ['find', 'lookup', 'browse', 'filter'],
|
|
49
|
-
'admin' => ['administration', 'manage', 'management', 'backend'],
|
|
50
|
-
'list' => ['lists', 'listing', 'all', 'view all', 'browse'],
|
|
51
|
-
'create' => ['add', 'new', 'add new', 'submit'],
|
|
52
|
-
'edit' => ['update', 'modify', 'change', 'alter'],
|
|
53
|
-
'delete' => ['remove', 'destroy', 'drop'],
|
|
54
|
-
'show' => ['view', 'display', 'detail', 'details'],
|
|
55
|
-
'store' => ['save', 'persist', 'create'],
|
|
56
|
-
'news' => ['news', 'updates', 'announcements', 'blog', 'articles', 'events'],
|
|
57
|
-
'gallery' => ['photos', 'images', 'pictures', 'media'],
|
|
58
|
-
'faq' => ['questions', 'help', 'support', 'answers', 'guide'],
|
|
59
|
-
'terms' => ['terms', 'conditions', 'terms of service', 'legal'],
|
|
60
|
-
'privacy' => ['privacy policy', 'privacy', 'data protection'],
|
|
61
|
-
'result' => ['grade', 'score', 'mark', 'outcome', 'status', 'academic record', 'transcript', 'performance'],
|
|
62
|
-
'passed' => ['success', 'approved', 'qualified', 'cleared', 'completed'],
|
|
63
|
-
'failed' => ['unsuccessful', 'rejected', 'not qualified', 'failed', 'not passed'],
|
|
64
|
-
];
|
|
65
|
-
}
|
|
66
|
-
|
|
67
31
|
public function buildFromPages(array $pages): void
|
|
68
32
|
{
|
|
69
33
|
$this->tree = [];
|
|
@@ -234,12 +198,10 @@ class NavigationTree
|
|
|
234
198
|
return null;
|
|
235
199
|
}
|
|
236
200
|
|
|
237
|
-
$expandedTokens = $this->expandWithSynonyms($queryTokens);
|
|
238
|
-
|
|
239
201
|
$candidates = [];
|
|
240
202
|
|
|
241
203
|
foreach ($this->flatIndex as $path => $entry) {
|
|
242
|
-
$score = $this->scoreMatch($entry, $queryNormalized, $queryTokens
|
|
204
|
+
$score = $this->scoreMatch($entry, $queryNormalized, $queryTokens);
|
|
243
205
|
if ($score > 0) {
|
|
244
206
|
$candidates[] = [
|
|
245
207
|
'path' => $path,
|
|
@@ -251,6 +213,26 @@ class NavigationTree
|
|
|
251
213
|
|
|
252
214
|
usort($candidates, fn($a, $b) => $b['score'] <=> $a['score']);
|
|
253
215
|
|
|
216
|
+
if (empty($candidates) || $candidates[0]['score'] < 15) {
|
|
217
|
+
$llmSynonyms = $this->generateSynonymsWithLLM($query);
|
|
218
|
+
if (!empty($llmSynonyms)) {
|
|
219
|
+
$allTokens = array_unique(array_merge($queryTokens, $llmSynonyms));
|
|
220
|
+
|
|
221
|
+
foreach ($this->flatIndex as $path => $entry) {
|
|
222
|
+
$score = $this->scoreMatch($entry, $queryNormalized, $allTokens);
|
|
223
|
+
if ($score > 0) {
|
|
224
|
+
$candidates[] = [
|
|
225
|
+
'path' => $path,
|
|
226
|
+
'label' => $entry['label'],
|
|
227
|
+
'score' => $score,
|
|
228
|
+
];
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
usort($candidates, fn($a, $b) => $b['score'] <=> $a['score']);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
254
236
|
if (empty($candidates) || $candidates[0]['score'] < 15) {
|
|
255
237
|
return null;
|
|
256
238
|
}
|
|
@@ -261,9 +243,60 @@ class NavigationTree
|
|
|
261
243
|
public function searchTree(string $query): ?array
|
|
262
244
|
{
|
|
263
245
|
$queryTokens = $this->tokenize($query);
|
|
264
|
-
|
|
246
|
+
|
|
247
|
+
$result = $this->searchTreeRecursive($this->tree, $queryTokens, $queryTokens);
|
|
248
|
+
|
|
249
|
+
if ($result && $result['score'] >= 15) {
|
|
250
|
+
return $result;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
$llmSynonyms = $this->generateSynonymsWithLLM($query);
|
|
254
|
+
if (!empty($llmSynonyms)) {
|
|
255
|
+
$allTokens = array_unique(array_merge($queryTokens, $llmSynonyms));
|
|
256
|
+
$result = $this->searchTreeRecursive($this->tree, $allTokens, $queryTokens);
|
|
257
|
+
if ($result) {
|
|
258
|
+
return $result;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
return $result;
|
|
263
|
+
}
|
|
265
264
|
|
|
266
|
-
|
|
265
|
+
protected function generateSynonymsWithLLM(string $query): array
|
|
266
|
+
{
|
|
267
|
+
try {
|
|
268
|
+
$response = Http::timeout(30)->post("{$this->ollamaUrl}/api/chat", [
|
|
269
|
+
'model' => $this->model,
|
|
270
|
+
'messages' => [
|
|
271
|
+
[
|
|
272
|
+
'role' => 'system',
|
|
273
|
+
'content' => 'You are a website navigation assistant. Generate synonyms to help find pages on a website.'
|
|
274
|
+
],
|
|
275
|
+
[
|
|
276
|
+
'role' => 'user',
|
|
277
|
+
'content' => "Generate 10 synonyms, related terms, and variations for searching this website query: \"{$query}\". Return ONLY a JSON array of strings like: [\"synonym1\", \"synonym2\"]"
|
|
278
|
+
]
|
|
279
|
+
],
|
|
280
|
+
'stream' => false,
|
|
281
|
+
]);
|
|
282
|
+
|
|
283
|
+
if (!$response->successful()) {
|
|
284
|
+
return [];
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
$content = $response->json()['message']['content'] ?? '';
|
|
288
|
+
|
|
289
|
+
if (preg_match('/\[[\s\S]*\]/', $content, $matches)) {
|
|
290
|
+
$synonyms = json_decode($matches[0], true);
|
|
291
|
+
if (is_array($synonyms)) {
|
|
292
|
+
return array_map(fn($s) => $this->normalizeText($s), $synonyms);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
} catch (\Exception $e) {
|
|
296
|
+
error_log('NavigationTree LLM synonym generation failed: ' . $e->getMessage());
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
return [];
|
|
267
300
|
}
|
|
268
301
|
|
|
269
302
|
protected function searchTreeRecursive(
|
|
@@ -337,7 +370,7 @@ class NavigationTree
|
|
|
337
370
|
array $entry,
|
|
338
371
|
string $queryNormalized,
|
|
339
372
|
array $queryTokens,
|
|
340
|
-
array $expandedTokens,
|
|
373
|
+
array $expandedTokens = [],
|
|
341
374
|
): int {
|
|
342
375
|
$score = 0;
|
|
343
376
|
$labelNormalized = $entry['normalizedLabel'] ?? $this->normalizeText($entry['label'] ?? '');
|
|
@@ -364,22 +397,6 @@ class NavigationTree
|
|
|
364
397
|
return $score;
|
|
365
398
|
}
|
|
366
399
|
|
|
367
|
-
protected function expandWithSynonyms(array $tokens): array
|
|
368
|
-
{
|
|
369
|
-
$expanded = $tokens;
|
|
370
|
-
|
|
371
|
-
foreach ($tokens as $token) {
|
|
372
|
-
foreach ($this->synonyms as $key => $syns) {
|
|
373
|
-
if ($token === $key || in_array($token, $syns, true)) {
|
|
374
|
-
$expanded = array_merge($expanded, $syns);
|
|
375
|
-
break;
|
|
376
|
-
}
|
|
377
|
-
}
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
return array_unique($expanded);
|
|
381
|
-
}
|
|
382
|
-
|
|
383
400
|
protected function fuzzyMatch(string $a, string $b): bool
|
|
384
401
|
{
|
|
385
402
|
$a = strtolower($a);
|
|
@@ -389,6 +406,13 @@ class NavigationTree
|
|
|
389
406
|
return true;
|
|
390
407
|
}
|
|
391
408
|
|
|
409
|
+
if (str_contains($a, $b) || str_contains($b, $a)) {
|
|
410
|
+
$minLen = min(strlen($a), strlen($b));
|
|
411
|
+
if ($minLen > 0 && abs(strlen($a) - strlen($b)) <= 2) {
|
|
412
|
+
return true;
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
|
|
392
416
|
$commonPrefix = 0;
|
|
393
417
|
$minLen = min(strlen($a), strlen($b));
|
|
394
418
|
for ($i = 0; $i < $minLen; $i++) {
|
|
@@ -399,7 +423,7 @@ class NavigationTree
|
|
|
399
423
|
}
|
|
400
424
|
}
|
|
401
425
|
|
|
402
|
-
if ($commonPrefix <
|
|
426
|
+
if ($commonPrefix < 2) {
|
|
403
427
|
return false;
|
|
404
428
|
}
|
|
405
429
|
|
|
@@ -409,7 +433,11 @@ class NavigationTree
|
|
|
409
433
|
return false;
|
|
410
434
|
}
|
|
411
435
|
|
|
412
|
-
|
|
436
|
+
if ($maxLen < 5 && $lev <= 1) {
|
|
437
|
+
return true;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
return $lev / $maxLen < 0.35;
|
|
413
441
|
}
|
|
414
442
|
|
|
415
443
|
protected function normalizeText(string $text): string
|
|
@@ -480,43 +508,6 @@ class NavigationTree
|
|
|
480
508
|
return $this->flatIndex;
|
|
481
509
|
}
|
|
482
510
|
|
|
483
|
-
public function recordSynonymUsage(array $keywords): void
|
|
484
|
-
{
|
|
485
|
-
foreach ($keywords as $keyword) {
|
|
486
|
-
$key = strtolower(trim($keyword));
|
|
487
|
-
if (!isset($this->synonymUsage[$key])) {
|
|
488
|
-
$this->synonymUsage[$key] = 0;
|
|
489
|
-
}
|
|
490
|
-
$this->synonymUsage[$key]++;
|
|
491
|
-
}
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
public function getTopSynonyms(int $limit = 20): array
|
|
495
|
-
{
|
|
496
|
-
arsort($this->synonymUsage);
|
|
497
|
-
return array_slice($this->synonymUsage, 0, $limit, true);
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
public function saveSynonymCache(string $path): void
|
|
501
|
-
{
|
|
502
|
-
$data = [
|
|
503
|
-
'synonym_usage' => $this->synonymUsage,
|
|
504
|
-
'saved_at' => date('c'),
|
|
505
|
-
];
|
|
506
|
-
if (!is_dir(dirname($path))) {
|
|
507
|
-
mkdir(dirname($path), 0755, true);
|
|
508
|
-
}
|
|
509
|
-
file_put_contents($path, json_encode($data, JSON_PRETTY_PRINT));
|
|
510
|
-
}
|
|
511
|
-
|
|
512
|
-
public function loadSynonymCache(string $path): void
|
|
513
|
-
{
|
|
514
|
-
if (file_exists($path)) {
|
|
515
|
-
$data = json_decode(file_get_contents($path), true);
|
|
516
|
-
$this->synonymUsage = $data['synonym_usage'] ?? [];
|
|
517
|
-
}
|
|
518
|
-
}
|
|
519
|
-
|
|
520
511
|
public function buildStemmedIndex(): void
|
|
521
512
|
{
|
|
522
513
|
$tokenizer = $this->getTokenizer();
|
|
@@ -576,7 +567,6 @@ class NavigationTree
|
|
|
576
567
|
return [
|
|
577
568
|
'tree' => $this->tree,
|
|
578
569
|
'flat' => $this->flatIndex,
|
|
579
|
-
'synonyms' => $this->synonyms,
|
|
580
570
|
'stemmed' => $this->stemmedIndex,
|
|
581
571
|
];
|
|
582
572
|
}
|
|
@@ -586,7 +576,6 @@ class NavigationTree
|
|
|
586
576
|
$instance = new self();
|
|
587
577
|
$instance->tree = $data['tree'] ?? [];
|
|
588
578
|
$instance->flatIndex = $data['flat'] ?? [];
|
|
589
|
-
$instance->synonyms = $data['synonyms'] ?? $instance->synonyms;
|
|
590
579
|
$instance->stemmedIndex = $data['stemmed'] ?? [];
|
|
591
580
|
return $instance;
|
|
592
581
|
}
|