lwazi 1.8.3 → 1.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/Console/AnalyzeProjectCommand.php +1 -1
- package/src/Console/BuildContentIndexCommand.php +84 -0
- package/src/Console/SetupCommand.php +1 -1
- package/src/Installer/ProjectAnalyzer.php +15 -1
- package/src/Providers/LwaziServiceProvider.php +2 -0
- package/src/Services/ContentIndexer.php +30 -6
- package/src/Services/KnowledgeBaseGenerator.php +16 -1
- package/src/Services/LwaziService.php +311 -49
- package/src/Services/NavigationTree.php +43 -5
package/package.json
CHANGED
|
@@ -202,7 +202,7 @@ class AnalyzeProjectCommand extends Command
|
|
|
202
202
|
|
|
203
203
|
$manifest['flat'][$fullUrl] = [
|
|
204
204
|
'label' => $text ?: basename($fullUrl),
|
|
205
|
-
'segments' => array_filter(explode('/', parse_url($fullUrl,
|
|
205
|
+
'segments' => array_filter(explode('/', parse_url($fullUrl, PHP_URL_PATH) ?? '')),
|
|
206
206
|
'_path' => $fullUrl,
|
|
207
207
|
'_weight' => 1,
|
|
208
208
|
];
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
<?php
|
|
2
|
+
|
|
3
|
+
namespace Lwazi\Core\Console;
|
|
4
|
+
|
|
5
|
+
use Illuminate\Console\Command;
|
|
6
|
+
use Illuminate\Support\Facades\File;
|
|
7
|
+
use Lwazi\Core\Services\ContentIndexer;
|
|
8
|
+
|
|
9
|
+
/**
 * Artisan command that builds the website-search content index from the application's views.
 *
 * Every PHP/Blade file under resources/views (admin views excluded) is reduced to plain text and
 * handed to a ContentIndexer; the resulting index is written as JSON to
 * storage/lwazi/content_index.json.
 */
class BuildContentIndexCommand extends Command
{
    /** @var string Console signature used to invoke the command. */
    protected $signature = 'lwazi:build-content-index';

    /** @var string Description shown in the Artisan command list. */
    protected $description = 'Build content index from blade files for website search';

    /**
     * Index the views directory and persist the index to disk.
     *
     * @return int Command::SUCCESS when the index file was written, Command::FAILURE when the
     *             views directory is missing or the index could not be encoded or written.
     */
    public function handle(): int
    {
        $this->info('Building content index from blade files...');

        $viewsPath = resource_path('views');
        if (!is_dir($viewsPath)) {
            $this->error('Views directory not found');

            return Command::FAILURE;
        }

        $indexer = new ContentIndexer();
        $this->indexBladeFiles($viewsPath, $indexer);

        $this->info('Indexed ' . $indexer->getDocumentCount() . ' pages');

        $data = $indexer->toArray();
        $data['generated_at'] = now()->toIso8601String();

        $path = storage_path('lwazi/content_index.json');
        $directory = dirname($path);

        // Re-check is_dir() after a failed mkdir(): another process may have created it meanwhile.
        if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory)) {
            $this->error("Unable to create directory: {$directory}");

            return Command::FAILURE;
        }

        // Substitute invalid UTF-8 instead of failing: view files are not guaranteed to be clean.
        $json = json_encode(
            $data,
            JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
        );
        if ($json === false) {
            $this->error('Unable to encode content index: ' . json_last_error_msg());

            return Command::FAILURE;
        }

        if (file_put_contents($path, $json) === false) {
            $this->error("Unable to write content index to: {$path}");

            return Command::FAILURE;
        }

        $this->info("Content index saved to: {$path}");

        return Command::SUCCESS;
    }

    /**
     * Feed every non-admin PHP/Blade view below $path into the indexer.
     *
     * Files whose extracted text is shorter than 20 bytes are skipped.
     *
     * @param string         $path    Absolute path of the views directory.
     * @param ContentIndexer $indexer Indexer that receives one page per view file.
     */
    protected function indexBladeFiles(string $path, ContentIndexer $indexer): void
    {
        foreach (File::allFiles($path) as $file) {
            // getExtension() only returns the final suffix, so "*.blade.php" reports "php" too.
            if ($file->getExtension() !== 'php') {
                continue;
            }

            // Normalise separators so the admin filter and slug logic also work on Windows.
            $relativePath = str_replace('\\', '/', $file->getRelativePathname());

            if ($this->isAdminView($relativePath)) {
                continue;
            }

            $content = $this->extractText(File::get($file->getPathname()));
            if (strlen($content) < 20) {
                continue;
            }

            $indexer->indexPage(
                $this->toIndexPath($relativePath),
                '<html><body><p>' . $content . '</p></body></html>'
            );
        }
    }

    /**
     * Tell whether a view (path relative to the views directory) belongs to the admin area.
     *
     * Matches any path containing "admin/" plus any file named "admin.*", including one that
     * sits directly in the views root.
     */
    protected function isAdminView(string $relativePath): bool
    {
        return str_contains($relativePath, 'admin/')
            || preg_match('#(^|/)admin\.#', $relativePath) === 1;
    }

    /**
     * Reduce Blade/PHP template source to a single line of plain text.
     *
     * NOTE(review): the "@section ... @endsection" pattern drops the whole section body, which
     * for views that extend a layout is presumably the page's main content - confirm this is
     * intended before relying on the index for such views.
     */
    protected function extractText(string $source): string
    {
        // Applied in order; the "s" flag lets "." span newlines so multi-line blocks are removed.
        $patterns = [
            '/@php.*?@endphp/s',
            '/@section.*?@endsection/s',
            '/@yield.*?(?=\s|$)/',
            '/\{\{.*?\}\}/s',
            '/\{!!.*?!!\}/s',
        ];

        // preg_replace() returns null on a PCRE error; treat that as "no usable text".
        $text = preg_replace($patterns, '', $source) ?? '';
        $text = strip_tags($text);
        $text = preg_replace('/\s+/', ' ', $text) ?? '';

        return trim($text);
    }

    /**
     * Turn a view's relative file path into the path key used in the index.
     *
     * ".blade.php" / ".php" are removed, directory separators become dashes, runs of dashes are
     * collapsed and trailing dashes trimmed, e.g. "pages/about.blade.php" => "/pages-about".
     */
    protected function toIndexPath(string $relativePath): string
    {
        $pathName = '/' . str_replace(['.blade.php', '.php', '/'], ['', '', '-'], $relativePath);
        $pathName = preg_replace('/-+/', '-', $pathName) ?? $pathName;

        return rtrim($pathName, '-') ?: '/';
    }
}
|
|
@@ -210,7 +210,7 @@ class SetupCommand extends Command
|
|
|
210
210
|
|
|
211
211
|
$manifest['flat'][$fullUrl] = [
|
|
212
212
|
'label' => $text ?: basename($fullUrl),
|
|
213
|
-
'segments' => array_filter(explode('/', parse_url($fullUrl,
|
|
213
|
+
'segments' => array_filter(explode('/', parse_url($fullUrl, PHP_URL_PATH) ?? '')),
|
|
214
214
|
'_path' => $fullUrl,
|
|
215
215
|
'_weight' => 1,
|
|
216
216
|
];
|
|
@@ -338,8 +338,22 @@ class ProjectAnalyzer
|
|
|
338
338
|
$pages = $this->knowledge['pages'] ?? [];
|
|
339
339
|
$routes = $this->knowledge['routes'] ?? [];
|
|
340
340
|
|
|
341
|
+
$publicRoutes = array_filter($routes, function($r) {
|
|
342
|
+
$uri = $r['uri'] ?? '';
|
|
343
|
+
$name = $r['name'] ?? '';
|
|
344
|
+
|
|
345
|
+
if (str_starts_with($uri, '/admin') || str_starts_with($uri, '/api') || str_starts_with($uri, '/_')) {
|
|
346
|
+
return false;
|
|
347
|
+
}
|
|
348
|
+
if ($name && (str_starts_with($name, 'admin.') || str_starts_with($name, 'api.'))) {
|
|
349
|
+
return false;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
return true;
|
|
353
|
+
});
|
|
354
|
+
|
|
341
355
|
$tree->buildFromPages($pages);
|
|
342
|
-
$tree->buildFromRoutes($
|
|
356
|
+
$tree->buildFromRoutes(array_values($publicRoutes));
|
|
343
357
|
|
|
344
358
|
$this->knowledge['navigation_tree'] = $tree->toArray();
|
|
345
359
|
|
|
@@ -13,6 +13,7 @@ use Lwazi\Core\Services\LwaziAgent;
|
|
|
13
13
|
use Lwazi\Core\Console\AnalyzeProjectCommand;
|
|
14
14
|
use Lwazi\Core\Console\LwaziIngestCommand;
|
|
15
15
|
use Lwazi\Core\Console\SetupCommand;
|
|
16
|
+
use Lwazi\Core\Console\BuildContentIndexCommand;
|
|
16
17
|
use Lwazi\Core\Http\Middleware\InjectLwaziChat;
|
|
17
18
|
|
|
18
19
|
class LwaziServiceProvider extends ServiceProvider
|
|
@@ -36,6 +37,7 @@ class LwaziServiceProvider extends ServiceProvider
|
|
|
36
37
|
AnalyzeProjectCommand::class,
|
|
37
38
|
SetupCommand::class,
|
|
38
39
|
LwaziIngestCommand::class,
|
|
40
|
+
BuildContentIndexCommand::class,
|
|
39
41
|
]);
|
|
40
42
|
}
|
|
41
43
|
}
|
|
@@ -184,13 +184,31 @@ class ContentIndexer
|
|
|
184
184
|
|
|
185
185
|
protected function generateSnippet(array $doc, array $terms): string
|
|
186
186
|
{
|
|
187
|
-
|
|
187
|
+
if (!is_array($doc)) {
|
|
188
|
+
return '';
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
$paragraphs = $doc['paragraphs'] ?? null;
|
|
192
|
+
$text = '';
|
|
188
193
|
|
|
189
|
-
if (
|
|
190
|
-
$
|
|
194
|
+
if (is_array($paragraphs) && count($paragraphs) > 0) {
|
|
195
|
+
$first = $paragraphs[0];
|
|
196
|
+
if (is_string($first)) {
|
|
197
|
+
$text = implode(' ', $paragraphs);
|
|
198
|
+
}
|
|
191
199
|
}
|
|
192
200
|
|
|
193
|
-
if (
|
|
201
|
+
if ($text === '') {
|
|
202
|
+
$lists = $doc['lists'] ?? null;
|
|
203
|
+
if (is_array($lists) && count($lists) > 0) {
|
|
204
|
+
$first = $lists[0];
|
|
205
|
+
if (is_string($first)) {
|
|
206
|
+
$text = implode(' ', $lists);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
if ($text === '' || !is_string($text)) {
|
|
194
212
|
return '';
|
|
195
213
|
}
|
|
196
214
|
|
|
@@ -199,6 +217,7 @@ class ContentIndexer
|
|
|
199
217
|
$bestLen = PHP_INT_MAX;
|
|
200
218
|
|
|
201
219
|
foreach ($terms as $term) {
|
|
220
|
+
if (!is_string($term)) continue;
|
|
202
221
|
$term = strtolower($term);
|
|
203
222
|
$pos = strpos($text, $term);
|
|
204
223
|
if ($pos !== false && $pos < $bestLen) {
|
|
@@ -218,9 +237,9 @@ class ContentIndexer
|
|
|
218
237
|
$snippet = '...' . $snippet;
|
|
219
238
|
}
|
|
220
239
|
|
|
221
|
-
$snippet = preg_replace('/\s+/', ' ', $snippet);
|
|
240
|
+
$snippet = preg_replace('/\s+/', ' ', $snippet ?? '');
|
|
222
241
|
|
|
223
|
-
return trim($snippet) . '...';
|
|
242
|
+
return (is_string($snippet) ? trim($snippet) : '') . '...';
|
|
224
243
|
}
|
|
225
244
|
|
|
226
245
|
public function toArray(): array
|
|
@@ -233,6 +252,11 @@ class ContentIndexer
|
|
|
233
252
|
];
|
|
234
253
|
}
|
|
235
254
|
|
|
255
|
+
/**
 * Report the indexer's document counter.
 *
 * @return int Current value of the totalDocuments property.
 */
public function getDocumentCount(): int
{
    return $this->totalDocuments;
}
|
|
259
|
+
|
|
236
260
|
public static function fromArray(array $data): self
|
|
237
261
|
{
|
|
238
262
|
$indexer = new self();
|
|
@@ -50,8 +50,23 @@ class KnowledgeBaseGenerator
|
|
|
50
50
|
/**
 * Build the navigation tree from analysed pages and public routes.
 *
 * Routes whose URI starts with "/admin", "/api" or "/_", or whose name starts with "admin." or
 * "api.", are left out before the routes are handed to the tree.
 *
 * @param array $pages  Page descriptors passed to NavigationTree::buildFromPages().
 * @param array $routes Route descriptors; each may carry "uri" and "name" entries.
 *
 * @return array The tree as returned by NavigationTree::toArray().
 */
protected function buildNavigationTree(array $pages, array $routes): array
{
    $hiddenUriPrefixes = ['/admin', '/api', '/_'];
    $hiddenNamePrefixes = ['admin.', 'api.'];

    $visibleRoutes = [];
    foreach ($routes as $route) {
        $uri = $route['uri'] ?? '';
        $name = $route['name'] ?? '';

        foreach ($hiddenUriPrefixes as $prefix) {
            if (str_starts_with($uri, $prefix)) {
                continue 2;
            }
        }

        // Name prefixes are only checked when the route actually has a (truthy) name.
        if ($name) {
            foreach ($hiddenNamePrefixes as $prefix) {
                if (str_starts_with($name, $prefix)) {
                    continue 2;
                }
            }
        }

        $visibleRoutes[] = $route;
    }

    $tree = new NavigationTree();
    $tree->buildFromPages($pages);
    $tree->buildFromRoutes($visibleRoutes);

    return $tree->toArray();
}
|
|
57
72
|
|
|
@@ -38,6 +38,13 @@ class LwaziService
|
|
|
38
38
|
{
|
|
39
39
|
error_log('Lwazi chat: ' . $message);
|
|
40
40
|
|
|
41
|
+
// Extract context from message if user provides identifier
|
|
42
|
+
$extractedContext = $this->extractContextFromMessage($message);
|
|
43
|
+
if ($extractedContext && empty($this->currentContextId)) {
|
|
44
|
+
$this->currentContextId = $extractedContext;
|
|
45
|
+
error_log('Lwazi: extracted context: ' . $extractedContext);
|
|
46
|
+
}
|
|
47
|
+
|
|
41
48
|
// Temporarily disable agent
|
|
42
49
|
// if ($this->agent->isReady()) {
|
|
43
50
|
// return $this->agent->reply($message, $this->currentContextId);
|
|
@@ -45,7 +52,7 @@ class LwaziService
|
|
|
45
52
|
|
|
46
53
|
$this->conversationHistory[] = ['role' => 'user', 'content' => $message];
|
|
47
54
|
|
|
48
|
-
if ($this->looksLikeNavigationQuery($message)) {
|
|
55
|
+
if ($this->looksLikeNavigationQuery($message) && !$this->looksLikeDataQuery($message)) {
|
|
49
56
|
error_log('Lwazi: looks like navigation query');
|
|
50
57
|
$nav = $this->findNavigationAnswer($message);
|
|
51
58
|
if ($nav) {
|
|
@@ -53,11 +60,10 @@ class LwaziService
|
|
|
53
60
|
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $nav];
|
|
54
61
|
return $nav;
|
|
55
62
|
}
|
|
56
|
-
$
|
|
57
|
-
if ($
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
return $fallback;
|
|
63
|
+
$contentResponse = $this->searchContent($message);
|
|
64
|
+
if ($contentResponse) {
|
|
65
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
66
|
+
return $contentResponse;
|
|
61
67
|
}
|
|
62
68
|
}
|
|
63
69
|
|
|
@@ -70,30 +76,53 @@ class LwaziService
|
|
|
70
76
|
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $nav];
|
|
71
77
|
return $nav;
|
|
72
78
|
}
|
|
73
|
-
$
|
|
74
|
-
if ($
|
|
75
|
-
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $
|
|
76
|
-
return $
|
|
79
|
+
$contentResponse = $this->searchContent($message);
|
|
80
|
+
if ($contentResponse) {
|
|
81
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
82
|
+
return $contentResponse;
|
|
77
83
|
}
|
|
84
|
+
return "I couldn't find a page matching that. Could you try a different question?";
|
|
78
85
|
}
|
|
79
86
|
|
|
80
|
-
if ($intent === 'content'
|
|
87
|
+
if ($intent === 'content') {
|
|
81
88
|
$contentResponse = $this->searchContent($message);
|
|
82
89
|
if ($contentResponse) {
|
|
83
90
|
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
84
91
|
return $contentResponse;
|
|
85
92
|
}
|
|
93
|
+
return "I couldn't find information about that. Could you try a different question?";
|
|
86
94
|
}
|
|
87
95
|
|
|
88
|
-
if ($intent === '
|
|
89
|
-
$
|
|
90
|
-
if ($
|
|
91
|
-
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $
|
|
92
|
-
return $
|
|
96
|
+
if ($intent === 'data') {
|
|
97
|
+
$dataResponse = $this->fetchRelevantData($message);
|
|
98
|
+
if ($dataResponse) {
|
|
99
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $dataResponse];
|
|
100
|
+
return $dataResponse;
|
|
93
101
|
}
|
|
102
|
+
$contentResponse = $this->searchContent($message);
|
|
103
|
+
if ($contentResponse) {
|
|
104
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
105
|
+
return $contentResponse;
|
|
106
|
+
}
|
|
107
|
+
return "I couldn't find information about that. Could you try a different question?";
|
|
94
108
|
}
|
|
95
109
|
|
|
96
|
-
|
|
110
|
+
// Check for general website questions BEFORE falling back to LLM
|
|
111
|
+
if ($intent === 'general' || $this->isGeneralWebsiteQuery($message)) {
|
|
112
|
+
// Simple greeting response first
|
|
113
|
+
if (preg_match('/^(hi|hello|hey|good morning|good afternoon|good evening|how are you)\b/i', $message)) {
|
|
114
|
+
$response = "Hello! I'm here to help you navigate this website. You can ask me things like 'where can I find events' or 'what is this website about'. What would you like to know?";
|
|
115
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $response];
|
|
116
|
+
return $response;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
$summary = $this->getWebsiteSummary($message);
|
|
120
|
+
if ($summary) {
|
|
121
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $summary];
|
|
122
|
+
return $summary;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// Try content search as fallback
|
|
97
126
|
$contentResponse = $this->searchContent($message);
|
|
98
127
|
if ($contentResponse) {
|
|
99
128
|
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
@@ -101,7 +130,7 @@ class LwaziService
|
|
|
101
130
|
}
|
|
102
131
|
}
|
|
103
132
|
|
|
104
|
-
if ($intent ===
|
|
133
|
+
if ($intent === null && $this->currentContextId) {
|
|
105
134
|
$dataResponse = $this->fetchRelevantData($message);
|
|
106
135
|
if ($dataResponse) {
|
|
107
136
|
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $dataResponse];
|
|
@@ -109,6 +138,22 @@ class LwaziService
|
|
|
109
138
|
}
|
|
110
139
|
}
|
|
111
140
|
|
|
141
|
+
// Try content search as final fallback before LLM
|
|
142
|
+
$contentResponse = $this->searchContent($message);
|
|
143
|
+
if ($contentResponse) {
|
|
144
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $contentResponse];
|
|
145
|
+
return $contentResponse;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Last resort: check if this could be a content query and show what's available
|
|
149
|
+
if (!$this->looksLikeDataQuery($message)) {
|
|
150
|
+
$navResponse = $this->getNavigationSuggestions($message);
|
|
151
|
+
if ($navResponse) {
|
|
152
|
+
$this->conversationHistory[] = ['role' => 'assistant', 'content' => $navResponse];
|
|
153
|
+
return $navResponse;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
112
157
|
$prompt = $this->buildPrompt();
|
|
113
158
|
$messages = array_merge(
|
|
114
159
|
[['role' => 'system', 'content' => $prompt]],
|
|
@@ -123,16 +168,35 @@ class LwaziService
|
|
|
123
168
|
|
|
124
169
|
protected function selectIntentWithLLM(string $message): ?string
|
|
125
170
|
{
|
|
126
|
-
if ($this->
|
|
171
|
+
if ($this->looksLikeDataQuery($message)) {
|
|
172
|
+
return 'data';
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
if ($this->isGeneralWebsiteQuery($message)) {
|
|
176
|
+
return 'general';
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
if ($this->looksLikeNavigationQuery($message) && !$this->looksLikeContentQuery($message)) {
|
|
127
180
|
return 'navigation';
|
|
128
181
|
}
|
|
129
182
|
|
|
183
|
+
$msgLower = strtolower($message);
|
|
184
|
+
|
|
185
|
+
if (preg_match('/\b(is there|are there|do i have|can i find|show me|find|get|list|what.*contain|information about|info about)\b/i', $message)) {
|
|
186
|
+
return 'content';
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Check for greetings and casual conversation
|
|
190
|
+
if (preg_match('/^(hi|hello|hey|good morning|good afternoon|good evening|how are you|thanks|thank you)\b/i', $msgLower)) {
|
|
191
|
+
return 'general';
|
|
192
|
+
}
|
|
193
|
+
|
|
130
194
|
$prompt =
|
|
131
195
|
"Classify the user's intent as one of: navigation, content, data, general. Return JSON: {\"intent\":\"navigation|content|data|general\"}.\n\n" .
|
|
132
|
-
"- navigation: user wants to find a page or link\n" .
|
|
133
|
-
"- content: user is asking for information that might be on a
|
|
134
|
-
"- data: user wants to query database records\n" .
|
|
135
|
-
"- general: casual conversation\n\n" .
|
|
196
|
+
"- navigation: user wants to find a specific page or link (where, which page, how do i get to)\n" .
|
|
197
|
+
"- content: user is asking for information that might be on a webpage, NOT database data (is there, are there, show me, what does X contain)\n" .
|
|
198
|
+
"- data: user explicitly wants to query database records about their own data (my data, my records, my account)\n" .
|
|
199
|
+
"- general: casual conversation, greetings\n\n" .
|
|
136
200
|
"QUESTION:\n" .
|
|
137
201
|
$message;
|
|
138
202
|
|
|
@@ -155,6 +219,95 @@ class LwaziService
|
|
|
155
219
|
return null;
|
|
156
220
|
}
|
|
157
221
|
|
|
222
|
+
/**
 * Decide whether a message refers to the user's own data.
 *
 * A message qualifies when it contains the standalone word "my" or "mine" (case-insensitive).
 * Longer phrases such as "my account" or "my grades" need no separate patterns: each of them
 * contains the standalone word "my" and is therefore already covered.
 *
 * @param string $message Raw user message.
 *
 * @return bool True when the message contains "my" or "mine" as a whole word.
 */
protected function looksLikeDataQuery(string $message): bool
{
    return preg_match('/\b(?:my|mine)\b/', strtolower($message)) === 1;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Detect questions about the website as a whole ("what is this site", "what can I do here", ...).
 *
 * @param string $message Raw user message.
 *
 * @return bool True when at least one of the general-question patterns matches.
 */
protected function isGeneralWebsiteQuery(string $message): bool
{
    $generalQuestionPatterns = [
        '/\bwhat.*this (site|website|app|platform)\b/i',
        '/\babout (this|the) (site|website|app|platform)\b/i',
        '/\bwhat.*(do you|does this).*(offer|provide|have)\b/i',
        '/\btell me about (this|the) (site|website|app)\b/i',
        '/\bwhat is (this|it)\b/i',
        '/\bwhat can (i|you) do (here|on this)\b/i',
        '/\bhow (does|does this) (work|site work)\b/i',
    ];

    $hits = array_filter(
        $generalQuestionPatterns,
        static fn (string $regex): bool => (bool) preg_match($regex, $message)
    );

    return count($hits) > 0;
}
|
|
263
|
+
|
|
264
|
+
/**
 * Build a short, generic description of the site from its navigation index.
 *
 * Up to eight paths from the navigation tree's flat index are turned into readable names
 * (surrounding slashes trimmed, dashes replaced by spaces) and listed in the reply. When there
 * is no tree or it has no paths, a fixed generic sentence is returned instead, so this method
 * never actually returns null even though the signature allows it.
 *
 * @param string $message The user's message; not used by the current implementation.
 *
 * @return string|null The summary text.
 */
protected function getWebsiteSummary(string $message): ?string
{
    $tree = $this->ragService->getNavigationTree();
    $paths = $tree ? array_keys($tree->getFlatIndex()) : [];

    if (!empty($paths)) {
        $names = array_map(
            static function ($path): string {
                $name = str_replace('-', ' ', trim((string) $path, '/'));

                // The root path "/" trims to an empty string; give it a readable label so the
                // list does not contain a blank entry.
                return $name === '' ? 'home' : $name;
            },
            array_slice($paths, 0, 8)
        );
        $list = implode(', ', $names);

        return "This appears to be a web application with pages like: {$list}. You can navigate to any of these pages, or ask me to help you find specific content. Use the menu or tell me what you're looking for!";
    }

    return "This is a web application. You can use the navigation menu to find pages, or ask me to help you locate specific content. What would you like to find?";
}
|
|
282
|
+
|
|
283
|
+
/**
 * Detect messages that ask about page content rather than a page location.
 *
 * @param string $message Raw user message.
 *
 * @return bool True when the message contains one of the content-question phrases or keywords.
 */
protected function looksLikeContentQuery(string $message): bool
{
    $contentPhrases = '/\b(is there|are there|can i find|show me|what.*contain|information about|passed|failed|grade|result|score|check if)\b/i';

    return preg_match($contentPhrases, $message) === 1;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Pull a user identifier out of free text, if the message contains one.
 *
 * When the configured context parameter name contains "student", a "STU<digits>" token is
 * tried first, then any run of six or more digits. Regardless of configuration, an e-mail
 * address is accepted as a last option and returned lower-cased.
 *
 * @param string $message Raw user message.
 *
 * @return string|null The identifier found, or null when nothing matches.
 */
protected function extractContextFromMessage(string $message): ?string
{
    $found = [];

    $paramName = config('lwazi.context_param', 'context_id');
    if (str_contains($paramName, 'student')) {
        // Order matters: the explicit STU-prefixed form wins over a bare digit run.
        foreach (['/\b(STU\d+)\b/i', '/\b(\d{6,})\b/'] as $idPattern) {
            if (preg_match($idPattern, $message, $found)) {
                return $found[1];
            }
        }
    }

    $emailPattern = '/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i';

    return preg_match($emailPattern, $message, $found)
        ? strtolower($found[0])
        : null;
}
|
|
310
|
+
|
|
158
311
|
public function looksLikeNavigationQuery(string $message): bool
|
|
159
312
|
{
|
|
160
313
|
return (bool) preg_match(
|
|
@@ -276,54 +429,157 @@ class LwaziService
|
|
|
276
429
|
$response = "I found some relevant information:\n\n";
|
|
277
430
|
|
|
278
431
|
foreach ($results as $result) {
|
|
279
|
-
$
|
|
432
|
+
$url = $result['url'] ?? '/';
|
|
433
|
+
// Try to match with navigation for better URL
|
|
434
|
+
$navMatch = $this->getMatchingNavPath($url);
|
|
435
|
+
if ($navMatch) {
|
|
436
|
+
$url = $navMatch;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
$title = $result['title'] ?? basename($url);
|
|
440
|
+
$response .= "**{$title}**\n";
|
|
280
441
|
$response .= "{$result['snippet']}\n";
|
|
281
|
-
$response .= "[
|
|
442
|
+
$response .= "[View page]({$url})\n\n";
|
|
282
443
|
}
|
|
283
444
|
|
|
284
445
|
return $response;
|
|
285
446
|
}
|
|
286
447
|
|
|
448
|
+
/**
 * Map a content-index URL onto a path known to the navigation tree.
 *
 * An exact key match in the tree's flat index wins. Otherwise the last path segment of the
 * URL is compared (case-insensitively) with the last segment of every indexed path, and the
 * path with the smallest Levenshtein distance is returned, provided that distance is at most 3.
 * On equal distances the path that appears first in the index is kept.
 *
 * @param string $contentUrl URL taken from a content search result.
 *
 * @return string|null The matching navigation path, or null when there is no tree or no
 *                     sufficiently close path.
 */
protected function getMatchingNavPath(string $contentUrl): ?string
{
    $tree = $this->ragService->getNavigationTree();
    if (!$tree) {
        return null;
    }

    $flat = $tree->getFlatIndex();

    if (isset($flat[$contentUrl])) {
        return $contentUrl;
    }

    $contentSlug = strtolower(basename($contentUrl));
    if ($contentSlug === '') {
        // "/" has no slug; an empty string would be within distance 3 of every short slug.
        return null;
    }

    $bestPath = null;
    $bestDistance = 4; // One above the accepted maximum of 3.

    foreach (array_keys($flat) as $path) {
        $path = (string) $path;
        $pathSlug = strtolower(basename($path));
        if ($pathSlug === '') {
            continue;
        }

        $distance = levenshtein($contentSlug, $pathSlug);
        if ($distance < $bestDistance) {
            $bestDistance = $distance;
            $bestPath = $path;
        }
    }

    return $bestPath;
}
|
|
473
|
+
|
|
474
|
+
/**
 * Offer the first few known pages as suggestions when nothing more specific was found.
 *
 * The first five entries of the navigation tree's flat index are listed as "- label: path"
 * lines; an entry without a label falls back to the last segment of its path.
 *
 * @param string $message The user's message; not used by the current implementation.
 *
 * @return string|null The suggestion text, or null when there is no tree or the index is empty.
 */
protected function getNavigationSuggestions(string $message): ?string
{
    $tree = $this->ragService->getNavigationTree();
    $flat = $tree ? $tree->getFlatIndex() : null;

    if (empty($flat)) {
        return null;
    }

    $lines = [];
    // Keys are preserved so each entry is still addressed by its path.
    foreach (array_slice($flat, 0, 5, true) as $path => $entry) {
        $label = $entry['label'] ?? basename($path);
        $lines[] = "- {$label}: {$path}";
    }

    return "Here are some pages on this site:\n" . implode("\n", $lines) . "\n\nYou can navigate to any of these, or ask me to help you find something specific.";
}
|
|
502
|
+
|
|
287
503
|
/**
 * Derive content-search terms from a user message.
 *
 * A topic word is looked for in three stages, each used only when the previous one found
 * nothing: (1) the word following common question phrases ("about X", "is there any X", ...),
 * (2) a fixed list of known keywords contained in the message, (3) the first words of four or
 * more letters that are not stop words. Only the first topic found is used; it is returned
 * together with a simple singular/plural variant.
 *
 * @param string $message Raw user message.
 *
 * @return string[] List of lower-case search terms, or an empty array when no topic was found.
 */
protected function extractSearchTerms(string $message): array
{
    $msgLower = strtolower($message);

    // Extract topic from common question patterns
    $patterns = [
        '/\babout\s+(\w+)/i',
        '/\b(?:is there|are there|can i find)\s+(?:any\s+)?(\w+)/i',
        '/\binfo(?:rmation)?\s+about\s+(\w+)/i',
        '/\btell me (?:about|the)\s+(\w+)/i',
    ];

    $topics = [];
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $msgLower, $matches) && strlen($matches[1]) >= 3) {
            $topics[] = $matches[1];
        }
    }

    if (empty($topics)) {
        // Look for known keywords in the message
        $keywords = ['alumni', 'event', 'events', 'news', 'scholarship', 'scholarships', 'certificate', 'certificates', 'profile', 'profiles', 'course', 'courses', 'result', 'results', 'fee', 'fees', 'benefit', 'benefits', 'story', 'stories'];
        foreach ($keywords as $kw) {
            if (str_contains($msgLower, $kw)) {
                $topics[] = $kw;
            }
        }
    }

    if (empty($topics)) {
        // Fall back to extracting significant words
        $words = preg_split('/\s+/', $msgLower) ?: [];
        $stopWords = ['where', 'can', 'find', 'get', 'look', 'show', 'list', 'tell', 'know', 'want', 'need', 'help', 'give', 'me', 'i', 'is', 'are', 'was', 'were', 'has', 'have', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'what', 'how', 'why', 'there', 'here', 'this', 'that', 'with', 'from', 'your', 'you', 'for', 'about', 'any'];
        foreach ($words as $w) {
            $clean = preg_replace('/[^a-z]/', '', $w) ?? '';
            if (strlen($clean) >= 4 && !in_array($clean, $stopWords, true)) {
                $topics[] = $clean;
                if (count($topics) >= 2) {
                    break;
                }
            }
        }
    }

    if (empty($topics)) {
        return [];
    }

    // Topics come from the lower-cased message, so no further case folding is needed.
    $topic = $topics[0];
    $terms = [$topic];

    // Add one singular/plural variation. Only a single trailing "s" is removed: stripping every
    // trailing "s" would turn "class" into "cla".
    if (str_ends_with($topic, 'ss')) {
        $terms[] = $topic . 'es';
    } elseif (str_ends_with($topic, 's')) {
        $terms[] = substr($topic, 0, -1);
    } else {
        $terms[] = $topic . 's';
    }

    return array_values(array_unique($terms));
}
|
|
324
566
|
|
|
325
567
|
protected function findBestRouteWithTerms(array $routes, array $terms): ?string
|
|
326
568
|
{
|
|
569
|
+
$filtered = array_filter($routes, function($r) {
|
|
570
|
+
$uri = $r['uri'] ?? '';
|
|
571
|
+
if (str_starts_with($uri, '/admin') || str_starts_with($uri, '/api') || str_starts_with($uri, '/_')) {
|
|
572
|
+
return false;
|
|
573
|
+
}
|
|
574
|
+
return true;
|
|
575
|
+
});
|
|
576
|
+
|
|
577
|
+
$routes = array_values($filtered);
|
|
578
|
+
|
|
579
|
+
if (empty($routes)) {
|
|
580
|
+
return null;
|
|
581
|
+
}
|
|
582
|
+
|
|
327
583
|
$scored = [];
|
|
328
584
|
$termSet = array_filter(array_map('strtolower', array_map('trim', $terms)), fn($t) => strlen($t) >= 3);
|
|
329
585
|
|
|
@@ -593,6 +849,12 @@ class LwaziService
|
|
|
593
849
|
|
|
594
850
|
protected function fetchRelevantData(string $message): ?string
|
|
595
851
|
{
|
|
852
|
+
if ($this->isPersonalQuery($message) && empty($this->currentContextId)) {
|
|
853
|
+
$context = $this->ragService->getContextConfig();
|
|
854
|
+
$contextParam = $context['param'] ?? config('lwazi.context_param', 'context_id');
|
|
855
|
+
return "I can look that up for you, but I need your {$contextParam} to access your personal information.";
|
|
856
|
+
}
|
|
857
|
+
|
|
596
858
|
$fieldInfo = $this->selectFieldWithLLM($message) ?: $this->ragService->findFieldForQuery($message);
|
|
597
859
|
|
|
598
860
|
if ($fieldInfo) {
|
|
@@ -126,6 +126,10 @@ class NavigationTree
|
|
|
126
126
|
$heading = $page['heading'] ?? '';
|
|
127
127
|
$links = $page['links'] ?? [];
|
|
128
128
|
|
|
129
|
+
if (preg_match('#/admin/#', $file) || preg_match('#/admin\.blade\.php$#', $file)) {
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
|
|
129
133
|
$labels = array_filter([$title, $heading]);
|
|
130
134
|
$primaryLabel = reset($labels) ?: basename($file, '.blade.php');
|
|
131
135
|
|
|
@@ -137,6 +141,10 @@ class NavigationTree
|
|
|
137
141
|
continue;
|
|
138
142
|
}
|
|
139
143
|
|
|
144
|
+
if (str_starts_with($href, '/admin') || str_starts_with($href, '/api') || str_starts_with($href, '/_')) {
|
|
145
|
+
continue;
|
|
146
|
+
}
|
|
147
|
+
|
|
140
148
|
$this->addPath($href, $text, $primaryLabel);
|
|
141
149
|
}
|
|
142
150
|
}
|
|
@@ -221,6 +229,11 @@ class NavigationTree
|
|
|
221
229
|
{
|
|
222
230
|
$queryNormalized = $this->normalizeText($query);
|
|
223
231
|
$queryTokens = $this->tokenize($query);
|
|
232
|
+
|
|
233
|
+
if (count($queryTokens) === 0) {
|
|
234
|
+
return null;
|
|
235
|
+
}
|
|
236
|
+
|
|
224
237
|
$expandedTokens = $this->expandWithSynonyms($queryTokens);
|
|
225
238
|
|
|
226
239
|
$candidates = [];
|
|
@@ -238,7 +251,11 @@ class NavigationTree
|
|
|
238
251
|
|
|
239
252
|
usort($candidates, fn($a, $b) => $b['score'] <=> $a['score']);
|
|
240
253
|
|
|
241
|
-
|
|
254
|
+
if (empty($candidates) || $candidates[0]['score'] < 15) {
|
|
255
|
+
return null;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
return $candidates[0];
|
|
242
259
|
}
|
|
243
260
|
|
|
244
261
|
public function searchTree(string $query): ?array
|
|
@@ -326,16 +343,19 @@ class NavigationTree
|
|
|
326
343
|
$labelNormalized = $entry['normalizedLabel'] ?? $this->normalizeText($entry['label'] ?? '');
|
|
327
344
|
|
|
328
345
|
foreach ($queryTokens as $token) {
|
|
346
|
+
if (strlen($token) < 3) continue;
|
|
347
|
+
|
|
329
348
|
if ($token === $labelNormalized) {
|
|
330
349
|
$score += 20;
|
|
331
|
-
} elseif (str_contains($labelNormalized, $token)) {
|
|
350
|
+
} elseif (strlen($token) >= 4 && str_contains($labelNormalized, $token)) {
|
|
332
351
|
$score += 10;
|
|
333
|
-
} elseif ($this->fuzzyMatch($labelNormalized, $token)) {
|
|
352
|
+
} elseif (strlen($token) >= 4 && $this->fuzzyMatch($labelNormalized, $token)) {
|
|
334
353
|
$score += 15;
|
|
335
354
|
}
|
|
336
355
|
}
|
|
337
356
|
|
|
338
357
|
foreach ($expandedTokens as $token) {
|
|
358
|
+
if (strlen($token) < 4) continue;
|
|
339
359
|
if (str_contains($labelNormalized, $token)) {
|
|
340
360
|
$score += 5;
|
|
341
361
|
}
|
|
@@ -369,13 +389,27 @@ class NavigationTree
|
|
|
369
389
|
return true;
|
|
370
390
|
}
|
|
371
391
|
|
|
392
|
+
$commonPrefix = 0;
|
|
393
|
+
$minLen = min(strlen($a), strlen($b));
|
|
394
|
+
for ($i = 0; $i < $minLen; $i++) {
|
|
395
|
+
if ($a[$i] === $b[$i]) {
|
|
396
|
+
$commonPrefix++;
|
|
397
|
+
} else {
|
|
398
|
+
break;
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
if ($commonPrefix < 3) {
|
|
403
|
+
return false;
|
|
404
|
+
}
|
|
405
|
+
|
|
372
406
|
$lev = levenshtein($a, $b);
|
|
373
407
|
$maxLen = max(strlen($a), strlen($b));
|
|
374
408
|
if ($maxLen === 0) {
|
|
375
409
|
return false;
|
|
376
410
|
}
|
|
377
411
|
|
|
378
|
-
return $lev / $maxLen < 0.
|
|
412
|
+
return $lev / $maxLen < 0.2;
|
|
379
413
|
}
|
|
380
414
|
|
|
381
415
|
protected function normalizeText(string $text): string
|
|
@@ -506,6 +540,10 @@ class NavigationTree
|
|
|
506
540
|
$queryStems = $tokenizer->tokenizeAndStem($query);
|
|
507
541
|
$queryStems = $tokenizer->removeStopWords($queryStems);
|
|
508
542
|
|
|
543
|
+
if (count($queryStems) === 0) {
|
|
544
|
+
return null;
|
|
545
|
+
}
|
|
546
|
+
|
|
509
547
|
$candidates = [];
|
|
510
548
|
|
|
511
549
|
foreach ($this->stemmedIndex as $path => $stems) {
|
|
@@ -525,7 +563,7 @@ class NavigationTree
|
|
|
525
563
|
}
|
|
526
564
|
}
|
|
527
565
|
|
|
528
|
-
if (empty($candidates)) {
|
|
566
|
+
if (empty($candidates) || $candidates[0]['score'] < 2) {
|
|
529
567
|
return null;
|
|
530
568
|
}
|
|
531
569
|
|