lwazi 1.2.6 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -31,7 +31,16 @@ class GraphVisualizer
|
|
|
31
31
|
/**
 * Reduce a URL to its root form: "scheme://host", plus ":port" when the
 * port is not the well-known default for the scheme.
 *
 * The scheme falls back to "http" and the host falls back to the raw input
 * when parse_url() cannot extract them (for example a bare "example.com").
 * The scheme is emitted in lowercase; the host is returned as parsed.
 *
 * @param string $url Absolute URL or bare host name.
 * @return string Root URL without path, query string or fragment.
 */
protected function normalizeRoot(string $url): string
{
    $parts = parse_url($url);
    if (!is_array($parts)) {
        // parse_url() returns false on seriously malformed input; rely on the fallbacks below.
        $parts = [];
    }

    // URI schemes are case-insensitive. Without lowercasing, "HTTP://host:8080"
    // matched neither default-port rule and silently lost its port.
    $scheme = strtolower($parts['scheme'] ?? 'http');
    $host = $parts['host'] ?? $url;
    $port = $parts['port'] ?? null;

    $defaultPorts = ['http' => 80, 'https' => 443];

    $result = $scheme . '://' . $host;

    // Keep the port unless it is the default for this scheme. Schemes without a
    // known default keep any explicit port, since dropping it would change the origin.
    if ($port && $port !== ($defaultPorts[$scheme] ?? null)) {
        $result .= ':' . $port;
    }

    return $result;
}
|
|
36
45
|
|
|
37
46
|
protected function buildTreeAscii(
|
|
@@ -11,17 +11,16 @@ class LwaziAgent
|
|
|
11
11
|
protected string $model;
|
|
12
12
|
protected static ?array $manifestCache = null;
|
|
13
13
|
protected static ?int $manifestMtime = null;
|
|
14
|
-
protected array $synonyms;
|
|
15
14
|
|
|
16
15
|
public function __construct()
|
|
17
16
|
{
|
|
18
17
|
$this->ollamaUrl = config('lwazi.ollama_url', 'http://localhost:11434');
|
|
19
18
|
$this->model = config('lwazi.model', 'llama3.2:1b');
|
|
20
|
-
$this->loadManifest();
|
|
19
|
+
$this->loadManifest();
|
|
21
20
|
}
|
|
22
21
|
|
|
23
22
|
/**
|
|
24
|
-
* Load manifest
|
|
23
|
+
* Load manifest
|
|
25
24
|
*/
|
|
26
25
|
protected function loadManifest(): array
|
|
27
26
|
{
|
|
@@ -39,37 +38,9 @@ class LwaziAgent
|
|
|
39
38
|
self::$manifestCache = $data;
|
|
40
39
|
self::$manifestMtime = $mtime;
|
|
41
40
|
|
|
42
|
-
// Build dynamic synonyms from manifest
|
|
43
|
-
$this->synonyms = $this->buildSynonymsFromManifest($data);
|
|
44
|
-
|
|
45
41
|
return $data;
|
|
46
42
|
}
|
|
47
43
|
|
|
48
|
-
/**
|
|
49
|
-
* Generate synonyms from manifest titles and headings
|
|
50
|
-
*/
|
|
51
|
-
protected function buildSynonymsFromManifest(array $manifest): array
|
|
52
|
-
{
|
|
53
|
-
$synonyms = [];
|
|
54
|
-
|
|
55
|
-
foreach ($manifest['flat'] ?? [] as $url => $entry) {
|
|
56
|
-
$label = strtolower($entry['label'] ?? '');
|
|
57
|
-
$segments = array_map('strtolower', $entry['segments'] ?? []);
|
|
58
|
-
|
|
59
|
-
if ($label !== '') {
|
|
60
|
-
$synonyms[$label] = $segments;
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
foreach ($segments as $seg) {
|
|
64
|
-
if ($seg && !isset($synonyms[$seg])) {
|
|
65
|
-
$synonyms[$seg] = [$label];
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
return $synonyms;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
44
|
public function isReady(): bool
|
|
74
45
|
{
|
|
75
46
|
return !empty($this->loadManifest()) && $this->isOllamaAvailable();
|
|
@@ -98,11 +69,13 @@ class LwaziAgent
|
|
|
98
69
|
}
|
|
99
70
|
|
|
100
71
|
$intent = $this->classifyIntent($message, $manifest);
|
|
72
|
+
$rootUrl = $manifest['root_url'] ?? config('app.url', 'http://localhost');
|
|
101
73
|
|
|
102
74
|
if ($intent === 'navigation') {
|
|
103
75
|
$link = $this->pickNavigationLinkWithFeedback($message, $manifest);
|
|
104
76
|
if ($link) {
|
|
105
|
-
|
|
77
|
+
$fullUrl = $this->buildFullUrl($link, $rootUrl);
|
|
78
|
+
return $this->sanitize("You can find that here: {$fullUrl}");
|
|
106
79
|
}
|
|
107
80
|
return $this->generateNavigationHelp($message, $manifest);
|
|
108
81
|
}
|
|
@@ -114,6 +87,18 @@ class LwaziAgent
|
|
|
114
87
|
return $this->sanitize($this->chat($message));
|
|
115
88
|
}
|
|
116
89
|
|
|
90
|
+
/**
 * Turn a navigation link into an absolute URL.
 *
 * Links that already begin with "http://" or "https://" are returned
 * untouched. Anything else is appended to $rootUrl with exactly one "/"
 * between the two parts.
 *
 * @param string $path    Link to resolve (relative path or absolute URL).
 * @param string $rootUrl Base URL used for relative links.
 * @return string The absolute URL.
 */
protected function buildFullUrl(string $path, string $rootUrl): string
{
    foreach (['http://', 'https://'] as $absolutePrefix) {
        if (str_starts_with($path, $absolutePrefix)) {
            return $path;
        }
    }

    // Strip the slashes on both sides of the join so the separator is never doubled.
    return rtrim($rootUrl, '/') . '/' . ltrim($path, '/');
}
|
|
101
|
+
|
|
117
102
|
protected function classifyIntent(string $message, array $manifest): string
|
|
118
103
|
{
|
|
119
104
|
$prompt =
|
|
@@ -153,10 +138,12 @@ class LwaziAgent
|
|
|
153
138
|
);
|
|
154
139
|
}
|
|
155
140
|
|
|
141
|
+
$keywords = $semanticUnderstanding['keywords'] ?? [];
|
|
142
|
+
$this->recordSynonymUsage($keywords);
|
|
143
|
+
|
|
156
144
|
$navigationTree = [
|
|
157
145
|
'tree' => $manifest['adjacency'] ?? [],
|
|
158
146
|
'flat' => $manifest['flat'] ?? [],
|
|
159
|
-
'synonyms' => $this->synonyms,
|
|
160
147
|
];
|
|
161
148
|
|
|
162
149
|
$treeResult = $this->searchNavigationTree($message, $semanticUnderstanding, $navigationTree);
|
|
@@ -167,28 +154,71 @@ class LwaziAgent
|
|
|
167
154
|
return $this->searchByContent($message, $semanticUnderstanding, $manifest) ?? null;
|
|
168
155
|
}
|
|
169
156
|
|
|
157
|
+
/**
 * Increment the persisted usage counter of every given keyword.
 *
 * Counters live in storage_path('lwazi/synonym_cache.json') as a flat
 * "keyword => count" JSON object. Keywords are trimmed and lowercased before
 * counting. Recording is best-effort: if the cache cannot be encoded or its
 * directory cannot be created, the method returns without writing.
 *
 * @param array $keywords Candidate keywords; entries that are neither strings
 *                        nor numbers, or that are blank, are ignored.
 */
protected function recordSynonymUsage(array $keywords): void
{
    // Normalise first so that a call with nothing usable never touches the disk.
    $terms = [];
    foreach ($keywords as $keyword) {
        // Decoded JSON may contain nested arrays or nulls, which trim() cannot accept.
        if (!is_string($keyword) && !is_numeric($keyword)) {
            continue;
        }
        $term = strtolower(trim((string) $keyword));
        if ($term !== '') {
            $terms[] = $term;
        }
    }
    if ($terms === []) {
        return;
    }

    $cachePath = storage_path('lwazi/synonym_cache.json');
    $usage = [];

    if (is_file($cachePath)) {
        $raw = file_get_contents($cachePath);
        $decoded = $raw === false ? null : json_decode($raw, true);
        // Valid JSON that is not an object/array (e.g. a bare string) must not be used as the map.
        if (is_array($decoded)) {
            $usage = $decoded;
        }
    }

    foreach ($terms as $term) {
        $current = $usage[$term] ?? 0;
        // A corrupt (non-integer) counter restarts from zero.
        $usage[$term] = (is_int($current) ? $current : 0) + 1;
    }

    $json = json_encode($usage, JSON_PRETTY_PRINT);
    if ($json === false) {
        // Writing `false` would truncate the file and lose all existing counters.
        return;
    }

    $directory = dirname($cachePath);
    // The trailing is_dir() tolerates another request creating the directory concurrently.
    if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory)) {
        return;
    }

    file_put_contents($cachePath, $json);
}
|
|
178
|
+
|
|
179
|
+
/**
 * Return the most frequently recorded keywords from the usage cache.
 *
 * Reads storage_path('lwazi/synonym_cache.json') and returns up to $limit
 * "keyword => count" pairs ordered by descending count. A missing,
 * unreadable or malformed cache yields an empty array.
 *
 * @param int $limit Maximum number of entries to return.
 * @return array Keyword => usage count, highest count first.
 */
protected function getTopSynonyms(int $limit = 20): array
{
    $cachePath = storage_path('lwazi/synonym_cache.json');
    if (!is_file($cachePath)) {
        return [];
    }

    $raw = file_get_contents($cachePath);
    $usage = $raw === false ? null : json_decode($raw, true);
    if (!is_array($usage)) {
        // Covers invalid JSON (null) and valid non-array JSON, which arsort() would reject.
        return [];
    }

    // Keep only real counters so stray metadata or corrupt entries are never ranked.
    $usage = array_filter($usage, static fn ($count): bool => is_int($count));

    arsort($usage);

    return array_slice($usage, 0, $limit, true);
}
|
|
190
|
+
|
|
170
191
|
protected function understandUserIntent(string $message, array $manifest): ?array
|
|
171
192
|
{
|
|
172
|
-
$
|
|
173
|
-
$
|
|
193
|
+
$flatIndex = $manifest['flat'] ?? [];
|
|
194
|
+
$routes = array_keys($flatIndex);
|
|
195
|
+
|
|
196
|
+
$topSynonyms = $this->getTopSynonyms(10);
|
|
197
|
+
$popularTerms = !empty($topSynonyms) ? 'Popular terms used: ' . implode(', ', array_keys($topSynonyms)) . '. ' : '';
|
|
174
198
|
|
|
175
|
-
$prompt =
|
|
199
|
+
$prompt = $popularTerms . "The user wants to find something on this website. Generate all possible synonyms and variations of what they might be searching for.
|
|
176
200
|
|
|
177
201
|
QUESTION: {$message}
|
|
178
|
-
AVAILABLE
|
|
179
|
-
Return JSON
|
|
202
|
+
AVAILABLE ROUTES: " . json_encode($routes) . "
|
|
203
|
+
Return JSON with:
|
|
204
|
+
- \"intent\": what the user is looking for (short description)
|
|
205
|
+
- \"keywords\": array of ALL possible synonyms, variations, related terms (be generous - include plurals, common misspellings, related concepts)
|
|
206
|
+
- \"confidence\": how confident you are (0-1)
|
|
207
|
+
|
|
208
|
+
Return: {\"intent\": \"description\", \"keywords\": [\"term1\", \"term2\", ...], \"confidence\": 0.9}";
|
|
180
209
|
|
|
181
210
|
$response = $this->callOllama([
|
|
182
|
-
['role' => 'system', 'content' => 'You are a website navigation assistant. Return valid JSON.'],
|
|
211
|
+
['role' => 'system', 'content' => 'You are a website navigation assistant. Generate comprehensive synonyms for searching. Return valid JSON only.'],
|
|
183
212
|
['role' => 'user', 'content' => $prompt],
|
|
184
213
|
]);
|
|
185
214
|
|
|
186
215
|
$understanding = $this->extractJson($response['content'] ?? '');
|
|
216
|
+
|
|
187
217
|
if (!$understanding || empty($understanding['keywords'])) {
|
|
188
|
-
$simplePrompt = "
|
|
218
|
+
$simplePrompt = "User wants: {$message}. Return JSON: {\"intent\": \"what they want\", \"keywords\": [\"term1\", \"term2\"]}";
|
|
189
219
|
$retry = $this->callOllama([
|
|
190
220
|
['role' => 'system', 'content' => 'Return only valid JSON.'],
|
|
191
|
-
['role' => 'user', 'content' => $simplePrompt
|
|
221
|
+
['role' => 'user', 'content' => $simplePrompt],
|
|
192
222
|
]);
|
|
193
223
|
$understanding = $this->extractJson($retry['content'] ?? '');
|
|
194
224
|
}
|
|
@@ -202,42 +232,28 @@ Return JSON: {\"intent\": \"description\", \"keywords\": [\"key\", \"terms\"]}";
|
|
|
202
232
|
$keywords = $understanding['keywords'] ?? [];
|
|
203
233
|
$query = $understanding['intent'] ?? $message;
|
|
204
234
|
|
|
205
|
-
$expandedKeywords = $this->expandKeywordsWithSynonyms($keywords, $navigationTree);
|
|
206
|
-
|
|
207
235
|
foreach ($flatIndex as $path => $entry) {
|
|
208
236
|
$label = strtolower($entry['label'] ?? '');
|
|
209
237
|
$segments = $entry['segments'] ?? [];
|
|
210
238
|
|
|
211
|
-
foreach ($
|
|
212
|
-
|
|
239
|
+
foreach ($keywords as $keyword) {
|
|
240
|
+
$kw = strtolower(trim($keyword));
|
|
241
|
+
if (str_contains($label, $kw)) return $entry;
|
|
213
242
|
foreach ($segments as $seg) {
|
|
214
|
-
if (str_contains(strtolower($seg),
|
|
243
|
+
if (str_contains(strtolower($seg), $kw)) return $entry;
|
|
215
244
|
}
|
|
216
245
|
}
|
|
217
246
|
|
|
218
247
|
if ($this->semanticMatch($query, $label)) return $entry;
|
|
219
248
|
}
|
|
220
249
|
|
|
221
|
-
return $this->traverseTreeForMatch($navigationTree['tree'] ?? [], $query, $
|
|
250
|
+
return $this->traverseTreeForMatch($navigationTree['tree'] ?? [], $query, $keywords)
|
|
222
251
|
?? $this->broadSearchFallback($message, $understanding, $navigationTree);
|
|
223
252
|
}
|
|
224
253
|
|
|
225
254
|
protected function expandKeywordsWithSynonyms(array $keywords, array $navigationTree = []): array
|
|
226
255
|
{
|
|
227
|
-
|
|
228
|
-
$treeSynonyms = $navigationTree['synonyms'] ?? [];
|
|
229
|
-
$allSynonyms = array_merge($this->synonyms, $treeSynonyms);
|
|
230
|
-
|
|
231
|
-
foreach ($keywords as $keyword) {
|
|
232
|
-
$kwLower = strtolower($keyword);
|
|
233
|
-
foreach ($allSynonyms as $group => $syns) {
|
|
234
|
-
if ($kwLower === strtolower($group) || in_array($kwLower, array_map('strtolower', $syns))) {
|
|
235
|
-
$expanded = array_merge($expanded, $syns, [$group]);
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
return array_unique($expanded);
|
|
256
|
+
return $keywords;
|
|
241
257
|
}
|
|
242
258
|
|
|
243
259
|
protected function semanticMatch(string $query, string $label): bool
|
|
@@ -248,27 +264,18 @@ Return JSON: {\"intent\": \"description\", \"keywords\": [\"key\", \"terms\"]}";
|
|
|
248
264
|
$matches = 0;
|
|
249
265
|
foreach ($queryWords as $qWord) {
|
|
250
266
|
foreach ($labelWords as $lWord) {
|
|
251
|
-
if ($qWord === $lWord
|
|
267
|
+
if ($qWord === $lWord) {
|
|
252
268
|
$matches++;
|
|
253
269
|
break;
|
|
254
270
|
}
|
|
255
271
|
}
|
|
256
272
|
}
|
|
257
273
|
|
|
258
|
-
return $matches > 0 && ($matches / count($queryWords)) > 0.3;
|
|
274
|
+
return $matches > 0 && ($matches / max(count($queryWords), 1)) > 0.3;
|
|
259
275
|
}
|
|
260
276
|
|
|
261
277
|
protected function isSynonym(string $word1, string $word2): bool
|
|
262
278
|
{
|
|
263
|
-
$word1 = strtolower($word1);
|
|
264
|
-
$word2 = strtolower($word2);
|
|
265
|
-
|
|
266
|
-
foreach ($this->synonyms as $group => $words) {
|
|
267
|
-
if (in_array($word1, $words) && in_array($word2, $words)) return true;
|
|
268
|
-
if ($word1 === $group && in_array($word2, $words)) return true;
|
|
269
|
-
if ($word2 === $group && in_array($word1, $words)) return true;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
279
|
return false;
|
|
273
280
|
}
|
|
274
281
|
|
|
@@ -7,12 +7,12 @@ class NavigationTree
|
|
|
7
7
|
protected array $tree = [];
|
|
8
8
|
protected array $synonyms = [];
|
|
9
9
|
protected array $flatIndex = [];
|
|
10
|
+
protected array $synonymUsage = [];
|
|
10
11
|
protected ?TokenizerService $tokenizer = null;
|
|
11
12
|
protected array $stemmedIndex = [];
|
|
12
13
|
|
|
13
14
|
public function __construct()
|
|
14
15
|
{
|
|
15
|
-
$this->loadDefaultSynonyms();
|
|
16
16
|
}
|
|
17
17
|
|
|
18
18
|
protected function getTokenizer(): TokenizerService
|
|
@@ -446,6 +446,43 @@ class NavigationTree
|
|
|
446
446
|
return $this->flatIndex;
|
|
447
447
|
}
|
|
448
448
|
|
|
449
|
+
/**
 * Increment the in-memory usage counter of every given keyword.
 *
 * Keywords are trimmed and lowercased before counting, so " Docs " and
 * "docs" share one counter.
 *
 * @param array $keywords Candidate keywords; entries that are neither strings
 *                        nor numbers, or that are blank, are ignored.
 */
public function recordSynonymUsage(array $keywords): void
{
    foreach ($keywords as $keyword) {
        // trim() throws on arrays/objects; skip anything that is not text-like.
        if (!is_string($keyword) && !is_numeric($keyword)) {
            continue;
        }

        $key = strtolower(trim((string) $keyword));
        if ($key === '') {
            // A blank keyword carries no information and would pollute the ranking.
            continue;
        }

        $this->synonymUsage[$key] = ($this->synonymUsage[$key] ?? 0) + 1;
    }
}
|
|
459
|
+
|
|
460
|
+
/**
 * Return the most used keywords recorded so far.
 *
 * @param int $limit Maximum number of entries to return.
 * @return array Keyword => usage count, highest count first.
 */
public function getTopSynonyms(int $limit = 20): array
{
    // The sort is applied to the property itself, so the stored counters
    // remain ordered by descending count after this call.
    arsort($this->synonymUsage);

    $ranked = $this->synonymUsage;

    return array_slice($ranked, 0, $limit, true);
}
|
|
465
|
+
|
|
466
|
+
/**
 * Persist the in-memory usage counters to a JSON file.
 *
 * The file holds an object with "synonym_usage" (keyword => count) and
 * "saved_at" (ISO 8601 timestamp). Missing parent directories are created.
 * Saving is best-effort: if the data cannot be encoded or the directory
 * cannot be created, any existing file is left untouched.
 *
 * @param string $path Destination file path.
 */
public function saveSynonymCache(string $path): void
{
    $json = json_encode(
        [
            'synonym_usage' => $this->synonymUsage,
            'saved_at' => date('c'),
        ],
        JSON_PRETTY_PRINT
    );

    if ($json === false) {
        // json_encode() fails on e.g. invalid UTF-8 keywords; writing `false`
        // would replace a good cache with an empty file.
        return;
    }

    $directory = dirname($path);
    // The trailing is_dir() tolerates another process creating the directory concurrently.
    if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory)) {
        return;
    }

    file_put_contents($path, $json);
}
|
|
477
|
+
|
|
478
|
+
/**
 * Replace the in-memory usage counters with those stored in a JSON file.
 *
 * Expects an object with a "synonym_usage" map (keyword => count). If the
 * file does not exist the current counters are kept. If it exists but is
 * unreadable, malformed, or lacks a usable "synonym_usage" map, the
 * counters are reset to an empty array.
 *
 * @param string $path Cache file path.
 */
public function loadSynonymCache(string $path): void
{
    if (!file_exists($path)) {
        return;
    }

    $raw = file_get_contents($path);
    $data = $raw === false ? null : json_decode($raw, true);

    $usage = is_array($data) ? ($data['synonym_usage'] ?? []) : [];

    // The property is typed `array`; assigning a scalar from a corrupt file would throw a TypeError.
    $this->synonymUsage = is_array($usage) ? $usage : [];
}
|
|
485
|
+
|
|
449
486
|
public function buildStemmedIndex(): void
|
|
450
487
|
{
|
|
451
488
|
$tokenizer = $this->getTokenizer();
|