lwazi 1.2.6 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/lwazi.js +5 -0
- package/bin/update.js +73 -0
- package/package.json +1 -1
- package/src/Console/AnalyzeProjectCommand.php +78 -0
- package/src/Console/SetupCommand.php +79 -0
- package/src/Services/GraphVisualizer.php +10 -1
- package/src/Services/LwaziAgent.php +79 -72
- package/src/Services/NavigationTree.php +38 -1
package/bin/lwazi.js
CHANGED
|
@@ -10,6 +10,10 @@ switch (command) {
|
|
|
10
10
|
require("./install");
|
|
11
11
|
break;
|
|
12
12
|
|
|
13
|
+
case "update":
|
|
14
|
+
require("./update");
|
|
15
|
+
break;
|
|
16
|
+
|
|
13
17
|
case "uninstall":
|
|
14
18
|
require("./uninstall");
|
|
15
19
|
break;
|
|
@@ -22,6 +26,7 @@ Commands:
|
|
|
22
26
|
|
|
23
27
|
lwazi install Install Lwazi into this Laravel project
|
|
24
28
|
lwazi install --url <url> Install and crawl website for navigation
|
|
29
|
+
lwazi update Update Lwazi to latest version
|
|
25
30
|
lwazi uninstall Remove Lwazi from this project
|
|
26
31
|
`);
|
|
27
32
|
}
|
package/bin/update.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const fs = require("fs");
|
|
4
|
+
const path = require("path");
|
|
5
|
+
const { execSync } = require("child_process");
|
|
6
|
+
|
|
7
|
+
const projectRoot = process.cwd();
|
|
8
|
+
const targetDir = path.join(projectRoot, "lwazi");
|
|
9
|
+
const packageDir = path.resolve(__dirname, "..");
|
|
10
|
+
|
|
11
|
+
console.log("Updating Lwazi...");
|
|
12
|
+
|
|
13
|
+
if (!fs.existsSync(path.join(projectRoot, "artisan"))) {
|
|
14
|
+
console.error("This is not a Laravel project.");
|
|
15
|
+
process.exit(1);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
if (!fs.existsSync(targetDir)) {
|
|
19
|
+
console.log("Lwazi not installed. Running install instead...");
|
|
20
|
+
require("./install");
|
|
21
|
+
return;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
console.log("Updating Lwazi files...");
|
|
25
|
+
|
|
26
|
+
const ignore = new Set([
|
|
27
|
+
"node_modules",
|
|
28
|
+
".git",
|
|
29
|
+
"package-lock.json"
|
|
30
|
+
]);
|
|
31
|
+
|
|
32
|
+
function copyDirectory(src, dest) {
|
|
33
|
+
fs.mkdirSync(dest, { recursive: true });
|
|
34
|
+
|
|
35
|
+
for (const item of fs.readdirSync(src)) {
|
|
36
|
+
if (ignore.has(item)) continue;
|
|
37
|
+
|
|
38
|
+
const s = path.join(src, item);
|
|
39
|
+
const d = path.join(dest, item);
|
|
40
|
+
|
|
41
|
+
if (fs.statSync(s).isDirectory()) {
|
|
42
|
+
copyDirectory(s, d);
|
|
43
|
+
} else {
|
|
44
|
+
fs.copyFileSync(s, d);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
copyDirectory(packageDir, targetDir);
|
|
50
|
+
|
|
51
|
+
console.log("Running composer update...");
|
|
52
|
+
try {
|
|
53
|
+
execSync("composer update lwazi/core --no-interaction", {
|
|
54
|
+
stdio: "inherit",
|
|
55
|
+
cwd: projectRoot,
|
|
56
|
+
shell: true,
|
|
57
|
+
});
|
|
58
|
+
} catch (e) {
|
|
59
|
+
console.log("Composer update skipped or failed.");
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
console.log("Clearing caches...");
|
|
63
|
+
try {
|
|
64
|
+
execSync("php artisan config:clear && php artisan cache:clear", {
|
|
65
|
+
stdio: "ignore",
|
|
66
|
+
cwd: projectRoot,
|
|
67
|
+
shell: true,
|
|
68
|
+
});
|
|
69
|
+
} catch (e) {
|
|
70
|
+
// Ignore cache errors
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
console.log("\nLwazi updated successfully!");
|
package/package.json
CHANGED
|
package/src/Console/AnalyzeProjectCommand.php
CHANGED
|
@@ -25,6 +25,8 @@ class AnalyzeProjectCommand extends Command
|
|
|
25
25
|
|
|
26
26
|
$crawler = new NavigationCrawler($rootUrl);
|
|
27
27
|
$manifest = $crawler->crawl();
|
|
28
|
+
|
|
29
|
+
$manifest = $this->mergeWithRoutes($manifest, $rootUrl);
|
|
28
30
|
|
|
29
31
|
$storagePath = storage_path('lwazi');
|
|
30
32
|
if (!is_dir($storagePath)) {
|
|
@@ -50,4 +52,80 @@ class AnalyzeProjectCommand extends Command
|
|
|
50
52
|
|
|
51
53
|
return 0;
|
|
52
54
|
}
|
|
55
|
+
|
|
56
|
+
protected function mergeWithRoutes(array $manifest, string $rootUrl): array
|
|
57
|
+
{
|
|
58
|
+
$knowledgePath = storage_path('lwazi/project_knowledge.json');
|
|
59
|
+
if (!file_exists($knowledgePath)) {
|
|
60
|
+
return $manifest;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
$knowledge = json_decode(file_get_contents($knowledgePath), true);
|
|
64
|
+
$routes = $knowledge['routes'] ?? [];
|
|
65
|
+
|
|
66
|
+
if (empty($routes)) {
|
|
67
|
+
return $manifest;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
$root = $this->extractRoot($rootUrl);
|
|
71
|
+
|
|
72
|
+
foreach ($routes as $route) {
|
|
73
|
+
$path = '/' . ltrim($route['uri'], '/');
|
|
74
|
+
|
|
75
|
+
if (in_array($path, ['/storage/{path}', '/sanctum/csrf-cookie'])) {
|
|
76
|
+
continue;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if (isset($manifest['flat'][$root . $path])) {
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
$label = $this->humanizeRoutePath($path);
|
|
84
|
+
|
|
85
|
+
$manifest['nodes'][$root . $path] = [
|
|
86
|
+
'url' => $root . $path,
|
|
87
|
+
'title' => $label,
|
|
88
|
+
'headings' => [$label],
|
|
89
|
+
];
|
|
90
|
+
|
|
91
|
+
$manifest['flat'][$root . $path] = [
|
|
92
|
+
'label' => $label,
|
|
93
|
+
'segments' => array_filter(explode('/', trim($path, '/'))),
|
|
94
|
+
'_path' => $root . $path,
|
|
95
|
+
'_weight' => 2,
|
|
96
|
+
];
|
|
97
|
+
|
|
98
|
+
$manifest['adjacency'][$root . $path] = [];
|
|
99
|
+
|
|
100
|
+
if (!isset($manifest['adjacency'][$root . '/'])) {
|
|
101
|
+
$manifest['adjacency'][$root . '/'] = [];
|
|
102
|
+
}
|
|
103
|
+
$manifest['adjacency'][$root . '/'][] = $root . $path;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
return $manifest;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
protected function extractRoot(string $url): string
|
|
110
|
+
{
|
|
111
|
+
$parts = parse_url($url);
|
|
112
|
+
$scheme = $parts['scheme'] ?? 'http';
|
|
113
|
+
$host = $parts['host'] ?? 'localhost';
|
|
114
|
+
$port = $parts['port'] ?? ($scheme === 'https' ? 443 : 80);
|
|
115
|
+
|
|
116
|
+
$root = $scheme . '://' . $host;
|
|
117
|
+
if (($scheme === 'http' && $port !== 80) || ($scheme === 'https' && $port !== 443)) {
|
|
118
|
+
$root .= ':' . $port;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
return $root;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
protected function humanizeRoutePath(string $path): string
|
|
125
|
+
{
|
|
126
|
+
$path = preg_replace('/\{[^}]+\}/', '', $path);
|
|
127
|
+
$path = str_replace(['/', '-', '_'], ' ', $path);
|
|
128
|
+
$path = preg_replace('/\s+/', ' ', $path);
|
|
129
|
+
return ucwords(trim($path));
|
|
130
|
+
}
|
|
53
131
|
}
|
package/src/Console/SetupCommand.php
CHANGED
|
@@ -111,6 +111,9 @@ class SetupCommand extends Command
|
|
|
111
111
|
try {
|
|
112
112
|
$crawler = new NavigationCrawler($url, true);
|
|
113
113
|
$manifest = $crawler->crawl();
|
|
114
|
+
|
|
115
|
+
$manifest = $this->mergeWithRoutes($manifest, $url);
|
|
116
|
+
|
|
114
117
|
$crawler->saveManifest();
|
|
115
118
|
|
|
116
119
|
$this->info("Crawled " . count($manifest['nodes']) . " pages.");
|
|
@@ -129,4 +132,80 @@ class SetupCommand extends Command
|
|
|
129
132
|
$this->warn('Website crawling failed: ' . $e->getMessage());
|
|
130
133
|
}
|
|
131
134
|
}
|
|
135
|
+
|
|
136
|
+
protected function mergeWithRoutes(array $manifest, string $rootUrl): array
|
|
137
|
+
{
|
|
138
|
+
$knowledgePath = storage_path('lwazi/project_knowledge.json');
|
|
139
|
+
if (!file_exists($knowledgePath)) {
|
|
140
|
+
return $manifest;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
$knowledge = json_decode(file_get_contents($knowledgePath), true);
|
|
144
|
+
$routes = $knowledge['routes'] ?? [];
|
|
145
|
+
|
|
146
|
+
if (empty($routes)) {
|
|
147
|
+
return $manifest;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
$root = $this->extractRoot($rootUrl);
|
|
151
|
+
|
|
152
|
+
foreach ($routes as $route) {
|
|
153
|
+
$path = '/' . ltrim($route['uri'], '/');
|
|
154
|
+
|
|
155
|
+
if (in_array($path, ['/storage/{path}', '/sanctum/csrf-cookie'])) {
|
|
156
|
+
continue;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
if (isset($manifest['flat'][$root . $path])) {
|
|
160
|
+
continue;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
$label = $this->humanizeRoutePath($path);
|
|
164
|
+
|
|
165
|
+
$manifest['nodes'][$root . $path] = [
|
|
166
|
+
'url' => $root . $path,
|
|
167
|
+
'title' => $label,
|
|
168
|
+
'headings' => [$label],
|
|
169
|
+
];
|
|
170
|
+
|
|
171
|
+
$manifest['flat'][$root . $path] = [
|
|
172
|
+
'label' => $label,
|
|
173
|
+
'segments' => array_filter(explode('/', trim($path, '/'))),
|
|
174
|
+
'_path' => $root . $path,
|
|
175
|
+
'_weight' => 2,
|
|
176
|
+
];
|
|
177
|
+
|
|
178
|
+
$manifest['adjacency'][$root . $path] = [];
|
|
179
|
+
|
|
180
|
+
if (!isset($manifest['adjacency'][$root . '/'])) {
|
|
181
|
+
$manifest['adjacency'][$root . '/'] = [];
|
|
182
|
+
}
|
|
183
|
+
$manifest['adjacency'][$root . '/'][] = $root . $path;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return $manifest;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
protected function extractRoot(string $url): string
|
|
190
|
+
{
|
|
191
|
+
$parts = parse_url($url);
|
|
192
|
+
$scheme = $parts['scheme'] ?? 'http';
|
|
193
|
+
$host = $parts['host'] ?? 'localhost';
|
|
194
|
+
$port = $parts['port'] ?? ($scheme === 'https' ? 443 : 80);
|
|
195
|
+
|
|
196
|
+
$root = $scheme . '://' . $host;
|
|
197
|
+
if (($scheme === 'http' && $port !== 80) || ($scheme === 'https' && $port !== 443)) {
|
|
198
|
+
$root .= ':' . $port;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
return $root;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
protected function humanizeRoutePath(string $path): string
|
|
205
|
+
{
|
|
206
|
+
$path = preg_replace('/\{[^}]+\}/', '', $path);
|
|
207
|
+
$path = str_replace(['/', '-', '_'], ' ', $path);
|
|
208
|
+
$path = preg_replace('/\s+/', ' ', $path);
|
|
209
|
+
return ucwords(trim($path));
|
|
210
|
+
}
|
|
132
211
|
}
|
package/src/Services/GraphVisualizer.php
CHANGED
|
@@ -31,7 +31,16 @@ class GraphVisualizer
|
|
|
31
31
|
protected function normalizeRoot(string $url): string
|
|
32
32
|
{
|
|
33
33
|
$parts = parse_url($url);
|
|
34
|
-
|
|
34
|
+
$scheme = $parts['scheme'] ?? 'http';
|
|
35
|
+
$host = $parts['host'] ?? $url;
|
|
36
|
+
$port = $parts['port'] ?? parse_url($url, PHP_URL_PORT);
|
|
37
|
+
|
|
38
|
+
$result = $scheme . '://' . $host;
|
|
39
|
+
if ($port && (($scheme === 'http' && $port !== 80) || ($scheme === 'https' && $port !== 443))) {
|
|
40
|
+
$result .= ':' . $port;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
return $result;
|
|
35
44
|
}
|
|
36
45
|
|
|
37
46
|
protected function buildTreeAscii(
|
package/src/Services/LwaziAgent.php
CHANGED
|
@@ -11,17 +11,16 @@ class LwaziAgent
|
|
|
11
11
|
protected string $model;
|
|
12
12
|
protected static ?array $manifestCache = null;
|
|
13
13
|
protected static ?int $manifestMtime = null;
|
|
14
|
-
protected array $synonyms;
|
|
15
14
|
|
|
16
15
|
public function __construct()
|
|
17
16
|
{
|
|
18
17
|
$this->ollamaUrl = config('lwazi.ollama_url', 'http://localhost:11434');
|
|
19
18
|
$this->model = config('lwazi.model', 'llama3.2:1b');
|
|
20
|
-
$this->loadManifest();
|
|
19
|
+
$this->loadManifest();
|
|
21
20
|
}
|
|
22
21
|
|
|
23
22
|
/**
|
|
24
|
-
* Load manifest
|
|
23
|
+
* Load manifest
|
|
25
24
|
*/
|
|
26
25
|
protected function loadManifest(): array
|
|
27
26
|
{
|
|
@@ -39,37 +38,9 @@ class LwaziAgent
|
|
|
39
38
|
self::$manifestCache = $data;
|
|
40
39
|
self::$manifestMtime = $mtime;
|
|
41
40
|
|
|
42
|
-
// Build dynamic synonyms from manifest
|
|
43
|
-
$this->synonyms = $this->buildSynonymsFromManifest($data);
|
|
44
|
-
|
|
45
41
|
return $data;
|
|
46
42
|
}
|
|
47
43
|
|
|
48
|
-
/**
|
|
49
|
-
* Generate synonyms from manifest titles and headings
|
|
50
|
-
*/
|
|
51
|
-
protected function buildSynonymsFromManifest(array $manifest): array
|
|
52
|
-
{
|
|
53
|
-
$synonyms = [];
|
|
54
|
-
|
|
55
|
-
foreach ($manifest['flat'] ?? [] as $url => $entry) {
|
|
56
|
-
$label = strtolower($entry['label'] ?? '');
|
|
57
|
-
$segments = array_map('strtolower', $entry['segments'] ?? []);
|
|
58
|
-
|
|
59
|
-
if ($label !== '') {
|
|
60
|
-
$synonyms[$label] = $segments;
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
foreach ($segments as $seg) {
|
|
64
|
-
if ($seg && !isset($synonyms[$seg])) {
|
|
65
|
-
$synonyms[$seg] = [$label];
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
return $synonyms;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
44
|
public function isReady(): bool
|
|
74
45
|
{
|
|
75
46
|
return !empty($this->loadManifest()) && $this->isOllamaAvailable();
|
|
@@ -98,11 +69,13 @@ class LwaziAgent
|
|
|
98
69
|
}
|
|
99
70
|
|
|
100
71
|
$intent = $this->classifyIntent($message, $manifest);
|
|
72
|
+
$rootUrl = $manifest['root_url'] ?? config('app.url', 'http://localhost');
|
|
101
73
|
|
|
102
74
|
if ($intent === 'navigation') {
|
|
103
75
|
$link = $this->pickNavigationLinkWithFeedback($message, $manifest);
|
|
104
76
|
if ($link) {
|
|
105
|
-
|
|
77
|
+
$fullUrl = $this->buildFullUrl($link, $rootUrl);
|
|
78
|
+
return $this->sanitize("You can find that here: {$fullUrl}");
|
|
106
79
|
}
|
|
107
80
|
return $this->generateNavigationHelp($message, $manifest);
|
|
108
81
|
}
|
|
@@ -114,6 +87,18 @@ class LwaziAgent
|
|
|
114
87
|
return $this->sanitize($this->chat($message));
|
|
115
88
|
}
|
|
116
89
|
|
|
90
|
+
protected function buildFullUrl(string $path, string $rootUrl): string
|
|
91
|
+
{
|
|
92
|
+
if (str_starts_with($path, 'http://') || str_starts_with($path, 'https://')) {
|
|
93
|
+
return $path;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
$root = rtrim($rootUrl, '/');
|
|
97
|
+
$path = ltrim($path, '/');
|
|
98
|
+
|
|
99
|
+
return "{$root}/{$path}";
|
|
100
|
+
}
|
|
101
|
+
|
|
117
102
|
protected function classifyIntent(string $message, array $manifest): string
|
|
118
103
|
{
|
|
119
104
|
$prompt =
|
|
@@ -153,10 +138,12 @@ class LwaziAgent
|
|
|
153
138
|
);
|
|
154
139
|
}
|
|
155
140
|
|
|
141
|
+
$keywords = $semanticUnderstanding['keywords'] ?? [];
|
|
142
|
+
$this->recordSynonymUsage($keywords);
|
|
143
|
+
|
|
156
144
|
$navigationTree = [
|
|
157
145
|
'tree' => $manifest['adjacency'] ?? [],
|
|
158
146
|
'flat' => $manifest['flat'] ?? [],
|
|
159
|
-
'synonyms' => $this->synonyms,
|
|
160
147
|
];
|
|
161
148
|
|
|
162
149
|
$treeResult = $this->searchNavigationTree($message, $semanticUnderstanding, $navigationTree);
|
|
@@ -167,28 +154,71 @@ class LwaziAgent
|
|
|
167
154
|
return $this->searchByContent($message, $semanticUnderstanding, $manifest) ?? null;
|
|
168
155
|
}
|
|
169
156
|
|
|
157
|
+
protected function recordSynonymUsage(array $keywords): void
|
|
158
|
+
{
|
|
159
|
+
$cachePath = storage_path('lwazi/synonym_cache.json');
|
|
160
|
+
$usage = [];
|
|
161
|
+
|
|
162
|
+
if (file_exists($cachePath)) {
|
|
163
|
+
$usage = json_decode(file_get_contents($cachePath), true) ?? [];
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
foreach ($keywords as $keyword) {
|
|
167
|
+
$key = strtolower(trim($keyword));
|
|
168
|
+
if ($key) {
|
|
169
|
+
$usage[$key] = ($usage[$key] ?? 0) + 1;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
if (!is_dir(dirname($cachePath))) {
|
|
174
|
+
mkdir(dirname($cachePath), 0755, true);
|
|
175
|
+
}
|
|
176
|
+
file_put_contents($cachePath, json_encode($usage, JSON_PRETTY_PRINT));
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
protected function getTopSynonyms(int $limit = 20): array
|
|
180
|
+
{
|
|
181
|
+
$cachePath = storage_path('lwazi/synonym_cache.json');
|
|
182
|
+
if (!file_exists($cachePath)) {
|
|
183
|
+
return [];
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
$usage = json_decode(file_get_contents($cachePath), true) ?? [];
|
|
187
|
+
arsort($usage);
|
|
188
|
+
return array_slice($usage, 0, $limit, true);
|
|
189
|
+
}
|
|
190
|
+
|
|
170
191
|
protected function understandUserIntent(string $message, array $manifest): ?array
|
|
171
192
|
{
|
|
172
|
-
$
|
|
173
|
-
$
|
|
193
|
+
$flatIndex = $manifest['flat'] ?? [];
|
|
194
|
+
$routes = array_keys($flatIndex);
|
|
195
|
+
|
|
196
|
+
$topSynonyms = $this->getTopSynonyms(10);
|
|
197
|
+
$popularTerms = !empty($topSynonyms) ? 'Popular terms used: ' . implode(', ', array_keys($topSynonyms)) . '. ' : '';
|
|
174
198
|
|
|
175
|
-
$prompt =
|
|
199
|
+
$prompt = $popularTerms . "The user wants to find something on this website. Generate all possible synonyms and variations of what they might be searching for.
|
|
176
200
|
|
|
177
201
|
QUESTION: {$message}
|
|
178
|
-
AVAILABLE
|
|
179
|
-
Return JSON
|
|
202
|
+
AVAILABLE ROUTES: " . json_encode($routes) . "
|
|
203
|
+
Return JSON with:
|
|
204
|
+
- \"intent\": what the user is looking for (short description)
|
|
205
|
+
- \"keywords\": array of ALL possible synonyms, variations, related terms (be generous - include plurals, common misspellings, related concepts)
|
|
206
|
+
- \"confidence\": how confident you are (0-1)
|
|
207
|
+
|
|
208
|
+
Return: {\"intent\": \"description\", \"keywords\": [\"term1\", \"term2\", ...], \"confidence\": 0.9}";
|
|
180
209
|
|
|
181
210
|
$response = $this->callOllama([
|
|
182
|
-
['role' => 'system', 'content' => 'You are a website navigation assistant. Return valid JSON.'],
|
|
211
|
+
['role' => 'system', 'content' => 'You are a website navigation assistant. Generate comprehensive synonyms for searching. Return valid JSON only.'],
|
|
183
212
|
['role' => 'user', 'content' => $prompt],
|
|
184
213
|
]);
|
|
185
214
|
|
|
186
215
|
$understanding = $this->extractJson($response['content'] ?? '');
|
|
216
|
+
|
|
187
217
|
if (!$understanding || empty($understanding['keywords'])) {
|
|
188
|
-
$simplePrompt = "
|
|
218
|
+
$simplePrompt = "User wants: {$message}. Return JSON: {\"intent\": \"what they want\", \"keywords\": [\"term1\", \"term2\"]}";
|
|
189
219
|
$retry = $this->callOllama([
|
|
190
220
|
['role' => 'system', 'content' => 'Return only valid JSON.'],
|
|
191
|
-
['role' => 'user', 'content' => $simplePrompt
|
|
221
|
+
['role' => 'user', 'content' => $simplePrompt],
|
|
192
222
|
]);
|
|
193
223
|
$understanding = $this->extractJson($retry['content'] ?? '');
|
|
194
224
|
}
|
|
@@ -202,42 +232,28 @@ Return JSON: {\"intent\": \"description\", \"keywords\": [\"key\", \"terms\"]}";
|
|
|
202
232
|
$keywords = $understanding['keywords'] ?? [];
|
|
203
233
|
$query = $understanding['intent'] ?? $message;
|
|
204
234
|
|
|
205
|
-
$expandedKeywords = $this->expandKeywordsWithSynonyms($keywords, $navigationTree);
|
|
206
|
-
|
|
207
235
|
foreach ($flatIndex as $path => $entry) {
|
|
208
236
|
$label = strtolower($entry['label'] ?? '');
|
|
209
237
|
$segments = $entry['segments'] ?? [];
|
|
210
238
|
|
|
211
|
-
foreach ($
|
|
212
|
-
|
|
239
|
+
foreach ($keywords as $keyword) {
|
|
240
|
+
$kw = strtolower(trim($keyword));
|
|
241
|
+
if (str_contains($label, $kw)) return $entry;
|
|
213
242
|
foreach ($segments as $seg) {
|
|
214
|
-
if (str_contains(strtolower($seg),
|
|
243
|
+
if (str_contains(strtolower($seg), $kw)) return $entry;
|
|
215
244
|
}
|
|
216
245
|
}
|
|
217
246
|
|
|
218
247
|
if ($this->semanticMatch($query, $label)) return $entry;
|
|
219
248
|
}
|
|
220
249
|
|
|
221
|
-
return $this->traverseTreeForMatch($navigationTree['tree'] ?? [], $query, $
|
|
250
|
+
return $this->traverseTreeForMatch($navigationTree['tree'] ?? [], $query, $keywords)
|
|
222
251
|
?? $this->broadSearchFallback($message, $understanding, $navigationTree);
|
|
223
252
|
}
|
|
224
253
|
|
|
225
254
|
protected function expandKeywordsWithSynonyms(array $keywords, array $navigationTree = []): array
|
|
226
255
|
{
|
|
227
|
-
|
|
228
|
-
$treeSynonyms = $navigationTree['synonyms'] ?? [];
|
|
229
|
-
$allSynonyms = array_merge($this->synonyms, $treeSynonyms);
|
|
230
|
-
|
|
231
|
-
foreach ($keywords as $keyword) {
|
|
232
|
-
$kwLower = strtolower($keyword);
|
|
233
|
-
foreach ($allSynonyms as $group => $syns) {
|
|
234
|
-
if ($kwLower === strtolower($group) || in_array($kwLower, array_map('strtolower', $syns))) {
|
|
235
|
-
$expanded = array_merge($expanded, $syns, [$group]);
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
return array_unique($expanded);
|
|
256
|
+
return $keywords;
|
|
241
257
|
}
|
|
242
258
|
|
|
243
259
|
protected function semanticMatch(string $query, string $label): bool
|
|
@@ -248,27 +264,18 @@ Return JSON: {\"intent\": \"description\", \"keywords\": [\"key\", \"terms\"]}";
|
|
|
248
264
|
$matches = 0;
|
|
249
265
|
foreach ($queryWords as $qWord) {
|
|
250
266
|
foreach ($labelWords as $lWord) {
|
|
251
|
-
if ($qWord === $lWord
|
|
267
|
+
if ($qWord === $lWord) {
|
|
252
268
|
$matches++;
|
|
253
269
|
break;
|
|
254
270
|
}
|
|
255
271
|
}
|
|
256
272
|
}
|
|
257
273
|
|
|
258
|
-
return $matches > 0 && ($matches / count($queryWords)) > 0.3;
|
|
274
|
+
return $matches > 0 && ($matches / max(count($queryWords), 1)) > 0.3;
|
|
259
275
|
}
|
|
260
276
|
|
|
261
277
|
protected function isSynonym(string $word1, string $word2): bool
|
|
262
278
|
{
|
|
263
|
-
$word1 = strtolower($word1);
|
|
264
|
-
$word2 = strtolower($word2);
|
|
265
|
-
|
|
266
|
-
foreach ($this->synonyms as $group => $words) {
|
|
267
|
-
if (in_array($word1, $words) && in_array($word2, $words)) return true;
|
|
268
|
-
if ($word1 === $group && in_array($word2, $words)) return true;
|
|
269
|
-
if ($word2 === $group && in_array($word1, $words)) return true;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
279
|
return false;
|
|
273
280
|
}
|
|
274
281
|
|
package/src/Services/NavigationTree.php
CHANGED
|
@@ -7,12 +7,12 @@ class NavigationTree
|
|
|
7
7
|
protected array $tree = [];
|
|
8
8
|
protected array $synonyms = [];
|
|
9
9
|
protected array $flatIndex = [];
|
|
10
|
+
protected array $synonymUsage = [];
|
|
10
11
|
protected ?TokenizerService $tokenizer = null;
|
|
11
12
|
protected array $stemmedIndex = [];
|
|
12
13
|
|
|
13
14
|
public function __construct()
|
|
14
15
|
{
|
|
15
|
-
$this->loadDefaultSynonyms();
|
|
16
16
|
}
|
|
17
17
|
|
|
18
18
|
protected function getTokenizer(): TokenizerService
|
|
@@ -446,6 +446,43 @@ class NavigationTree
|
|
|
446
446
|
return $this->flatIndex;
|
|
447
447
|
}
|
|
448
448
|
|
|
449
|
+
public function recordSynonymUsage(array $keywords): void
|
|
450
|
+
{
|
|
451
|
+
foreach ($keywords as $keyword) {
|
|
452
|
+
$key = strtolower(trim($keyword));
|
|
453
|
+
if (!isset($this->synonymUsage[$key])) {
|
|
454
|
+
$this->synonymUsage[$key] = 0;
|
|
455
|
+
}
|
|
456
|
+
$this->synonymUsage[$key]++;
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
public function getTopSynonyms(int $limit = 20): array
|
|
461
|
+
{
|
|
462
|
+
arsort($this->synonymUsage);
|
|
463
|
+
return array_slice($this->synonymUsage, 0, $limit, true);
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
public function saveSynonymCache(string $path): void
|
|
467
|
+
{
|
|
468
|
+
$data = [
|
|
469
|
+
'synonym_usage' => $this->synonymUsage,
|
|
470
|
+
'saved_at' => date('c'),
|
|
471
|
+
];
|
|
472
|
+
if (!is_dir(dirname($path))) {
|
|
473
|
+
mkdir(dirname($path), 0755, true);
|
|
474
|
+
}
|
|
475
|
+
file_put_contents($path, json_encode($data, JSON_PRETTY_PRINT));
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
public function loadSynonymCache(string $path): void
|
|
479
|
+
{
|
|
480
|
+
if (file_exists($path)) {
|
|
481
|
+
$data = json_decode(file_get_contents($path), true);
|
|
482
|
+
$this->synonymUsage = $data['synonym_usage'] ?? [];
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
|
|
449
486
|
public function buildStemmedIndex(): void
|
|
450
487
|
{
|
|
451
488
|
$tokenizer = $this->getTokenizer();
|