lwazi 1.6.9 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lwazi",
3
- "version": "1.6.9",
3
+ "version": "1.7.2",
4
4
  "description": "Lwazi is an AI assistant for Laravel. Install with one command to add an AI assistant to your Laravel app.",
5
5
  "main": "bin/lwazi.js",
6
6
  "bin": {
@@ -6,6 +6,9 @@ use Illuminate\Console\Command;
6
6
  use Lwazi\Core\Installer\NavigationCrawler;
7
7
  use Lwazi\Core\Services\GraphVisualizer;
8
8
  use Illuminate\Support\Facades\Storage;
9
+ use Illuminate\Support\Str;
10
+ use DOMDocument;
11
+ use DOMXPath;
9
12
 
10
13
  class AnalyzeProjectCommand extends Command
11
14
  {
@@ -26,6 +29,11 @@ class AnalyzeProjectCommand extends Command
26
29
  $crawler = new NavigationCrawler($rootUrl);
27
30
  $manifest = $crawler->crawl();
28
31
 
32
+ if (empty($manifest['nodes'] ?? [])) {
33
+ $this->info("Crawler returned no pages, trying fallback...");
34
+ $manifest = $this->fallbackExtractLinks($rootUrl);
35
+ }
36
+
29
37
  $manifest = $this->mergeWithRoutes($manifest, $rootUrl);
30
38
 
31
39
  $storagePath = storage_path('lwazi');
@@ -37,7 +45,7 @@ class AnalyzeProjectCommand extends Command
37
45
  file_put_contents($manifestFile, json_encode($manifest, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES));
38
46
 
39
47
  $this->info("Analysis complete. Manifest stored at: {$manifestFile}");
40
- $this->info("Pages discovered: " . count($manifest['nodes']));
48
+ $this->info("Pages discovered: " . count($manifest['nodes'] ?? $manifest['flat'] ?? []));
41
49
 
42
50
  $this->info("\n" . str_repeat('=', 50));
43
51
  $this->info('SITE NAVIGATION GRAPH');
@@ -69,6 +77,16 @@ class AnalyzeProjectCommand extends Command
69
77
 
70
78
  $root = $this->extractRoot($rootUrl);
71
79
 
80
+ if (!isset($manifest['flat'])) {
81
+ $manifest['flat'] = [];
82
+ }
83
+ if (!isset($manifest['nodes'])) {
84
+ $manifest['nodes'] = [];
85
+ }
86
+ if (!isset($manifest['adjacency'])) {
87
+ $manifest['adjacency'] = [];
88
+ }
89
+
72
90
  foreach ($routes as $route) {
73
91
  $path = '/' . ltrim($route['uri'], '/');
74
92
 
@@ -128,4 +146,90 @@ class AnalyzeProjectCommand extends Command
128
146
  $path = preg_replace('/\s+/', ' ', $path);
129
147
  return ucwords(trim($path));
130
148
  }
149
+
150
/**
 * Best-effort fallback: fetch one page and build a manifest from its anchors.
 *
 * Only the page at $url is requested (10 second timeout). Every
 * <a href> that resolves to a location under $url becomes a node, an
 * edge from $url, and a "flat" entry. Failures never propagate as
 * exceptions: an unsuccessful response or a thrown \Exception yields a
 * manifest whose collections are empty.
 *
 * @param string $url Page to fetch; also the prefix a link must live under.
 *
 * @return array Manifest with the keys 'nodes', 'adjacency', 'flat' and 'root_url'.
 */
protected function fallbackExtractLinks(string $url): array
{
    $empty = ['nodes' => [], 'adjacency' => [], 'flat' => [], 'root_url' => $url];

    try {
        $response = \Illuminate\Support\Facades\Http::timeout(10)->get($url);
        if (!$response->successful()) {
            return $empty;
        }

        $manifest = [
            'nodes' => [],
            'adjacency' => [$url => []],
            'flat' => [],
            'root_url' => $url,
        ];

        $html = (string) $response->body();
        if (trim($html) === '') {
            // DOMDocument::loadHTML() rejects an empty document (ValueError on
            // PHP 8, which the \Exception handler below would not catch).
            return $manifest;
        }

        // Collect libxml parse errors internally instead of emitting warnings,
        // and put the global libxml setting back afterwards.
        $previousLibxmlSetting = libxml_use_internal_errors(true);
        try {
            $dom = new DOMDocument();
            $dom->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousLibxmlSetting);
        }

        $links = (new DOMXPath($dom))->query('//a[@href]');
        if ($links === false) {
            return $manifest;
        }

        $baseLength = strlen($url);
        $baseEndsWithSlash = substr($url, -1) === '/';
        $seen = [];

        foreach ($links as $a) {
            $href = trim($a->getAttribute('href'));
            // Skip empty hrefs, in-page anchors and non-navigational schemes.
            if ($href === '' || preg_match('/^(?:#|javascript:|mailto:|tel:)/i', $href) === 1) {
                continue;
            }

            $fullUrl = $this->makeAbsolute($href, $url);
            if ($fullUrl === '' || $baseLength === 0 || strncmp($fullUrl, $url, $baseLength) !== 0) {
                continue;
            }

            // A bare prefix match would accept "http://host.evil.org" for the
            // base "http://host"; require a URL boundary right after the prefix.
            $rest = (string) substr($fullUrl, $baseLength);
            if ($rest !== '' && !$baseEndsWithSlash && strpos('/?#', $rest[0]) === false) {
                continue;
            }

            if (isset($seen[$fullUrl])) {
                continue;
            }
            $seen[$fullUrl] = true;

            $text = trim($a->textContent ?? '');
            // Explicit empty check so link text such as "0" is kept as the label.
            $label = $text !== '' ? $text : basename($fullUrl);

            $manifest['nodes'][$fullUrl] = [
                'url' => $fullUrl,
                'title' => $label,
                'headings' => [$text],
            ];

            $manifest['adjacency'][$url][] = $fullUrl;

            $path = parse_url($fullUrl, PHP_URL_PATH);
            // array_values() keeps 'segments' a list, so it JSON-encodes as an
            // array rather than an object with sparse numeric keys.
            $segments = array_values(array_filter(
                explode('/', is_string($path) ? $path : ''),
                static function (string $segment): bool {
                    return $segment !== '';
                }
            ));

            $manifest['flat'][$fullUrl] = [
                'label' => $label,
                'segments' => $segments,
                '_path' => $fullUrl,
                '_weight' => 1,
            ];
        }

        return $manifest;
    } catch (\Exception $e) {
        // Deliberate best-effort: any request/parsing exception means "no links".
        return $empty;
    }
}
217
+
218
/**
 * Turn an href found on a page into an absolute URL.
 *
 * Handles, in order: already-absolute http(s) URLs (returned unchanged),
 * the bare root "/" (returns $base), protocol-relative "//host/path"
 * (given the base's scheme), root-relative "/path" (joined to the base's
 * scheme, host and port) and anything else (appended to $base with exactly
 * one slash in between). "./" and "../" segments are not normalised.
 *
 * @param string $href Raw href value.
 * @param string $base URL the href was found on.
 *
 * @return string Absolute URL.
 */
protected function makeAbsolute(string $href, string $base): string
{
    if (preg_match('#^https?://#i', $href) === 1) {
        return $href;
    }

    if ($href === '/') {
        return $base;
    }

    $parsed = parse_url($base);
    $parts = is_array($parsed) ? $parsed : [];
    $scheme = $parts['scheme'] ?? 'http';

    // Protocol-relative reference: it names its own host, only the scheme is inherited.
    if (strncmp($href, '//', 2) === 0) {
        return $scheme . ':' . $href;
    }

    if (strncmp($href, '/', 1) === 0) {
        $origin = $scheme . '://' . ($parts['host'] ?? '');
        // Keep a non-default port (e.g. http://localhost:8000); without it the
        // result points at a different origin than the base.
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }

        return $origin . $href;
    }

    // Avoid a double slash when the base already ends with one.
    return rtrim($base, '/') . '/' . $href;
}
131
235
  }
@@ -108,15 +108,21 @@ class SetupCommand extends Command
108
108
  protected function runCrawler(string $url): void
109
109
  {
110
110
  $this->info("\nCrawling website: {$url}...");
111
+
111
112
  try {
112
113
  $crawler = new NavigationCrawler($url, true);
113
114
  $manifest = $crawler->crawl();
114
115
 
116
+ if (empty($manifest['nodes'] ?? [])) {
117
+ $this->info("Crawler returned no pages, trying fallback extraction...");
118
+ $manifest = $this->fallbackExtractLinks($url);
119
+ }
120
+
115
121
  $manifest = $this->mergeWithRoutes($manifest, $url);
116
122
 
117
123
  $crawler->saveManifest();
118
124
 
119
- $this->info("Crawled " . count($manifest['nodes']) . " pages.");
125
+ $this->info("Crawled " . count($manifest['nodes'] ?? $manifest['flat'] ?? []) . " pages.");
120
126
 
121
127
  $this->info("\n" . str_repeat('=', 50));
122
128
  $this->info('SITE NAVIGATION GRAPH');
@@ -130,8 +136,107 @@ class SetupCommand extends Command
130
136
  $this->line($summaryOutput);
131
137
  } catch (\Throwable $e) {
132
138
  $this->warn('Website crawling failed: ' . $e->getMessage());
139
+
140
+ $this->info("Trying fallback link extraction...");
141
+ try {
142
+ $manifest = $this->fallbackExtractLinks($url);
143
+ $manifest = $this->mergeWithRoutes($manifest, $url);
144
+
145
+ $crawler = new NavigationCrawler($url);
146
+ $crawler->saveManifest();
147
+
148
+ $this->info("Fallback extraction complete.");
149
+ } catch (\Throwable $e2) {
150
+ $this->warn('Fallback also failed: ' . $e2->getMessage());
151
+ }
133
152
  }
134
153
  }
154
+
155
/**
 * Best-effort fallback: fetch one page and build a manifest from its anchors.
 *
 * Only the page at $url is requested (10 second timeout). Every
 * <a href> that resolves to a location under $url becomes a node, an
 * edge from $url, and a "flat" entry. Failures never propagate as
 * exceptions: an unsuccessful response or a thrown \Exception yields a
 * manifest whose collections are empty.
 *
 * NOTE(review): the import block of this class is not visible here, so the
 * DOM classes are referenced by their fully-qualified names.
 *
 * @param string $url Page to fetch; also the prefix a link must live under.
 *
 * @return array Manifest with the keys 'nodes', 'adjacency', 'flat' and 'root_url'.
 */
protected function fallbackExtractLinks(string $url): array
{
    $empty = ['nodes' => [], 'adjacency' => [], 'flat' => [], 'root_url' => $url];

    try {
        $response = \Illuminate\Support\Facades\Http::timeout(10)->get($url);
        if (!$response->successful()) {
            return $empty;
        }

        $manifest = [
            'nodes' => [],
            'adjacency' => [$url => []],
            'flat' => [],
            'root_url' => $url,
        ];

        $html = (string) $response->body();
        if (trim($html) === '') {
            // DOMDocument::loadHTML() rejects an empty document (ValueError on
            // PHP 8, which the \Exception handler below would not catch).
            return $manifest;
        }

        // Collect libxml parse errors internally instead of emitting warnings,
        // and put the global libxml setting back afterwards.
        $previousLibxmlSetting = libxml_use_internal_errors(true);
        try {
            $dom = new \DOMDocument();
            $dom->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousLibxmlSetting);
        }

        $links = (new \DOMXPath($dom))->query('//a[@href]');
        if ($links === false) {
            return $manifest;
        }

        $baseLength = strlen($url);
        $baseEndsWithSlash = substr($url, -1) === '/';
        $seen = [];

        foreach ($links as $a) {
            $href = trim($a->getAttribute('href'));
            // Skip empty hrefs, in-page anchors and non-navigational schemes.
            if ($href === '' || preg_match('/^(?:#|javascript:|mailto:|tel:)/i', $href) === 1) {
                continue;
            }

            $fullUrl = $this->makeAbsolute($href, $url);
            if ($fullUrl === '' || $baseLength === 0 || strncmp($fullUrl, $url, $baseLength) !== 0) {
                continue;
            }

            // A bare prefix match would accept "http://host.evil.org" for the
            // base "http://host"; require a URL boundary right after the prefix.
            $rest = (string) substr($fullUrl, $baseLength);
            if ($rest !== '' && !$baseEndsWithSlash && strpos('/?#', $rest[0]) === false) {
                continue;
            }

            if (isset($seen[$fullUrl])) {
                continue;
            }
            $seen[$fullUrl] = true;

            $text = trim($a->textContent ?? '');
            // Explicit empty check so link text such as "0" is kept as the label.
            $label = $text !== '' ? $text : basename($fullUrl);

            $manifest['nodes'][$fullUrl] = [
                'url' => $fullUrl,
                'title' => $label,
                'headings' => [$text],
            ];

            $manifest['adjacency'][$url][] = $fullUrl;

            $path = parse_url($fullUrl, PHP_URL_PATH);
            // array_values() keeps 'segments' a list, so it JSON-encodes as an
            // array rather than an object with sparse numeric keys.
            $segments = array_values(array_filter(
                explode('/', is_string($path) ? $path : ''),
                static function (string $segment): bool {
                    return $segment !== '';
                }
            ));

            $manifest['flat'][$fullUrl] = [
                'label' => $label,
                'segments' => $segments,
                '_path' => $fullUrl,
                '_weight' => 1,
            ];
        }

        return $manifest;
    } catch (\Exception $e) {
        // Deliberate best-effort: any request/parsing exception means "no links".
        return $empty;
    }
}
222
+
223
/**
 * Turn an href found on a page into an absolute URL.
 *
 * Handles, in order: already-absolute http(s) URLs (returned unchanged),
 * the bare root "/" (returns $base), protocol-relative "//host/path"
 * (given the base's scheme), root-relative "/path" (joined to the base's
 * scheme, host and port) and anything else (appended to $base with exactly
 * one slash in between). "./" and "../" segments are not normalised.
 *
 * Uses only native string functions, so it does not rely on any helper
 * class being imported in this file.
 *
 * @param string $href Raw href value.
 * @param string $base URL the href was found on.
 *
 * @return string Absolute URL.
 */
protected function makeAbsolute(string $href, string $base): string
{
    if (preg_match('#^https?://#i', $href) === 1) {
        return $href;
    }

    if ($href === '/') {
        return $base;
    }

    $parsed = parse_url($base);
    $parts = is_array($parsed) ? $parsed : [];
    $scheme = $parts['scheme'] ?? 'http';

    // Protocol-relative reference: it names its own host, only the scheme is inherited.
    if (strncmp($href, '//', 2) === 0) {
        return $scheme . ':' . $href;
    }

    if (strncmp($href, '/', 1) === 0) {
        $origin = $scheme . '://' . ($parts['host'] ?? '');
        // Keep a non-default port (e.g. http://localhost:8000); without it the
        // result points at a different origin than the base.
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }

        return $origin . $href;
    }

    // Avoid a double slash when the base already ends with one.
    return rtrim($base, '/') . '/' . $href;
}
135
240
 
136
241
  protected function mergeWithRoutes(array $manifest, string $rootUrl): array
137
242
  {
@@ -149,6 +254,16 @@ class SetupCommand extends Command
149
254
 
150
255
  $root = $this->extractRoot($rootUrl);
151
256
 
257
+ if (!isset($manifest['flat'])) {
258
+ $manifest['flat'] = [];
259
+ }
260
+ if (!isset($manifest['nodes'])) {
261
+ $manifest['nodes'] = [];
262
+ }
263
+ if (!isset($manifest['adjacency'])) {
264
+ $manifest['adjacency'] = [];
265
+ }
266
+
152
267
  foreach ($routes as $route) {
153
268
  $path = '/' . ltrim($route['uri'], '/');
154
269
 
@@ -189,8 +189,20 @@ class LwaziService
189
189
  ]);
190
190
 
191
191
  $json = $this->extractJson($response2['content'] ?? '');
192
- $terms = is_array($json) ? array_values($json) : [];
193
- $terms[] = strtolower($topic);
192
+ $rawTerms = is_array($json) ? array_values($json) : [];
193
+
194
+ $terms = [];
195
+ foreach ($rawTerms as $t) {
196
+ $t = strtolower(trim($t));
197
+ if (strpos($t, ' ') === false && strlen($t) >= 3) {
198
+ $terms[] = $t;
199
+ }
200
+ }
201
+
202
+ $first = explode(' ', $topic)[0];
203
+ if (strlen($first) >= 3) {
204
+ $terms[] = strtolower($first);
205
+ }
194
206
 
195
207
  if (empty($terms)) {
196
208
  return null;
@@ -358,8 +370,20 @@ class LwaziService
358
370
  ]);
359
371
 
360
372
  $json = $this->extractJson($response2['content'] ?? '');
361
- $terms = is_array($json) ? array_values($json) : [];
362
- $terms[] = strtolower($topic);
373
+ $rawTerms = is_array($json) ? array_values($json) : [];
374
+
375
+ $terms = [];
376
+ foreach ($rawTerms as $t) {
377
+ $t = strtolower(trim($t));
378
+ if (strpos($t, ' ') === false && strlen($t) >= 3) {
379
+ $terms[] = $t;
380
+ }
381
+ }
382
+
383
+ $first = explode(' ', $topic)[0];
384
+ if (strlen($first) >= 3) {
385
+ $terms[] = strtolower($first);
386
+ }
363
387
 
364
388
  if (empty($terms)) {
365
389
  return null;