@apmantza/greedysearch-pi 1.7.6 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,44 +1,44 @@
1
- ---
2
- name: greedy-search
3
- description: Live web search via Perplexity, Bing, and Google AI in parallel. Use for library docs, recent framework changes, error messages, dependency selection, or anything where training data may be stale. NOT for codebase search.
4
- ---
5
-
6
- # GreedySearch — Live Web Search
7
-
8
- Runs Perplexity, Bing Copilot, and Google AI in parallel. Gemini synthesizes results.
9
-
10
- ## greedy_search
11
-
12
- ```
13
- greedy_search({ query: "React 19 changes", depth: "standard" })
14
- ```
15
-
16
- | Parameter | Type | Default | Description |
17
- |-----------|------|---------|-------------|
18
- | `query` | string | required | Search question |
19
- | `engine` | string | `"all"` | `all`, `perplexity`, `bing`, `google`, `gemini` |
20
- | `depth` | string | `"standard"` | `fast`, `standard`, `deep` |
21
- | `fullAnswer` | boolean | `false` | Full answer vs ~300 char summary |
22
-
23
- | Depth | Engines | Synthesis | Source Fetch | Time |
24
- |-------|---------|-----------|--------------|------|
25
- | `fast` | 1 | — | — | 15-30s |
26
- | `standard` | 3 | Gemini | — | 30-90s |
27
- | `deep` | 3 | Gemini | top 5 | 60-180s |
28
-
29
- **When engines agree** → high confidence. **When they diverge** → note both perspectives.
30
-
31
- ## coding_task
32
-
33
- Second opinion from Gemini/Copilot on hard problems.
34
-
35
- ```
36
- coding_task({ task: "debug race condition", mode: "debug", engine: "gemini" })
37
- ```
38
-
39
- | Parameter | Type | Default | Options |
40
- |-----------|------|---------|---------|
41
- | `task` | string | required | — |
42
- | `engine` | string | `"gemini"` | `gemini`, `copilot`, `all` |
43
- | `mode` | string | `"code"` | `debug`, `plan`, `review`, `test`, `code` |
44
- | `context` | string | — | Code snippet |
1
+ ---
2
+ name: greedy-search
3
+ description: Live web search via Perplexity, Bing, and Google AI in parallel. Use for library docs, recent framework changes, error messages, dependency selection, or anything where training data may be stale. NOT for codebase search.
4
+ ---
5
+
6
+ # GreedySearch — Live Web Search
7
+
8
+ Runs Perplexity, Bing Copilot, and Google AI in parallel. Gemini synthesizes results.
9
+
10
+ ## greedy_search
11
+
12
+ ```
13
+ greedy_search({ query: "React 19 changes", depth: "standard" })
14
+ ```
15
+
16
+ | Parameter | Type | Default | Description |
17
+ |-----------|------|---------|-------------|
18
+ | `query` | string | required | Search question |
19
+ | `engine` | string | `"all"` | `all`, `perplexity`, `bing`, `google`, `gemini` |
20
+ | `depth` | string | `"standard"` | `fast`, `standard`, `deep` |
21
+ | `fullAnswer` | boolean | `false` | Full answer vs ~300 char summary |
22
+
23
+ | Depth | Engines | Synthesis | Source Fetch | Time |
24
+ |-------|---------|-----------|--------------|------|
25
+ | `fast` | 1 | — | — | 15-30s |
26
+ | `standard` | 3 | Gemini | — | 30-90s |
27
+ | `deep` | 3 | Gemini | top 5 | 60-180s |
28
+
29
+ **When engines agree** → high confidence. **When they diverge** → note both perspectives.
30
+
31
+ ## coding_task
32
+
33
+ Second opinion from Gemini/Copilot on hard problems.
34
+
35
+ ```
36
+ coding_task({ task: "debug race condition", mode: "debug", engine: "gemini" })
37
+ ```
38
+
39
+ | Parameter | Type | Default | Options |
40
+ |-----------|------|---------|---------|
41
+ | `task` | string | required | — |
42
+ | `engine` | string | `"gemini"` | `gemini`, `copilot`, `all` |
43
+ | `mode` | string | `"code"` | `debug`, `plan`, `review`, `test`, `code` |
44
+ | `context` | string | — | Code snippet |
package/src/fetcher.mjs CHANGED
@@ -178,6 +178,7 @@ export async function fetchSourceHttp(url, options = {}) {
178
178
 
179
179
  const contentType = response.headers.get("content-type") || "";
180
180
  const finalUrl = response.url;
181
+ const lastModified = response.headers.get("last-modified") || "";
181
182
 
182
183
  // Handle raw text/plain from GitHub (raw file content)
183
184
  if (
@@ -191,6 +192,11 @@ export async function fetchSourceHttp(url, options = {}) {
191
192
  finalUrl,
192
193
  status: response.status,
193
194
  title: finalUrl.split("/").pop() || "GitHub File",
195
+ byline: "",
196
+ siteName: "GitHub",
197
+ lang: "",
198
+ publishedTime: lastModified,
199
+ lastModified,
194
200
  markdown: text,
195
201
  contentLength: text.length,
196
202
  excerpt: text.slice(0, 300).replace(/\n/g, " "),
@@ -250,6 +256,11 @@ export async function fetchSourceHttp(url, options = {}) {
250
256
  finalUrl,
251
257
  status: response.status,
252
258
  title: extracted.title,
259
+ byline: extracted.byline,
260
+ siteName: extracted.siteName,
261
+ lang: extracted.lang,
262
+ publishedTime: extracted.publishedTime || lastModified,
263
+ lastModified,
253
264
  markdown: extracted.markdown,
254
265
  excerpt: extracted.excerpt,
255
266
  contentLength: extracted.markdown.length,
@@ -437,6 +448,29 @@ function isNetworkErrorRetryableWithBrowser(error) {
437
448
  );
438
449
  }
439
450
 
451
+ /**
452
+ * Extract a publication date string from <meta> or <time> tags (Open Graph, schema.org, Dublin Core, standard)
453
+ * Returns the first non-empty `content` or `datetime` attribute value as-is (not validated or normalized to ISO), or an empty string.
454
+ */
455
+ function extractMetaDate(document) {
456
+ const selectors = [
457
+ 'meta[property="article:published_time"]',
458
+ 'meta[name="article:published_time"]',
459
+ 'meta[property="og:published_time"]',
460
+ 'meta[name="publication_date"]',
461
+ 'meta[name="date"]',
462
+ 'meta[itemprop="datePublished"]',
463
+ 'time[itemprop="datePublished"]',
464
+ 'meta[name="DC.date"]',
465
+ ];
466
+ for (const sel of selectors) {
467
+ const el = document.querySelector(sel);
468
+ const val = el?.getAttribute("content") || el?.getAttribute("datetime") || "";
469
+ if (val) return val;
470
+ }
471
+ return "";
472
+ }
473
+
440
474
  /**
441
475
  * Extract readable content using Mozilla Readability + Turndown
442
476
  */
@@ -452,8 +486,14 @@ function extractContent(html, url) {
452
486
  const markdown = turndown.turndown(article.content);
453
487
  const cleanMarkdown = markdown.replace(/\n{3,}/g, "\n\n").trim();
454
488
 
489
+ const publishedTime = article.publishedTime || extractMetaDate(document) || "";
490
+
455
491
  return {
456
492
  title: article.title || document.title || url,
493
+ byline: article.byline || "",
494
+ siteName: article.siteName || "",
495
+ lang: article.lang || "",
496
+ publishedTime,
457
497
  markdown: cleanMarkdown,
458
498
  excerpt: cleanMarkdown.slice(0, 300).replace(/\n/g, " "),
459
499
  };
@@ -472,6 +512,10 @@ function extractContent(html, url) {
472
512
 
473
513
  return {
474
514
  title: document.title || url,
515
+ byline: "",
516
+ siteName: "",
517
+ lang: "",
518
+ publishedTime: extractMetaDate(document),
475
519
  markdown: cleanText,
476
520
  excerpt: cleanText.slice(0, 300),
477
521
  };
@@ -480,6 +524,10 @@ function extractContent(html, url) {
480
524
  // Last resort
481
525
  return {
482
526
  title: url,
527
+ byline: "",
528
+ siteName: "",
529
+ lang: "",
530
+ publishedTime: "",
483
531
  markdown: "",
484
532
  excerpt: "",
485
533
  };