@snap-agent/rag-web 0.1.1 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1659,6 +1659,15 @@ var WebRAGPlugin = class _WebRAGPlugin {
1659
1659
  const content = this.extractBestContentText($, config);
1660
1660
  const minChars = config.minExtractedContentLength ?? 50;
1661
1661
  if (!content || content.length < minChars) return null;
1662
+ const image = $('meta[property="og:image"]').attr("content") || $('meta[name="twitter:image"]').attr("content") || $('meta[property="product:image"]').attr("content") || $('[itemtype*="schema.org/Product"] img, .product img, .product-image img, #product-image img').first().attr("src") || void 0;
1663
+ let imageUrl;
1664
+ if (image) {
1665
+ try {
1666
+ imageUrl = new URL(image, url).href;
1667
+ } catch {
1668
+ imageUrl = image;
1669
+ }
1670
+ }
1662
1671
  let type = config.defaultType || "page";
1663
1672
  if (config.typeFromUrl) {
1664
1673
  for (const [pattern, typeName] of Object.entries(config.typeFromUrl)) {
@@ -1676,6 +1685,7 @@ var WebRAGPlugin = class _WebRAGPlugin {
1676
1685
  type,
1677
1686
  title,
1678
1687
  url,
1688
+ ...imageUrl ? { imageUrl } : {},
1679
1689
  ...config.metadata
1680
1690
  }
1681
1691
  };
package/dist/index.mjs CHANGED
@@ -1623,6 +1623,15 @@ var WebRAGPlugin = class _WebRAGPlugin {
1623
1623
  const content = this.extractBestContentText($, config);
1624
1624
  const minChars = config.minExtractedContentLength ?? 50;
1625
1625
  if (!content || content.length < minChars) return null;
1626
+ const image = $('meta[property="og:image"]').attr("content") || $('meta[name="twitter:image"]').attr("content") || $('meta[property="product:image"]').attr("content") || $('[itemtype*="schema.org/Product"] img, .product img, .product-image img, #product-image img').first().attr("src") || void 0;
1627
+ let imageUrl;
1628
+ if (image) {
1629
+ try {
1630
+ imageUrl = new URL(image, url).href;
1631
+ } catch {
1632
+ imageUrl = image;
1633
+ }
1634
+ }
1626
1635
  let type = config.defaultType || "page";
1627
1636
  if (config.typeFromUrl) {
1628
1637
  for (const [pattern, typeName] of Object.entries(config.typeFromUrl)) {
@@ -1640,6 +1649,7 @@ var WebRAGPlugin = class _WebRAGPlugin {
1640
1649
  type,
1641
1650
  title,
1642
1651
  url,
1652
+ ...imageUrl ? { imageUrl } : {},
1643
1653
  ...config.metadata
1644
1654
  }
1645
1655
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@snap-agent/rag-web",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Web RAG plugin for SnapAgent SDK - Schema-agnostic content search via web crawling, CMS APIs, sitemaps, and RSS feeds",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",