@snap-agent/rag-web 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1659,6 +1659,16 @@ var WebRAGPlugin = class _WebRAGPlugin {
1659
1659
  const content = this.extractBestContentText($, config);
1660
1660
  const minChars = config.minExtractedContentLength ?? 50;
1661
1661
  if (!content || content.length < minChars) return null;
1662
+ const image = $('meta[property="og:image"]').attr("content") || $('meta[name="twitter:image"]').attr("content") || $('meta[property="product:image"]').attr("content") || $('[itemtype*="schema.org/Product"] img, .product img, .product-image img, #product-image img').first().attr("src") || void 0;
1663
+ let imageUrl;
1664
+ if (image) {
1665
+ try {
1666
+ imageUrl = new URL(image, url).href;
1667
+ } catch {
1668
+ imageUrl = image;
1669
+ }
1670
+ }
1671
+ const description = $('meta[property="og:description"]').attr("content") || $('meta[name="description"]').attr("content") || void 0;
1662
1672
  let type = config.defaultType || "page";
1663
1673
  if (config.typeFromUrl) {
1664
1674
  for (const [pattern, typeName] of Object.entries(config.typeFromUrl)) {
@@ -1676,6 +1686,8 @@ var WebRAGPlugin = class _WebRAGPlugin {
1676
1686
  type,
1677
1687
  title,
1678
1688
  url,
1689
+ ...imageUrl ? { imageUrl } : {},
1690
+ ...description ? { description } : {},
1679
1691
  ...config.metadata
1680
1692
  }
1681
1693
  };
package/dist/index.mjs CHANGED
@@ -1623,6 +1623,16 @@ var WebRAGPlugin = class _WebRAGPlugin {
1623
1623
  const content = this.extractBestContentText($, config);
1624
1624
  const minChars = config.minExtractedContentLength ?? 50;
1625
1625
  if (!content || content.length < minChars) return null;
1626
+ const image = $('meta[property="og:image"]').attr("content") || $('meta[name="twitter:image"]').attr("content") || $('meta[property="product:image"]').attr("content") || $('[itemtype*="schema.org/Product"] img, .product img, .product-image img, #product-image img').first().attr("src") || void 0;
1627
+ let imageUrl;
1628
+ if (image) {
1629
+ try {
1630
+ imageUrl = new URL(image, url).href;
1631
+ } catch {
1632
+ imageUrl = image;
1633
+ }
1634
+ }
1635
+ const description = $('meta[property="og:description"]').attr("content") || $('meta[name="description"]').attr("content") || void 0;
1626
1636
  let type = config.defaultType || "page";
1627
1637
  if (config.typeFromUrl) {
1628
1638
  for (const [pattern, typeName] of Object.entries(config.typeFromUrl)) {
@@ -1640,6 +1650,8 @@ var WebRAGPlugin = class _WebRAGPlugin {
1640
1650
  type,
1641
1651
  title,
1642
1652
  url,
1653
+ ...imageUrl ? { imageUrl } : {},
1654
+ ...description ? { description } : {},
1643
1655
  ...config.metadata
1644
1656
  }
1645
1657
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@snap-agent/rag-web",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Web RAG plugin for SnapAgent SDK - Schema-agnostic content search via web crawling, CMS APIs, sitemaps, and RSS feeds",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",