@snap-agent/rag-web 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +12 -0
- package/dist/index.mjs +12 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1659,6 +1659,16 @@ var WebRAGPlugin = class _WebRAGPlugin {
|
|
|
1659
1659
|
const content = this.extractBestContentText($, config);
|
|
1660
1660
|
const minChars = config.minExtractedContentLength ?? 50;
|
|
1661
1661
|
if (!content || content.length < minChars) return null;
|
|
1662
|
+
const image = $('meta[property="og:image"]').attr("content") || $('meta[name="twitter:image"]').attr("content") || $('meta[property="product:image"]').attr("content") || $('[itemtype*="schema.org/Product"] img, .product img, .product-image img, #product-image img').first().attr("src") || void 0;
|
|
1663
|
+
let imageUrl;
|
|
1664
|
+
if (image) {
|
|
1665
|
+
try {
|
|
1666
|
+
imageUrl = new URL(image, url).href;
|
|
1667
|
+
} catch {
|
|
1668
|
+
imageUrl = image;
|
|
1669
|
+
}
|
|
1670
|
+
}
|
|
1671
|
+
const description = $('meta[property="og:description"]').attr("content") || $('meta[name="description"]').attr("content") || void 0;
|
|
1662
1672
|
let type = config.defaultType || "page";
|
|
1663
1673
|
if (config.typeFromUrl) {
|
|
1664
1674
|
for (const [pattern, typeName] of Object.entries(config.typeFromUrl)) {
|
|
@@ -1676,6 +1686,8 @@ var WebRAGPlugin = class _WebRAGPlugin {
|
|
|
1676
1686
|
type,
|
|
1677
1687
|
title,
|
|
1678
1688
|
url,
|
|
1689
|
+
...imageUrl ? { imageUrl } : {},
|
|
1690
|
+
...description ? { description } : {},
|
|
1679
1691
|
...config.metadata
|
|
1680
1692
|
}
|
|
1681
1693
|
};
|
package/dist/index.mjs
CHANGED
|
@@ -1623,6 +1623,16 @@ var WebRAGPlugin = class _WebRAGPlugin {
|
|
|
1623
1623
|
const content = this.extractBestContentText($, config);
|
|
1624
1624
|
const minChars = config.minExtractedContentLength ?? 50;
|
|
1625
1625
|
if (!content || content.length < minChars) return null;
|
|
1626
|
+
const image = $('meta[property="og:image"]').attr("content") || $('meta[name="twitter:image"]').attr("content") || $('meta[property="product:image"]').attr("content") || $('[itemtype*="schema.org/Product"] img, .product img, .product-image img, #product-image img').first().attr("src") || void 0;
|
|
1627
|
+
let imageUrl;
|
|
1628
|
+
if (image) {
|
|
1629
|
+
try {
|
|
1630
|
+
imageUrl = new URL(image, url).href;
|
|
1631
|
+
} catch {
|
|
1632
|
+
imageUrl = image;
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1635
|
+
const description = $('meta[property="og:description"]').attr("content") || $('meta[name="description"]').attr("content") || void 0;
|
|
1626
1636
|
let type = config.defaultType || "page";
|
|
1627
1637
|
if (config.typeFromUrl) {
|
|
1628
1638
|
for (const [pattern, typeName] of Object.entries(config.typeFromUrl)) {
|
|
@@ -1640,6 +1650,8 @@ var WebRAGPlugin = class _WebRAGPlugin {
|
|
|
1640
1650
|
type,
|
|
1641
1651
|
title,
|
|
1642
1652
|
url,
|
|
1653
|
+
...imageUrl ? { imageUrl } : {},
|
|
1654
|
+
...description ? { description } : {},
|
|
1643
1655
|
...config.metadata
|
|
1644
1656
|
}
|
|
1645
1657
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@snap-agent/rag-web",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Web RAG plugin for SnapAgent SDK - Schema-agnostic content search via web crawling, CMS APIs, sitemaps, and RSS feeds",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|