intelwatch 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/README.md +175 -0
- package/bin/intelwatch.js +8 -0
- package/package.json +43 -0
- package/src/ai/client.js +130 -0
- package/src/commands/ai-summary.js +147 -0
- package/src/commands/check.js +267 -0
- package/src/commands/compare.js +124 -0
- package/src/commands/diff.js +118 -0
- package/src/commands/digest.js +156 -0
- package/src/commands/discover.js +301 -0
- package/src/commands/history.js +60 -0
- package/src/commands/list.js +43 -0
- package/src/commands/notify.js +121 -0
- package/src/commands/pitch.js +156 -0
- package/src/commands/report.js +82 -0
- package/src/commands/track.js +94 -0
- package/src/config.js +65 -0
- package/src/index.js +182 -0
- package/src/report/html.js +499 -0
- package/src/report/json.js +44 -0
- package/src/report/markdown.js +156 -0
- package/src/scrapers/brave-search.js +268 -0
- package/src/scrapers/google-news.js +111 -0
- package/src/scrapers/google.js +113 -0
- package/src/scrapers/pappers.js +119 -0
- package/src/scrapers/site-analyzer.js +252 -0
- package/src/storage.js +168 -0
- package/src/trackers/brand.js +76 -0
- package/src/trackers/competitor.js +268 -0
- package/src/trackers/keyword.js +121 -0
- package/src/trackers/person.js +132 -0
- package/src/utils/display.js +102 -0
- package/src/utils/fetcher.js +82 -0
- package/src/utils/parser.js +110 -0
- package/src/utils/sentiment.js +95 -0
- package/src/utils/tech-detect.js +94 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
|
|
3
|
+
/**
 * Parse an HTML string with cheerio.
 *
 * @param {string} html - Markup to parse.
 * @returns {*} The document handle produced by `cheerio.load(html)`.
 */
export function load(html) {
  const $ = cheerio.load(html);
  return $;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Collect the unique same-host links found in a parsed document.
 *
 * Every `<a href>` is resolved against `baseUrl`. Links whose hostname equals
 * the hostname of `baseUrl` are kept, with the query string and fragment
 * stripped.
 *
 * @param {Function} $ - Document handle supporting `$(selector).each(...)` and
 *   `$(element).attr(name)`.
 * @param {string} baseUrl - Absolute URL used to resolve relative hrefs.
 * @returns {string[]} De-duplicated links in document order; empty when
 *   `baseUrl` cannot be parsed as an absolute URL.
 */
export function extractLinks($, baseUrl) {
  // Parse the base once rather than once per anchor. Without a valid base no
  // href can be compared against it, so there is nothing to collect.
  let baseHostname;
  try {
    baseHostname = new URL(baseUrl).hostname;
  } catch {
    return [];
  }

  const links = new Set();
  $('a[href]').each((_, el) => {
    const href = $(el).attr('href');
    if (!href) return;

    let url;
    try {
      url = new URL(href, baseUrl);
    } catch {
      // Malformed hrefs are skipped on purpose: link extraction is best-effort.
      return;
    }

    if (url.hostname === baseHostname) {
      links.add(url.href.split('#')[0].split('?')[0]);
    }
  });
  return [...links];
}
|
|
21
|
+
|
|
22
|
+
/**
 * Read common SEO / Open Graph metadata from a parsed document.
 *
 * @param {Function} $ - Document handle supporting `$(selector)` with
 *   `.first()`, `.text()` and `.attr(name)`.
 * @returns {{title: string, description: string, keywords: string,
 *   ogTitle: string, ogDescription: string, canonical: string,
 *   generator: string}} Each field is `''` when the attribute is missing.
 */
export function extractMeta($) {
  // Attribute lookup via `$(selector).attr(...)`, with '' for a missing value.
  const attrOf = (selector, attribute) => $(selector).attr(attribute) || '';

  return {
    title: $('title').first().text().trim(),
    description: attrOf('meta[name="description"]', 'content'),
    keywords: attrOf('meta[name="keywords"]', 'content'),
    ogTitle: attrOf('meta[property="og:title"]', 'content'),
    ogDescription: attrOf('meta[property="og:description"]', 'content'),
    canonical: attrOf('link[rel="canonical"]', 'href'),
    generator: attrOf('meta[name="generator"]', 'content'),
  };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Find the first link to each known social platform in a parsed document.
 *
 * Platforms are recognised by the link's hostname (the listed domain or any
 * subdomain of it), not by a substring of the whole href. A substring test
 * would treat `dropbox.com` or `netflix.com` as `x.com`, and would match a
 * platform name that merely appears in a query string.
 *
 * @param {Function} $ - Document handle supporting `$(selector).each(...)` and
 *   `$(element).attr(name)`.
 * @returns {Object<string, string>} Map of platform name to the original href
 *   of the first matching anchor; platforms with no link are absent.
 */
export function extractSocialLinks($) {
  const socialDomains = {
    twitter: ['twitter.com', 'x.com'],
    facebook: ['facebook.com'],
    linkedin: ['linkedin.com'],
    instagram: ['instagram.com'],
    youtube: ['youtube.com'],
    github: ['github.com'],
    tiktok: ['tiktok.com'],
    pinterest: ['pinterest.com'],
  };
  const platforms = Object.entries(socialDomains);

  // Base used only so protocol-relative hrefs ("//x.com/acme") can be parsed.
  // Purely relative hrefs resolve to this host and therefore never match.
  const placeholderBase = 'http://relative.invalid';

  const socials = {};
  $('a[href]').each((_, el) => {
    const href = $(el).attr('href') || '';

    let hostname;
    try {
      hostname = new URL(href, placeholderBase).hostname;
    } catch {
      // Unparseable hrefs cannot point at a social profile; skip them.
      return;
    }

    for (const [platform, domains] of platforms) {
      if (socials[platform]) continue;
      const matches = domains.some(
        (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
      );
      if (matches) {
        socials[platform] = href;
      }
    }
  });
  return socials;
}
|
|
57
|
+
|
|
58
|
+
/**
 * List the `src` attribute of every `<script src>` element.
 *
 * @param {Function} $ - Document handle supporting `$(selector).each(...)` and
 *   `$(element).attr(name)`.
 * @returns {string[]} Non-empty `src` values in document order.
 */
export function extractScripts($) {
  const sources = [];
  $('script[src]').each((_index, node) => {
    const source = $(node).attr('src');
    if (!source) return;
    sources.push(source);
  });
  return sources;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Serialize a parsed document.
 *
 * @param {{html: Function}} $ - Document handle exposing `html()`.
 * @returns {*} Whatever `$.html()` returns.
 */
export function extractHtml($) {
  const markup = $.html();
  return markup;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Return the trimmed text of whatever `selector` matches.
 *
 * @param {Function} $ - Document handle; `$(selector)` must expose `text()`.
 * @param {string} selector - Selector passed straight to `$`.
 * @returns {string} Result of `text()` with surrounding whitespace removed.
 */
export function textContent($, selector) {
  const matched = $(selector);
  const rawText = matched.text();
  return rawText.trim();
}
|
|
74
|
+
|
|
75
|
+
export function extractPricing($, html) {
|
|
76
|
+
const pricePatterns = [
|
|
77
|
+
/\$\d+(?:\.\d{2})?(?:\s*\/\s*(?:mo|month|yr|year|user|seat))?/gi,
|
|
78
|
+
/€\d+(?:\.\d{2})?(?:\s*\/\s*(?:mo|month|yr|year|user|seat))?/gi,
|
|
79
|
+
/£\d+(?:\.\d{2})?(?:\s*\/\s*(?:mo|month|yr|year|user|seat))?/gi,
|
|
80
|
+
/\d+(?:\.\d{2})?\s*(?:USD|EUR|GBP)(?:\s*\/\s*(?:mo|month|yr|year))?/gi,
|
|
81
|
+
];
|
|
82
|
+
|
|
83
|
+
const prices = new Set();
|
|
84
|
+
for (const pattern of pricePatterns) {
|
|
85
|
+
const matches = html.match(pattern) || [];
|
|
86
|
+
for (const m of matches) prices.add(m.trim());
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
const planKeywords = ['starter', 'basic', 'pro', 'professional', 'business', 'enterprise', 'free', 'premium', 'plus'];
|
|
90
|
+
const plans = [];
|
|
91
|
+
for (const kw of planKeywords) {
|
|
92
|
+
const regex = new RegExp(`${kw}[^\\n]*?\\$[\\d,]+`, 'gi');
|
|
93
|
+
const matches = html.match(regex) || [];
|
|
94
|
+
plans.push(...matches.slice(0, 2));
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
return {
|
|
98
|
+
prices: [...prices].slice(0, 20),
|
|
99
|
+
plans: [...new Set(plans)].slice(0, 10),
|
|
100
|
+
hash: simpleHash([...prices].sort().join('|')),
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
 * Compute a 32-bit multiplicative (x31) hash over the UTF-16 code units of a
 * string.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The hash as an unsigned 32-bit value in lowercase hex
 *   (`'0'` for the empty string).
 */
export function simpleHash(str) {
  let acc = 0;
  let index = 0;
  while (index < str.length) {
    // `| 0` keeps the accumulator in signed 32-bit range after each step.
    acc = (Math.imul(31, acc) + str.charCodeAt(index)) | 0;
    index += 1;
  }
  const unsigned = acc >>> 0;
  return unsigned.toString(16);
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { readFileSync } from 'fs';
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import { dirname, join } from 'path';
|
|
4
|
+
|
|
5
|
+
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Directory holding the word-list JSON files, two levels above this module.
const dataDir = join(__dirname, '..', '..', 'data');

// Cache for the parsed word lists; null until they have been loaded once.
let wordLists = null;
|
|
9
|
+
|
|
10
|
+
/**
 * Load the four sentiment word lists from `dataDir`, reading the files only
 * on the first call and returning the cached object afterwards.
 *
 * @returns {{negativeEn: *, negativeFr: *, positiveEn: *, positiveFr: *}}
 *   Parsed JSON content of each word-list file.
 */
function loadWordLists() {
  if (!wordLists) {
    const readList = (fileName) => JSON.parse(readFileSync(join(dataDir, fileName), 'utf8'));
    wordLists = {
      negativeEn: readList('negative-words-en.json'),
      negativeFr: readList('negative-words-fr.json'),
      positiveEn: readList('positive-words-en.json'),
      positiveFr: readList('positive-words-fr.json'),
    };
  }
  return wordLists;
}
|
|
20
|
+
|
|
21
|
+
// Words from `lists` (list order, de-duplicated) that occur as case-insensitive
// substrings of the already lower-cased `haystack`.
function collectHits(haystack, lists) {
  const hits = new Set();
  for (const list of lists) {
    for (const word of list) {
      if (haystack.includes(word.toLowerCase())) hits.add(word);
    }
  }
  return [...hits];
}

/**
 * Score a text by counting positive and negative word-list hits.
 *
 * A word counts once, however often it appears and even if it is present in
 * both language lists. Matching is by substring, not whole word.
 *
 * @param {string} text - Text to analyse; falsy input yields a neutral result.
 * @param {string} [lang='auto'] - `'en'` or `'fr'` restricts matching to that
 *   language's lists; any other value uses both.
 * @returns {{score: number, label: string, positiveHits: string[],
 *   negativeHits: string[]}} `score` is positive hits minus negative hits;
 *   `label` is one of `positive`, `slightly_positive`, `neutral`,
 *   `slightly_negative`, `negative`; each hits array holds at most 5 words.
 */
export function analyzeSentiment(text, lang = 'auto') {
  if (!text) return { score: 0, label: 'neutral', positiveHits: [], negativeHits: [] };

  const lists = loadWordLists();
  // String() keeps truthy non-string input (e.g. a number) from throwing.
  const lower = String(text).toLowerCase();

  const useEnglish = lang !== 'fr';
  const useFrench = lang !== 'en';
  const negativeLists = [
    ...(useEnglish ? [lists.negativeEn] : []),
    ...(useFrench ? [lists.negativeFr] : []),
  ];
  const positiveLists = [
    ...(useEnglish ? [lists.positiveEn] : []),
    ...(useFrench ? [lists.positiveFr] : []),
  ];

  const negativeHits = collectHits(lower, negativeLists);
  const positiveHits = collectHits(lower, positiveLists);

  const score = positiveHits.length - negativeHits.length;
  let label;
  if (score > 1) label = 'positive';
  else if (score < -1) label = 'negative';
  // A net score of +1 is positive even when a negative word is also present.
  else if (score > 0) label = 'slightly_positive';
  // Reached for score -1, and for a tie with mixed hits (negatives win ties).
  else if (negativeHits.length > 0) label = 'slightly_negative';
  else label = 'neutral';

  return { score, label, positiveHits: positiveHits.slice(0, 5), negativeHits: negativeHits.slice(0, 5) };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Map a sentiment label to its display emoji.
 *
 * @param {string} label - One of `positive`, `slightly_positive`, `neutral`,
 *   `slightly_negative`, `negative`.
 * @returns {string} The matching emoji, or `'❓'` for any other value.
 */
export function sentimentEmoji(label) {
  const emojiByLabel = new Map([
    ['positive', '😊'],
    ['slightly_positive', '🙂'],
    ['neutral', '😐'],
    ['slightly_negative', '😕'],
    ['negative', '😞'],
  ]);
  return emojiByLabel.has(label) ? emojiByLabel.get(label) : '❓';
}
|
|
78
|
+
|
|
79
|
+
/**
 * Classify a mention by keywords found in its URL, title and snippet.
 *
 * Rules are tried in order (press, forum, social, review) and the first match
 * wins; anything unmatched is a `'blog'`.
 *
 * @param {string} url - Mention URL.
 * @param {string} [title] - Result title; a missing value is ignored.
 * @param {string} [snippet] - Result snippet; a missing value is ignored.
 * @returns {'press'|'forum'|'social'|'review'|'blog'} Category name.
 */
export function categorizeMention(url, title, snippet) {
  // `?? ''` keeps missing fields from adding the literal text "undefined".
  const text = `${url ?? ''} ${title ?? ''} ${snippet ?? ''}`.toLowerCase();

  // Short domains are anchored with \b so `x.com` does not match inside
  // `dropbox.com`, nor `g2.com` inside `blog2.com`.
  const rules = [
    ['press', /techcrunch|wired|reuters|bloomberg|bbc|forbes|businessinsider|wsj|nytimes|lemonde|lefigaro/],
    ['forum', /reddit|hacker news|news\.ycombinator|forum|community|discussion|ask\./],
    ['social', /twitter|\bx\.com\b|linkedin|facebook|instagram|tiktok|youtube/],
    ['review', /trustpilot|\bg2\.com\b|capterra|getapp|review|avis|rating/],
  ];

  for (const [category, pattern] of rules) {
    if (pattern.test(text)) return category;
  }
  return 'blog';
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { readFileSync } from 'fs';
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import { dirname, join } from 'path';
|
|
4
|
+
|
|
5
|
+
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Technology signature definitions, in the `data` directory two levels up.
const signaturesPath = join(__dirname, '..', '..', 'data', 'tech-signatures.json');

// Cache for the parsed signatures; null until they have been loaded once.
let signatures = null;
|
|
9
|
+
|
|
10
|
+
/**
 * Return the `technologies` entry of the signatures JSON file, reading and
 * parsing the file only while nothing truthy has been cached yet.
 *
 * @returns {*} The cached `technologies` value from the signatures file.
 */
function loadSignatures() {
  if (!signatures) {
    const raw = readFileSync(signaturesPath, 'utf8');
    const parsed = JSON.parse(raw);
    signatures = parsed.technologies;
  }
  return signatures;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Detect which known technologies a page uses.
 *
 * For each signature the checks run in this order and stop at the first hit:
 * response headers (regex), meta patterns (regex against the whole markup),
 * script substrings, HTML substrings, then URL path substrings.
 * Header, meta, script and HTML checks are case-insensitive; path checks are
 * case-sensitive.
 *
 * @param {string} html - Page markup; null/undefined is treated as empty.
 * @param {Object<string, string|string[]>} [headers={}] - Response headers;
 *   array values are joined with `', '`. null is treated as no headers.
 * @param {string} [url=''] - Page URL used for the path checks.
 * @returns {Array<{name: string, category: string}>} Detected technologies in
 *   signature order.
 */
export function detectTechnologies(html, headers = {}, url = '') {
  const techs = loadSignatures();

  // A failed or empty fetch should yield "nothing detected", not a TypeError.
  const lowerHtml = String(html ?? '').toLowerCase();
  const lowerHeaders = Object.fromEntries(
    Object.entries(headers ?? {}).map(([name, value]) => [
      name.toLowerCase(),
      (Array.isArray(value) ? value.join(', ') : String(value || '')).toLowerCase(),
    ]),
  );

  const htmlContains = (needle) => lowerHtml.includes(needle.toLowerCase());

  const detected = [];
  for (const tech of techs) {
    const checks = tech.checks ?? {};

    // `||` short-circuits, so later (costlier) checks only run when needed.
    const found =
      (checks.headers || []).some(({ name, pattern }) => {
        const headerVal = lowerHeaders[name.toLowerCase()] || '';
        return Boolean(headerVal) && new RegExp(pattern, 'i').test(headerVal);
      }) ||
      (checks.meta || []).some(({ pattern }) => new RegExp(pattern, 'i').test(lowerHtml)) ||
      (checks.scripts || []).some(htmlContains) ||
      (checks.html || []).some(htmlContains) ||
      (Boolean(url) && (checks.paths || []).some((pathPattern) => url.includes(pathPattern)));

    if (found) {
      detected.push({ name: tech.name, category: tech.category });
    }
  }

  return detected;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Compare two technology lists by `name`.
 *
 * @param {Array<{name: string}>} prev - Previously detected technologies.
 * @param {Array<{name: string}>} curr - Currently detected technologies.
 * @returns {{added: Array, removed: Array}} `added`: entries of `curr` whose
 *   name is not in `prev`; `removed`: entries of `prev` whose name is not in
 *   `curr`. Entries keep their original order and object identity.
 */
export function diffTechStacks(prev, curr) {
  const namesOf = (stack) => new Set(stack.map((tech) => tech.name));
  const missingFrom = (stack, names) => stack.filter((tech) => !names.has(tech.name));

  const previousNames = namesOf(prev);
  const currentNames = namesOf(curr);

  return {
    added: missingFrom(curr, previousNames),
    removed: missingFrom(prev, currentNames),
  };
}
|