@bhmarketer-ai/ghostindex-bot 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
# Heartbeat workflow: rewrites heartbeat.txt with the current UTC timestamp
# and commits the file back to the repository.
name: Heartbeat

on:
  workflow_dispatch:
  schedule:
    - cron: "0 9 * * *"

env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

jobs:
  heartbeat:
    permissions:
      contents: write   # the job pushes a commit, so it needs write access
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Update heartbeat
        run: |
          echo "heartbeat: $(date -u +%Y-%m-%dT%H:%M:%SZ)" > heartbeat.txt

      - name: Commit and push
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add heartbeat.txt
          # `git diff --staged --quiet` exits non-zero only when something is
          # staged, so the commit is skipped when heartbeat.txt is unchanged.
          git diff --staged --quiet || git commit -m "chore: heartbeat update $(date -u +%Y-%m-%d)"
          git push
@@ -0,0 +1,45 @@
# Manually triggered release workflow: compiles index.ts to index.js and
# publishes the package to the public npm registry.
name: Publish to NPM

on:
  workflow_dispatch:

env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

jobs:
  publish:
    runs-on: ubuntu-24.04
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          registry-url: 'https://registry.npmjs.org'

      # Installs the TypeScript compiler declared in devDependencies. A failed
      # install now fails the job instead of being hidden by `|| true`.
      - name: Install dependencies
        run: npm install --ignore-scripts

      - name: Compile TypeScript
        run: |
          # Drop any stale artifact so the checks below only ever inspect
          # output produced by this run.
          rm -f index.js
          # Use the project-local compiler (the version range pinned in
          # package.json) rather than an unpinned global install. Compiler
          # diagnostics are surfaced as a warning; whether a usable artifact
          # exists is decided by the checks that follow.
          ./node_modules/.bin/tsc index.ts \
            --target ES2020 \
            --module commonjs \
            --esModuleInterop true \
            --skipLibCheck true \
            --allowJs true || echo "::warning::tsc reported errors while compiling index.ts"
          # Never fall back to copying index.ts: TypeScript source is not
          # runnable JavaScript, and shipping it as index.js breaks both the
          # package's `main` entry and its `bin` command.
          if [ ! -f index.js ]; then
            echo "::error::tsc did not emit index.js; refusing to publish"
            exit 1
          fi
          # Syntax-check the artifact with Node before it can be published.
          node --check index.js

      - name: Publish to NPM
        run: npm publish --access public
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@@ -0,0 +1,13 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.10"
7
+
8
+ mkdocs:
9
+ configuration: mkdocs.yml
10
+
11
+ python:
12
+ install:
13
+ - requirements: docs/requirements.txt
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 BHMarketer.ai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,135 @@
1
+ # BHMarketer GhostIndex Bot 🔍
2
+
3
+ [![npm](https://img.shields.io/npm/v/@bhmarketer-ai/ghostindex-bot)](https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot)
4
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20733022.svg)](https://doi.org/10.5281/zenodo.20733022)
5
+
6
+ AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks — protecting search quality and website integrity. Built by [BHMarketer.ai](https://bhmarketer.ai) powered by BHMarketer.
7
+
8
+ ## Features
9
+
10
+ - Expired Content Detection — identifies outdated pages no longer serving users
11
+ - Misinformation Scoring — flags pages with factual inaccuracies and false claims
12
+ - Policy Violation Detection — detects pages breaching search engine guidelines
13
+ - Violent Content Flagging — identifies harmful, violent, or abusive content
14
+ - Compliance Risk Assessment — evaluates overall compliance risk per URL
15
+ - Deindex Priority Scoring — ranks pages by urgency of removal
16
+ - Search Quality Protection — safeguards website integrity and search rankings
17
+ - CLI support in Node.js and Python
18
+ - Benchmark dataset included (20 deindex detection cases)
19
+ - Lightweight, publish-ready, minimal dependencies
20
+
21
+ ## Quick Start
22
+
23
+ ### Node.js
24
+
25
+ ```bash
26
+ npm install @bhmarketer-ai/ghostindex-bot
27
+ npx ghostindex-bot "https://example.com/expired-page" expired 85 72 90 65 78
28
+ ```
29
+
30
+ ### Python
31
+
32
+ ```bash
33
+ pip install bhmarketer-ghostindex-bot
34
+ python -m bot "https://example.com/expired-page" expired 85 72 90 65 78
35
+ ```
36
+
37
+ ## Output
38
+
39
+ ```
40
+ URL: https://example.com/expired-page
41
+ Violation Type: Expired Content
42
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
43
+ Expired Content Score: 85/100 [Critical Risk]
44
+ Misinformation Score: 72/100 [High Risk]
45
+ Policy Violation Score: 90/100 [Critical Risk]
46
+ Violent Content Score: 65/100 [High Risk]
47
+ Compliance Risk Score: 78/100 [High Risk]
48
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
49
+ Deindex Priority Score: 78/100
50
+ Deindex Recommendation: HIGH — Submit removal request immediately
51
+ Estimated Processing: 3-7 days
52
+ ```
53
+
54
+ ## Project Structure
55
+
56
+ ```
57
+ bhmarketer-ghostindex-bot/
58
+ ├── index.ts # TypeScript bot
59
+ ├── bot.py # Python bot
60
+ ├── package.json # NPM package config
61
+ ├── package-lock.json # NPM lock file
62
+ ├── tsconfig.json # TypeScript config
63
+ ├── schema.json # JSON-LD structured data
64
+ ├── zenodo.json # Zenodo metadata
65
+ ├── heartbeat.txt # Auto-updated daily
66
+ ├── mkdocs.yml # ReadTheDocs config
67
+ ├── .readthedocs.yaml # ReadTheDocs build config
68
+ ├── docs/
69
+ │ ├── index.md # Documentation
70
+ │ └── requirements.txt
71
+ ├── dataset/
72
+ │ └── ghostindex_benchmarks.csv
73
+ ├── kaggle/
74
+ │ └── notebook.ipynb
75
+ ├── .github/workflows/
76
+ │ ├── heartbeat.yml # Auto-commit daily
77
+ │ └── npm-publish.yml # Publish to NPM (manual trigger)
78
+ ├── README.md
79
+ └── LICENSE
80
+ ```
81
+
82
+ ## Violation Types
83
+
84
+ | Type | Description | Priority |
85
+ |------|-------------|----------|
86
+ | expired | Outdated or expired content | High |
87
+ | misinformation | False or misleading information | Critical |
88
+ | policy | Search engine policy violation | High |
89
+ | violent | Violent or harmful content | Critical |
90
+ | compliance | General compliance risk | Medium |
91
+
92
+ ## Detection Signal Scores
93
+
94
+ | Signal | Description | Score Range |
95
+ |--------|-------------|-------------|
96
+ | Expired Content | Page age and relevance decay | 0–100 |
97
+ | Misinformation | Factual accuracy and claim verification | 0–100 |
98
+ | Policy Violation | Search engine guideline compliance | 0–100 |
99
+ | Violent Content | Harmful or abusive content detection | 0–100 |
100
+ | Compliance Risk | Overall compliance risk assessment | 0–100 |
101
+
102
+ ## Deindex Priority
103
+
104
+ | Score | Recommendation | Action |
105
+ |-------|---------------|--------|
106
+ | 0–30 | LOW | Monitor only |
107
+ | 31–60 | MEDIUM | Review and optimize |
108
+ | 61–80 | HIGH | Submit removal request |
109
+ | 81–100 | CRITICAL | Immediate removal required |
110
+
111
+ ## Keywords
112
+
113
+ GhostIndex Bot · Deindex Detection · Content Removal · Policy Violation · Expired Content · Misinformation Detection · Search Quality · BHMarketer · AI Visibility · Compliance Risk
114
+
115
+ ## Links
116
+
117
+ | Platform | URL |
118
+ |----------|-----|
119
+ | Website | https://bhmarketer.ai |
120
+ | Takedown Service | https://bhmarketer.ai/takedown-bad-search-results/ |
121
+ | GitHub | https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot |
122
+ | GitHub Pages | https://bhmarketer-ai.github.io/bhmarketer-ghostindex-bot/ |
123
+ | NPM | https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot |
124
+ | Hugging Face | https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks |
125
+ | Kaggle | https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks |
126
+ | Zenodo | https://zenodo.org/records/20733022 |
127
+ | Docs | https://bhmarketer-ghostindex-bot.readthedocs.io |
128
+
129
+ ## About BHMarketer.ai
130
+
131
+ BHMarketer.ai is an AI-powered online reputation management platform specializing in deindexing, content removal, search quality protection, and brand reputation defense.
132
+
133
+ ## License
134
+
135
+ MIT — [BHMarketer.ai](https://bhmarketer.ai)
package/bot.py ADDED
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ BHMarketer GhostIndex Bot
4
+ AI-powered bot that detects pages requiring deindexing due to expired content,
5
+ misinformation, policy violations, violent content, and other compliance risks.
6
+ https://bhmarketer.ai | https://bhmarketer.ai/takedown-bad-search-results/
7
+ """
8
+
9
+ import sys
10
+
11
+
def get_status(score: int) -> str:
    """Return the risk label for a single signal score.

    Bands use inclusive upper bounds: up to 30 is "Low Risk", up to 60 is
    "Medium Risk", up to 80 is "High Risk", and anything above 80 is
    "Critical Risk".
    """
    bands = ((30, "Low Risk"), (60, "Medium Risk"), (80, "High Risk"))
    for upper_bound, label in bands:
        if score <= upper_bound:
            return label
    return "Critical Risk"
20
+
21
+
def get_deindex_recommendation(score: int) -> str:
    """Return the recommendation text for an overall deindex priority score.

    Uses the same inclusive cut-offs as the status labels (30, 60, 80);
    scores above 80 yield the CRITICAL recommendation.
    """
    if score > 80:
        return "CRITICAL — Immediate removal required"
    if score > 60:
        return "HIGH — Submit removal request immediately"
    if score > 30:
        return "MEDIUM — Review and optimize"
    return "LOW — Monitor only"
30
+
31
+
def get_processing_time(violation_type: str) -> str:
    """Return the estimated processing window for a violation type.

    Args:
        violation_type: One of "expired", "misinformation", "policy",
            "violent" or "compliance". Matching ignores letter case and
            surrounding whitespace, so "Violent" and " violent " both
            resolve to the "violent" entry.

    Returns:
        The processing window as text, e.g. "1-3 days". Unrecognised
        values fall back to "3-7 days".
    """
    times = {
        "expired": "3-7 days",
        "misinformation": "1-5 days",
        "policy": "2-5 days",
        "violent": "1-3 days",
        "compliance": "3-7 days",
    }
    # Normalise only real strings; any other value keeps the original
    # behaviour of falling through to the default.
    key = violation_type.strip().lower() if isinstance(violation_type, str) else violation_type
    return times.get(key, "3-7 days")
41
+
42
+
def detect_deindex(
    url: str,
    violation_type: str = "expired",
    expired_content: int = 85,
    misinformation: int = 72,
    policy_violation: int = 90,
    violent_content: int = 65,
    compliance_risk: int = 78,
) -> dict:
    """
    Build a deindex report from five caller-supplied signal scores.

    The deindex priority score is the rounded arithmetic mean of the five
    signals; the recommendation and the processing estimate are derived from
    that mean and from the violation type respectively.

    Args:
        url: Target URL the report is about
        violation_type: expired, misinformation, policy, violent or compliance
        expired_content: Expired content score (0-100)
        misinformation: Misinformation score (0-100)
        policy_violation: Policy violation score (0-100)
        violent_content: Violent content score (0-100)
        compliance_risk: Compliance risk score (0-100)

    Returns:
        dict holding the URL, a display label for the violation type, the
        five signal scores, the priority score, the recommendation and the
        estimated processing time
    """
    signals = {
        "expired_content_score": expired_content,
        "misinformation_score": misinformation,
        "policy_violation_score": policy_violation,
        "violent_content_score": violent_content,
        "compliance_risk_score": compliance_risk,
    }
    priority = round(sum(signals.values()) / len(signals))

    report = {
        "url": url,
        "violation_type": violation_type.capitalize() + " Content",
    }
    report.update(signals)
    report["deindex_priority_score"] = priority
    report["deindex_recommendation"] = get_deindex_recommendation(priority)
    report["estimated_processing"] = get_processing_time(violation_type)
    return report
82
+
83
+
if __name__ == "__main__":
    # Positional CLI arguments: URL, violation type, then the five signal
    # scores. Every omitted argument falls back to its demo default.
    cli_args = sys.argv[1:]
    url = cli_args[0] if len(cli_args) > 0 else "https://example.com/page"
    violation_type = cli_args[1] if len(cli_args) > 1 else "expired"

    score_defaults = (
        ("expired_content", 85),
        ("misinformation", 72),
        ("policy_violation", 90),
        ("violent_content", 65),
        ("compliance_risk", 78),
    )
    scores = {}
    for position, (name, default) in enumerate(score_defaults, start=2):
        if len(cli_args) <= position:
            scores[name] = default
            continue
        try:
            scores[name] = int(cli_args[position])
        except ValueError:
            # Exit with a one-line message (status 1) instead of letting a
            # mistyped score surface as an uncaught traceback.
            sys.exit(f"error: {name} must be an integer, got {cli_args[position]!r}")

    result = detect_deindex(url, violation_type, **scores)

    print(f"URL: {result['url']}")
    print(f"Violation Type: {result['violation_type']}")
    print("=" * 45)
    print(f"Expired Content Score: {result['expired_content_score']}/100 [{get_status(result['expired_content_score'])}]")
    print(f"Misinformation Score: {result['misinformation_score']}/100 [{get_status(result['misinformation_score'])}]")
    print(f"Policy Violation Score: {result['policy_violation_score']}/100 [{get_status(result['policy_violation_score'])}]")
    print(f"Violent Content Score: {result['violent_content_score']}/100 [{get_status(result['violent_content_score'])}]")
    print(f"Compliance Risk Score: {result['compliance_risk_score']}/100 [{get_status(result['compliance_risk_score'])}]")
    print("=" * 45)
    print(f"Deindex Priority Score: {result['deindex_priority_score']}/100")
    print(f"Deindex Recommendation: {result['deindex_recommendation']}")
    print(f"Estimated Processing: {result['estimated_processing']}")
@@ -0,0 +1,21 @@
1
+ id,url,violation_type,expired_content_score,misinformation_score,policy_violation_score,violent_content_score,compliance_risk_score,deindex_priority_score,deindex_recommendation,estimated_processing,industry,notes
2
+ 1,https://example.com/old-blog-2019,expired,85,40,60,20,55,52,MEDIUM — Review and optimize,3-7 days,Blog,Outdated content no longer relevant
3
+ 2,https://example.com/false-claims,misinformation,30,92,75,25,80,60,MEDIUM — Review and optimize,1-5 days,News,Contains factual inaccuracies
4
+ 3,https://example.com/spam-page,policy,45,55,88,30,75,59,MEDIUM — Review and optimize,2-5 days,SEO,Keyword stuffing violation
5
+ 4,https://example.com/violent-content,violent,25,35,70,95,85,62,HIGH — Submit removal request immediately,1-3 days,Social,Violent imagery detected
6
+ 5,https://example.com/expired-product,expired,90,30,50,15,45,46,MEDIUM — Review and optimize,3-7 days,Ecommerce,Discontinued product page
7
+ 6,https://example.com/fake-news,misinformation,35,95,80,20,85,63,HIGH — Submit removal request immediately,1-5 days,Media,Fabricated news article
8
+ 7,https://example.com/cloaked-page,policy,40,50,92,25,80,57,MEDIUM — Review and optimize,2-5 days,SEO,Cloaking violation detected
9
+ 8,https://example.com/hate-speech,violent,20,60,85,98,90,71,HIGH — Submit removal request immediately,1-3 days,Forum,Hate speech content
10
+ 9,https://example.com/outdated-guide,expired,88,25,45,10,40,42,MEDIUM — Review and optimize,3-7 days,Education,Guide no longer accurate
11
+ 10,https://example.com/conspiracy,misinformation,30,98,78,35,88,66,HIGH — Submit removal request immediately,1-5 days,Blog,Conspiracy theory content
12
+ 11,https://example.com/link-farm,policy,35,40,95,20,82,54,MEDIUM — Review and optimize,2-5 days,SEO,Unnatural link scheme
13
+ 12,https://example.com/abuse-content,violent,15,45,80,96,88,65,HIGH — Submit removal request immediately,1-3 days,Social,Abusive content detected
14
+ 13,https://example.com/old-news-2018,expired,92,30,48,12,42,45,MEDIUM — Review and optimize,3-7 days,News,Outdated news article
15
+ 14,https://example.com/medical-misinformation,misinformation,25,96,82,30,90,65,HIGH — Submit removal request immediately,1-5 days,Health,Dangerous medical misinformation
16
+ 15,https://example.com/doorway-page,policy,38,42,90,18,78,53,MEDIUM — Review and optimize,2-5 days,SEO,Doorway page violation
17
+ 16,https://example.com/graphic-violence,violent,18,38,75,99,88,64,HIGH — Submit removal request immediately,1-3 days,Media,Graphic violent content
18
+ 17,https://example.com/expired-event,expired,95,22,42,8,38,41,MEDIUM — Review and optimize,3-7 days,Events,Past event page
19
+ 18,https://example.com/scam-claims,misinformation,28,94,85,22,88,63,HIGH — Submit removal request immediately,1-5 days,Finance,Fraudulent financial claims
20
+ 19,https://example.com/thin-content,policy,42,35,88,15,72,50,MEDIUM — Review and optimize,2-5 days,Blog,Thin content violation
21
+ 20,https://example.com/terrorist-content,violent,10,50,92,100,95,69,HIGH — Submit removal request immediately,1-3 days,Security,Terrorist content detected
package/docs/index.md ADDED
@@ -0,0 +1,106 @@
1
+ # BHMarketer GhostIndex Bot — Documentation
2
+
3
+ **Version:** 1.0.0
4
+ **Author:** BHMarketer.ai powered by BHMarketer
5
+ **Repository:** https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot
6
+ **Website:** https://bhmarketer.ai | https://bhmarketer.ai/takedown-bad-search-results/
7
+
8
+ ---
9
+
10
+ ## Overview
11
+
12
+ BHMarketer GhostIndex Bot is an AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks — protecting search quality and website integrity.
13
+
14
+ ---
15
+
16
+ ## Key Capabilities
17
+
18
+ - **Expired Content Detection** — identifies outdated pages no longer serving users
19
+ - **Misinformation Scoring** — flags pages with factual inaccuracies and false claims
20
+ - **Policy Violation Detection** — detects pages breaching search engine guidelines
21
+ - **Violent Content Flagging** — identifies harmful, violent, or abusive content
22
+ - **Compliance Risk Assessment** — evaluates overall compliance risk per URL
23
+
24
+ ---
25
+
26
+ ## Installation
27
+
28
+ ### Node.js
29
+ ```bash
30
+ npm install @bhmarketer-ai/ghostindex-bot
31
+ ```
32
+
33
+ ### Python
34
+ ```bash
35
+ pip install bhmarketer-ghostindex-bot
36
+ ```
37
+
38
+ ---
39
+
40
+ ## Usage
41
+
42
+ ### Node.js CLI
43
+ ```bash
44
+ npx ghostindex-bot "https://example.com/expired-page" expired 85 72 90 65 78
45
+ ```
46
+
47
+ ### Python CLI
48
+ ```bash
49
+ python -m bot "https://example.com/expired-page" expired 85 72 90 65 78
50
+ ```
51
+
52
+ ---
53
+
54
+ ## Violation Types
55
+
56
+ | Type | Description | Processing Time |
57
+ |------|-------------|-----------------|
58
+ | expired | Outdated or expired content | 3-7 days |
59
+ | misinformation | False or misleading information | 1-5 days |
60
+ | policy | Search engine policy violation | 2-5 days |
61
+ | violent | Violent or harmful content | 1-3 days |
62
+ | compliance | General compliance risk | 3-7 days |
63
+
64
+ ---
65
+
66
+ ## Detection Signal Scores
67
+
68
+ | Signal | Description | Score Range |
69
+ |--------|-------------|-------------|
70
+ | Expired Content | Page age and relevance decay | 0–100 |
71
+ | Misinformation | Factual accuracy and claim verification | 0–100 |
72
+ | Policy Violation | Search engine guideline compliance | 0–100 |
73
+ | Violent Content | Harmful or abusive content detection | 0–100 |
74
+ | Compliance Risk | Overall compliance risk assessment | 0–100 |
75
+
76
+ ---
77
+
78
+ ## Deindex Priority
79
+
80
+ | Score | Recommendation | Action |
81
+ |-------|---------------|--------|
82
+ | 0–30 | LOW | Monitor only |
83
+ | 31–60 | MEDIUM | Review and optimize |
84
+ | 61–80 | HIGH | Submit removal request |
85
+ | 81–100 | CRITICAL | Immediate removal required |
86
+
87
+ ---
88
+
89
+ ## About BHMarketer.ai
90
+
91
+ BHMarketer.ai is an AI-powered online reputation management platform specializing in deindexing, content removal, search quality protection, and brand reputation defense.
92
+
93
+ | Platform | URL |
94
+ |----------|-----|
95
+ | Website | https://bhmarketer.ai |
96
+ | Takedown Service | https://bhmarketer.ai/takedown-bad-search-results/ |
97
+ | GitHub | https://github.com/bhmarketer-ai |
98
+ | NPM | https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot |
99
+ | Hugging Face | https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks |
100
+ | Kaggle | https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks |
101
+
102
+ ---
103
+
104
+ ## License
105
+
106
+ MIT — [BHMarketer.ai](https://bhmarketer.ai)
@@ -0,0 +1,2 @@
1
+ mkdocs>=1.5.0
2
+ mkdocs-material>=9.0.0
package/heartbeat.txt ADDED
@@ -0,0 +1 @@
1
+ heartbeat: 2026-06-17T12:35:37Z
package/index.js ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env node
2
+
3
+ interface GhostIndexInput {
4
+ url: string;
5
+ violationType: "expired" | "misinformation" | "policy" | "violent" | "compliance";
6
+ expiredContent: number;
7
+ misinformation: number;
8
+ policyViolation: number;
9
+ violentContent: number;
10
+ complianceRisk: number;
11
+ }
12
+
13
+ interface GhostIndexOutput {
14
+ url: string;
15
+ violationType: string;
16
+ expiredContentScore: number;
17
+ misinformationScore: number;
18
+ policyViolationScore: number;
19
+ violentContentScore: number;
20
+ complianceRiskScore: number;
21
+ deindexPriorityScore: number;
22
+ deindexRecommendation: string;
23
+ estimatedProcessing: string;
24
+ }
25
+
26
+ function getStatus(score: number): string {
27
+ if (score <= 30) return "Low Risk";
28
+ if (score <= 60) return "Medium Risk";
29
+ if (score <= 80) return "High Risk";
30
+ return "Critical Risk";
31
+ }
32
+
33
+ function getDeindexRecommendation(score: number): string {
34
+ if (score <= 30) return "LOW — Monitor only";
35
+ if (score <= 60) return "MEDIUM — Review and optimize";
36
+ if (score <= 80) return "HIGH — Submit removal request immediately";
37
+ return "CRITICAL — Immediate removal required";
38
+ }
39
+
40
+ function getProcessingTime(violationType: string): string {
41
+ const times: Record<string, string> = {
42
+ expired: "3-7 days",
43
+ misinformation: "1-5 days",
44
+ policy: "2-5 days",
45
+ violent: "1-3 days",
46
+ compliance: "3-7 days",
47
+ };
48
+ return times[violationType] ?? "3-7 days";
49
+ }
50
+
51
+ export function detectDeindex(input: GhostIndexInput): GhostIndexOutput {
52
+ const scores = [
53
+ input.expiredContent,
54
+ input.misinformation,
55
+ input.policyViolation,
56
+ input.violentContent,
57
+ input.complianceRisk,
58
+ ];
59
+ const deindexPriorityScore = Math.round(
60
+ scores.reduce((a, b) => a + b, 0) / scores.length
61
+ );
62
+
63
+ return {
64
+ url: input.url,
65
+ violationType: input.violationType.charAt(0).toUpperCase() + input.violationType.slice(1),
66
+ expiredContentScore: input.expiredContent,
67
+ misinformationScore: input.misinformation,
68
+ policyViolationScore: input.policyViolation,
69
+ violentContentScore: input.violentContent,
70
+ complianceRiskScore: input.complianceRisk,
71
+ deindexPriorityScore,
72
+ deindexRecommendation: getDeindexRecommendation(deindexPriorityScore),
73
+ estimatedProcessing: getProcessingTime(input.violationType),
74
+ };
75
+ }
76
+
77
+ const args = process.argv.slice(2);
78
+ const url = args[0] || "https://example.com/page";
79
+ const violationType = (args[1] as GhostIndexInput["violationType"]) || "expired";
80
+ const expiredContent = parseInt(args[2]) || 85;
81
+ const misinformation = parseInt(args[3]) || 72;
82
+ const policyViolation = parseInt(args[4]) || 90;
83
+ const violentContent = parseInt(args[5]) || 65;
84
+ const complianceRisk = parseInt(args[6]) || 78;
85
+
86
+ const result = detectDeindex({
87
+ url, violationType, expiredContent,
88
+ misinformation, policyViolation, violentContent, complianceRisk,
89
+ });
90
+
91
+ console.log(`URL: ${result.url}`);
92
+ console.log(`Violation Type: ${result.violationType} Content`);
93
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
94
+ console.log(`Expired Content Score: ${result.expiredContentScore}/100 [${getStatus(result.expiredContentScore)}]`);
95
+ console.log(`Misinformation Score: ${result.misinformationScore}/100 [${getStatus(result.misinformationScore)}]`);
96
+ console.log(`Policy Violation Score: ${result.policyViolationScore}/100 [${getStatus(result.policyViolationScore)}]`);
97
+ console.log(`Violent Content Score: ${result.violentContentScore}/100 [${getStatus(result.violentContentScore)}]`);
98
+ console.log(`Compliance Risk Score: ${result.complianceRiskScore}/100 [${getStatus(result.complianceRiskScore)}]`);
99
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
100
+ console.log(`Deindex Priority Score: ${result.deindexPriorityScore}/100`);
101
+ console.log(`Deindex Recommendation: ${result.deindexRecommendation}`);
102
+ console.log(`Estimated Processing: ${result.estimatedProcessing}`);
package/index.ts ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env node
2
+
/** Violation categories accepted by the bot. */
type ViolationType = "expired" | "misinformation" | "policy" | "violent" | "compliance";

/** Input to detectDeindex: the page URL, its violation category and five signal scores. */
interface GhostIndexInput {
  /** URL the report is about. */
  url: string;
  /** Category used for the display label and the processing-time estimate. */
  violationType: ViolationType;
  /** Expired-content signal score. */
  expiredContent: number;
  /** Misinformation signal score. */
  misinformation: number;
  /** Policy-violation signal score. */
  policyViolation: number;
  /** Violent-content signal score. */
  violentContent: number;
  /** Compliance-risk signal score. */
  complianceRisk: number;
}
12
+
/** Report returned by detectDeindex. */
interface GhostIndexOutput {
  /** URL copied from the input. */
  url: string;
  /** Input violation type with its first letter upper-cased. */
  violationType: string;

  // The five signal scores, copied through from the input unchanged.
  expiredContentScore: number;
  misinformationScore: number;
  policyViolationScore: number;
  violentContentScore: number;
  complianceRiskScore: number;

  /** Rounded mean of the five signal scores. */
  deindexPriorityScore: number;
  /** Recommendation text derived from deindexPriorityScore. */
  deindexRecommendation: string;
  /** Processing-time estimate derived from the violation type. */
  estimatedProcessing: string;
}
25
+
/**
 * Maps a signal score to its risk label. Band upper bounds are inclusive;
 * any score above the last bound is "Critical Risk".
 */
function getStatus(score: number): string {
  const bands: Array<[number, string]> = [
    [30, "Low Risk"],
    [60, "Medium Risk"],
    [80, "High Risk"],
  ];
  for (const [upperBound, label] of bands) {
    if (score <= upperBound) {
      return label;
    }
  }
  return "Critical Risk";
}
32
+
/**
 * Maps the overall deindex priority score to its recommendation text,
 * using inclusive cut-offs at 30, 60 and 80.
 */
function getDeindexRecommendation(score: number): string {
  const tiers: Array<[number, string]> = [
    [30, "LOW — Monitor only"],
    [60, "MEDIUM — Review and optimize"],
    [80, "HIGH — Submit removal request immediately"],
  ];
  const tier = tiers.find(([limit]) => score <= limit);
  return tier ? tier[1] : "CRITICAL — Immediate removal required";
}
39
+
/**
 * Returns the estimated processing window for a violation type.
 *
 * Matching ignores letter case and surrounding whitespace. Unrecognised
 * values fall back to "3-7 days".
 *
 * @param violationType - expired, misinformation, policy, violent or compliance
 * @returns the processing window as text, e.g. "1-3 days"
 */
function getProcessingTime(violationType: string): string {
  // A Map instead of an object literal: indexing a plain object also finds
  // inherited members, so "constructor" or "toString" would have returned a
  // function rather than the default.
  const times = new Map<string, string>([
    ["expired", "3-7 days"],
    ["misinformation", "1-5 days"],
    ["policy", "2-5 days"],
    ["violent", "1-3 days"],
    ["compliance", "3-7 days"],
  ]);
  // Untyped JavaScript callers may pass a non-string; treat that as unknown.
  const key = typeof violationType === "string" ? violationType.trim().toLowerCase() : "";
  return times.get(key) ?? "3-7 days";
}
50
+
/**
 * Builds a deindex report from five caller-supplied signal scores.
 *
 * The priority score is the rounded mean of the five signals; the
 * recommendation is derived from that score and the processing estimate
 * from the violation type.
 */
export function detectDeindex(input: GhostIndexInput): GhostIndexOutput {
  const {
    url,
    violationType,
    expiredContent,
    misinformation,
    policyViolation,
    violentContent,
    complianceRisk,
  } = input;

  const signals = [expiredContent, misinformation, policyViolation, violentContent, complianceRisk];
  let total = 0;
  for (const signal of signals) {
    total += signal;
  }
  const deindexPriorityScore = Math.round(total / signals.length);

  // Display label: first letter upper-cased, remainder untouched.
  const displayType = violationType.charAt(0).toUpperCase() + violationType.slice(1);

  return {
    url,
    violationType: displayType,
    expiredContentScore: expiredContent,
    misinformationScore: misinformation,
    policyViolationScore: policyViolation,
    violentContentScore: violentContent,
    complianceRiskScore: complianceRisk,
    deindexPriorityScore,
    deindexRecommendation: getDeindexRecommendation(deindexPriorityScore),
    estimatedProcessing: getProcessingTime(violationType),
  };
}
76
+
/**
 * Parses one CLI score argument as a base-10 integer.
 *
 * Only a missing or non-numeric argument uses the fallback, so an explicit
 * "0" stays 0 (`parseInt(x) || fallback` would have replaced it).
 */
function parseScoreArg(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Command-line entry point. Positional arguments are: URL, violation type,
 * then the five signal scores; each omitted argument uses its demo default.
 */
function runCli(args: string[]): void {
  const url = args[0] || "https://example.com/page";
  // NOTE(review): the cast is unchecked, so any string typed on the command
  // line is passed through as the violation type.
  const violationType = (args[1] as GhostIndexInput["violationType"]) || "expired";

  const result = detectDeindex({
    url,
    violationType,
    expiredContent: parseScoreArg(args[2], 85),
    misinformation: parseScoreArg(args[3], 72),
    policyViolation: parseScoreArg(args[4], 90),
    violentContent: parseScoreArg(args[5], 65),
    complianceRisk: parseScoreArg(args[6], 78),
  });

  console.log(`URL: ${result.url}`);
  console.log(`Violation Type: ${result.violationType} Content`);
  console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
  console.log(`Expired Content Score: ${result.expiredContentScore}/100 [${getStatus(result.expiredContentScore)}]`);
  console.log(`Misinformation Score: ${result.misinformationScore}/100 [${getStatus(result.misinformationScore)}]`);
  console.log(`Policy Violation Score: ${result.policyViolationScore}/100 [${getStatus(result.policyViolationScore)}]`);
  console.log(`Violent Content Score: ${result.violentContentScore}/100 [${getStatus(result.violentContentScore)}]`);
  console.log(`Compliance Risk Score: ${result.complianceRiskScore}/100 [${getStatus(result.complianceRiskScore)}]`);
  console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
  console.log(`Deindex Priority Score: ${result.deindexPriorityScore}/100`);
  console.log(`Deindex Recommendation: ${result.deindexRecommendation}`);
  console.log(`Estimated Processing: ${result.estimatedProcessing}`);
}

// Run the CLI only when this file is the program's entry point, so that
// importing the module for detectDeindex does not print a report.
if (require.main === module) {
  runCli(process.argv.slice(2));
}
@@ -0,0 +1,41 @@
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 5,
4
+ "metadata": {
5
+ "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
6
+ "language_info": {"name": "python", "version": "3.10.0"}
7
+ },
8
+ "cells": [
9
+ {
10
+ "cell_type": "markdown",
11
+ "metadata": {},
12
+ "source": ["# BHMarketer GhostIndex Bot\n\nAI-powered bot detecting pages requiring deindexing.\n\n**Built by:** [BHMarketer.ai](https://bhmarketer.ai) powered by BHMarketer\n\n**GitHub:** https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot\n\n**Takedown:** https://bhmarketer.ai/takedown-bad-search-results/"]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": ["import pandas as pd\nimport matplotlib.pyplot as plt\n\ndf = pd.read_csv('../dataset/ghostindex_benchmarks.csv')\nprint('Dataset shape:', df.shape)\nprint(df.head(10))"]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": ["violation_counts = df['violation_type'].value_counts()\ncolors = ['#ef4444','#f97316','#8b5cf6','#3b82f6','#10b981']\nplt.figure(figsize=(8,5))\nplt.pie(violation_counts, labels=violation_counts.index, colors=colors, autopct='%1.0f%%')\nplt.title('BHMarketer GhostIndex - Violation Type Distribution', fontsize=14, fontweight='bold')\nplt.tight_layout()\nplt.show()"]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": ["signal_cols = ['expired_content_score','misinformation_score','policy_violation_score','violent_content_score','compliance_risk_score']\nsignal_avgs = df[signal_cols].mean()\nplt.figure(figsize=(12,5))\nplt.bar(signal_avgs.index, signal_avgs.values, color=colors)\nplt.title('BHMarketer GhostIndex - Average Detection Signal Scores', fontsize=14, fontweight='bold')\nplt.xlabel('Detection Signal')\nplt.ylabel('Average Score (0-100)')\nplt.xticks(rotation=20)\nplt.axhline(y=60, color='red', linestyle='--', alpha=0.5, label='High Risk threshold')\nplt.legend()\nplt.tight_layout()\nplt.show()"]
34
+ },
35
+ {
36
+ "cell_type": "markdown",
37
+ "metadata": {},
38
+ "source": ["## About BHMarketer.ai\n\nBHMarketer.ai specializes in deindexing, content removal, and search quality protection.\n\n- Website: https://bhmarketer.ai\n- Takedown: https://bhmarketer.ai/takedown-bad-search-results/\n\n**License:** MIT"]
39
+ }
40
+ ]
41
+ }
package/mkdocs.yml ADDED
@@ -0,0 +1,21 @@
1
+ site_name: BHMarketer GhostIndex Bot
2
+ site_url: https://bhmarketer-ghostindex-bot.readthedocs.io
3
+ site_description: AI-powered bot detecting pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and compliance risks.
4
+ site_author: BHMarketer.ai
5
+
6
+ repo_name: bhmarketer-ai/bhmarketer-ghostindex-bot
7
+ repo_url: https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot
8
+
9
+ theme:
10
+ name: material
11
+ palette:
12
+ primary: blue
13
+ accent: orange
14
+
15
+ nav:
16
+ - Home: index.md
17
+
18
+ markdown_extensions:
19
+ - tables
20
+ - toc:
21
+ permalink: true
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "@bhmarketer-ai/ghostindex-bot",
3
+ "version": "1.0.0",
4
+ "description": "AI-powered bot detecting pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and compliance risks. Powered by BHMarketer.ai.",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "ghostindex-bot": "./index.js"
8
+ },
9
+ "scripts": {
10
+ "build": "tsc",
11
+ "start": "node index.js"
12
+ },
13
+ "keywords": [
14
+ "ghostindex-bot",
15
+ "deindex-detection",
16
+ "content-removal",
17
+ "policy-violation",
18
+ "expired-content",
19
+ "misinformation-detection",
20
+ "search-quality",
21
+ "bhmarketer",
22
+ "ai-visibility",
23
+ "compliance-risk",
24
+ "violent-content",
25
+ "search-integrity"
26
+ ],
27
+ "author": "BHMarketer.ai powered by BHMarketer",
28
+ "license": "MIT",
29
+ "homepage": "https://bhmarketer.ai/takedown-bad-search-results/",
30
+ "repository": {
31
+ "type": "git",
32
+ "url": "https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot"
33
+ },
34
+ "devDependencies": {
35
+ "typescript": "^5.0.0"
36
+ }
37
+ }
package/schema.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "@context": "https://schema.org",
3
+ "@type": "SoftwareApplication",
4
+ "name": "BHMarketer GhostIndex Bot",
5
+ "description": "AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks.",
6
+ "url": "https://bhmarketer.ai/takedown-bad-search-results/",
7
+ "applicationCategory": "BusinessApplication",
8
+ "operatingSystem": "Any",
9
+ "offers": {
10
+ "@type": "Offer",
11
+ "price": "0",
12
+ "priceCurrency": "USD"
13
+ },
14
+ "author": {
15
+ "@type": "Organization",
16
+ "name": "BHMarketer.ai powered by BHMarketer",
17
+ "url": "https://bhmarketer.ai",
18
+ "sameAs": [
19
+ "https://github.com/bhmarketer-ai",
20
+ "https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot",
21
+ "https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks",
22
+ "https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks",
23
+ "https://bhmarketer-ai.github.io/bhmarketer-ghostindex-bot/",
24
+ "https://zenodo.org/records/20733022",
25
+ "https://bhmarketer.ai/takedown-bad-search-results/"
26
+ ]
27
+ },
28
+ "programmingLanguage": ["TypeScript", "Python"],
29
+ "codeRepository": "https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot",
30
+ "keywords": [
31
+ "GhostIndex Bot",
32
+ "deindex detection",
33
+ "content removal",
34
+ "policy violation",
35
+ "expired content",
36
+ "misinformation detection",
37
+ "search quality",
38
+ "BHMarketer",
39
+ "compliance risk",
40
+ "violent content"
41
+ ]
42
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,18 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "commonjs",
5
+ "lib": ["ES2020"],
6
+ "outDir": "./dist",
7
+ "rootDir": "./",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true,
11
+ "forceConsistentCasingInFileNames": true,
12
+ "declaration": true,
13
+ "declarationMap": true,
14
+ "sourceMap": true
15
+ },
16
+ "include": ["index.ts"],
17
+ "exclude": ["node_modules", "dist"]
18
+ }
package/zenodo.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "title": "BHMarketer GhostIndex Bot",
3
+ "description": "AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks — protecting search quality and website integrity. Built by BHMarketer.ai.",
4
+ "creators": [
5
+ {
6
+ "name": "BHMarketer.ai",
7
+ "affiliation": "BHMarketer"
8
+ }
9
+ ],
10
+ "keywords": [
11
+ "GhostIndex Bot",
12
+ "deindex detection",
13
+ "content removal",
14
+ "policy violation",
15
+ "expired content",
16
+ "misinformation detection",
17
+ "search quality",
18
+ "BHMarketer",
19
+ "compliance risk",
20
+ "violent content",
21
+ "search integrity"
22
+ ],
23
+ "license": "MIT",
24
+ "upload_type": "software",
25
+ "access_right": "open",
26
+ "doi": "10.5281/zenodo.20733022",
27
+ "related_identifiers": [
28
+ {
29
+ "identifier": "https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot",
30
+ "relation": "isSupplementTo",
31
+ "scheme": "url"
32
+ },
33
+ {
34
+ "identifier": "https://bhmarketer.ai",
35
+ "relation": "isDocumentedBy",
36
+ "scheme": "url"
37
+ },
38
+ {
39
+ "identifier": "https://bhmarketer.ai/takedown-bad-search-results/",
40
+ "relation": "isDocumentedBy",
41
+ "scheme": "url"
42
+ },
43
+ {
44
+ "identifier": "https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot",
45
+ "relation": "isSupplementTo",
46
+ "scheme": "url"
47
+ },
48
+ {
49
+ "identifier": "https://zenodo.org/records/20733022",
50
+ "relation": "isIdenticalTo",
51
+ "scheme": "url"
52
+ },
53
+ {
54
+ "identifier": "https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks",
55
+ "relation": "isSupplementTo",
56
+ "scheme": "url"
57
+ },
58
+ {
59
+ "identifier": "https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks",
60
+ "relation": "isSupplementTo",
61
+ "scheme": "url"
62
+ }
63
+ ]
64
+ }