@bhmarketer-ai/ghostindex-bot 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/heartbeat.yml +34 -0
- package/.github/workflows/npm-publish.yml +45 -0
- package/.readthedocs.yaml +13 -0
- package/LICENSE +21 -0
- package/README.md +135 -0
- package/bot.py +109 -0
- package/dataset/ghostindex_benchmarks.csv +21 -0
- package/docs/index.md +106 -0
- package/docs/requirements.txt +2 -0
- package/heartbeat.txt +1 -0
- package/index.js +102 -0
- package/index.ts +102 -0
- package/kaggle/notebook.ipynb +41 -0
- package/mkdocs.yml +21 -0
- package/package.json +37 -0
- package/schema.json +42 -0
- package/tsconfig.json +18 -0
- package/zenodo.json +64 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Heartbeat workflow: rewrites heartbeat.txt with the current UTC timestamp
# and pushes the change back to the repository.
name: Heartbeat

on:
  # Manual trigger from the Actions tab.
  workflow_dispatch:
  # Scheduled trigger: every day at 09:00 UTC.
  schedule:
    - cron: '0 9 * * *'

env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'

jobs:
  heartbeat:
    # Write access is required because the job pushes a commit.
    permissions:
      contents: write
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      # Overwrite (not append) the file with a single ISO-8601 UTC timestamp.
      - name: Update heartbeat
        run: |
          echo "heartbeat: $(date -u +%Y-%m-%dT%H:%M:%SZ)" > heartbeat.txt

      # Commit only when the staged file actually differs, then push.
      - name: Commit and push
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add heartbeat.txt
          git diff --staged --quiet || git commit -m "chore: heartbeat update $(date -u +%Y-%m-%d)"
          git push
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Manually triggered workflow that compiles index.ts and publishes the
# package to the public npm registry.
name: Publish to NPM

on:
  workflow_dispatch:

env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

jobs:
  publish:
    runs-on: ubuntu-24.04
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          registry-url: 'https://registry.npmjs.org'

      - name: Install TypeScript globally
        run: npm install -g typescript

      # A failed install must stop the release instead of being ignored.
      - name: Install dependencies
        run: npm install --ignore-scripts

      # Build index.js from index.ts. The step fails when compilation fails;
      # TypeScript source is never copied over index.js as a fallback, because
      # that publishes a file Node.js cannot parse.
      - name: Compile TypeScript
        run: |
          # Drop any stale index.js so only freshly compiled output is shipped.
          rm -f index.js
          tsc index.ts \
            --target ES2020 \
            --module commonjs \
            --esModuleInterop true \
            --skipLibCheck true \
            --allowJs true
          # Syntax-check the emitted file without executing it.
          node --check index.js

      - name: Publish to NPM
        run: npm publish --access public
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 BHMarketer.ai
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# BHMarketer GhostIndex Bot 🔍
|
|
2
|
+
|
|
3
|
+
[npm package](https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot)
|
|
4
|
+
[DOI: 10.5281/zenodo.20733022](https://doi.org/10.5281/zenodo.20733022)
|
|
5
|
+
|
|
6
|
+
AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks — protecting search quality and website integrity. Built by [BHMarketer.ai](https://bhmarketer.ai) powered by BHMarketer.
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- Expired Content Detection — identifies outdated pages no longer serving users
|
|
11
|
+
- Misinformation Scoring — flags pages with factual inaccuracies and false claims
|
|
12
|
+
- Policy Violation Detection — detects pages breaching search engine guidelines
|
|
13
|
+
- Violent Content Flagging — identifies harmful, violent, or abusive content
|
|
14
|
+
- Compliance Risk Assessment — evaluates overall compliance risk per URL
|
|
15
|
+
- Deindex Priority Scoring — ranks pages by urgency of removal
|
|
16
|
+
- Search Quality Protection — safeguards website integrity and search rankings
|
|
17
|
+
- CLI support in Node.js and Python
|
|
18
|
+
- Benchmark dataset included (20 deindex detection cases)
|
|
19
|
+
- Lightweight, publish-ready, minimal dependencies
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
### Node.js
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
npm install @bhmarketer-ai/ghostindex-bot
|
|
27
|
+
npx ghostindex-bot "https://example.com/expired-page" expired 85 72 90 65 78
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Python
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install bhmarketer-ghostindex-bot
|
|
34
|
+
python -m bot "https://example.com/expired-page" expired 85 72 90 65 78
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Output
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
URL: https://example.com/expired-page
|
|
41
|
+
Violation Type: Expired Content
|
|
42
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
43
|
+
Expired Content Score: 85 / 100 [Critical Risk]
|
|
44
|
+
Misinformation Score: 72 / 100 [High Risk]
|
|
45
|
+
Policy Violation Score: 90 / 100 [Critical Risk]
|
|
46
|
+
Violent Content Score: 65 / 100 [High Risk]
|
|
47
|
+
Compliance Risk Score: 78 / 100 [High Risk]
|
|
48
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
49
|
+
Deindex Priority Score: 78 / 100
|
|
50
|
+
Deindex Recommendation: HIGH — Submit removal request immediately
|
|
51
|
+
Estimated Processing: 3-7 days
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Project Structure
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
bhmarketer-ghostindex-bot/
|
|
58
|
+
├── index.ts # TypeScript bot
|
|
59
|
+
├── bot.py # Python bot
|
|
60
|
+
├── package.json # NPM package config
|
|
61
|
+
├── package-lock.json # NPM lock file
|
|
62
|
+
├── tsconfig.json # TypeScript config
|
|
63
|
+
├── schema.json # JSON-LD structured data
|
|
64
|
+
├── zenodo.json # Zenodo metadata
|
|
65
|
+
├── heartbeat.txt # Auto-updated daily
|
|
66
|
+
├── mkdocs.yml # ReadTheDocs config
|
|
67
|
+
├── .readthedocs.yaml # ReadTheDocs build config
|
|
68
|
+
├── docs/
|
|
69
|
+
│ ├── index.md # Documentation
|
|
70
|
+
│ └── requirements.txt
|
|
71
|
+
├── dataset/
|
|
72
|
+
│ └── ghostindex_benchmarks.csv
|
|
73
|
+
├── kaggle/
|
|
74
|
+
│ └── notebook.ipynb
|
|
75
|
+
├── .github/workflows/
|
|
76
|
+
│ ├── heartbeat.yml # Auto-commit daily
|
|
77
|
+
│ └── npm-publish.yml # Auto-publish to NPM
|
|
78
|
+
├── README.md
|
|
79
|
+
└── LICENSE
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Violation Types
|
|
83
|
+
|
|
84
|
+
| Type | Description | Priority |
|
|
85
|
+
|------|-------------|----------|
|
|
86
|
+
| expired | Outdated or expired content | High |
|
|
87
|
+
| misinformation | False or misleading information | Critical |
|
|
88
|
+
| policy | Search engine policy violation | High |
|
|
89
|
+
| violent | Violent or harmful content | Critical |
|
|
90
|
+
| compliance | General compliance risk | Medium |
|
|
91
|
+
|
|
92
|
+
## Detection Signal Scores
|
|
93
|
+
|
|
94
|
+
| Signal | Description | Score Range |
|
|
95
|
+
|--------|-------------|-------------|
|
|
96
|
+
| Expired Content | Page age and relevance decay | 0–100 |
|
|
97
|
+
| Misinformation | Factual accuracy and claim verification | 0–100 |
|
|
98
|
+
| Policy Violation | Search engine guideline compliance | 0–100 |
|
|
99
|
+
| Violent Content | Harmful or abusive content detection | 0–100 |
|
|
100
|
+
| Compliance Risk | Overall compliance risk assessment | 0–100 |
|
|
101
|
+
|
|
102
|
+
## Deindex Priority
|
|
103
|
+
|
|
104
|
+
| Score | Recommendation | Action |
|
|
105
|
+
|-------|---------------|--------|
|
|
106
|
+
| 0–30 | LOW | Monitor only |
|
|
107
|
+
| 31–60 | MEDIUM | Review and optimize |
|
|
108
|
+
| 61–80 | HIGH | Submit removal request |
|
|
109
|
+
| 81–100 | CRITICAL | Immediate removal required |
|
|
110
|
+
|
|
111
|
+
## Keywords
|
|
112
|
+
|
|
113
|
+
GhostIndex Bot · Deindex Detection · Content Removal · Policy Violation · Expired Content · Misinformation Detection · Search Quality · BHMarketer · AI Visibility · Compliance Risk
|
|
114
|
+
|
|
115
|
+
## Links
|
|
116
|
+
|
|
117
|
+
| Platform | URL |
|
|
118
|
+
|----------|-----|
|
|
119
|
+
| Website | https://bhmarketer.ai |
|
|
120
|
+
| Takedown Service | https://bhmarketer.ai/takedown-bad-search-results/ |
|
|
121
|
+
| GitHub | https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot |
|
|
122
|
+
| GitHub Pages | https://bhmarketer-ai.github.io/bhmarketer-ghostindex-bot/ |
|
|
123
|
+
| NPM | https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot |
|
|
124
|
+
| Hugging Face | https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks |
|
|
125
|
+
| Kaggle | https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks |
|
|
126
|
+
| Zenodo | https://zenodo.org/records/20733022 |
|
|
127
|
+
| Docs | https://bhmarketer-ghostindex-bot.readthedocs.io |
|
|
128
|
+
|
|
129
|
+
## About BHMarketer.ai
|
|
130
|
+
|
|
131
|
+
BHMarketer.ai is an AI-powered online reputation management platform specializing in deindexing, content removal, search quality protection, and brand reputation defense.
|
|
132
|
+
|
|
133
|
+
## License
|
|
134
|
+
|
|
135
|
+
MIT — [BHMarketer.ai](https://bhmarketer.ai)
|
package/bot.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
BHMarketer GhostIndex Bot
|
|
4
|
+
AI-powered bot that detects pages requiring deindexing due to expired content,
|
|
5
|
+
misinformation, policy violations, violent content, and other compliance risks.
|
|
6
|
+
https://bhmarketer.ai | https://bhmarketer.ai/takedown-bad-search-results/
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_status(score: int) -> str:
    """Return the risk-band label for a single signal score.

    Bands use inclusive upper bounds: up to 30 is "Low Risk", up to 60 is
    "Medium Risk", up to 80 is "High Risk"; anything above is "Critical Risk".
    """
    bands = (
        (30, "Low Risk"),
        (60, "Medium Risk"),
        (80, "High Risk"),
    )
    for upper_bound, label in bands:
        if score <= upper_bound:
            return label
    # No band matched, so the score is above the highest bound.
    return "Critical Risk"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_deindex_recommendation(score: int) -> str:
    """Translate a deindex priority score into a recommendation string.

    Tiers use inclusive upper bounds of 30, 60 and 80; a score above 80
    yields the CRITICAL recommendation.
    """
    tiers = (
        (30, "LOW — Monitor only"),
        (60, "MEDIUM — Review and optimize"),
        (80, "HIGH — Submit removal request immediately"),
    )
    # First tier whose ceiling covers the score wins; otherwise CRITICAL.
    return next(
        (advice for ceiling, advice in tiers if score <= ceiling),
        "CRITICAL — Immediate removal required",
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_processing_time(violation_type: str) -> str:
    """Return the estimated processing window for a violation type.

    Lookup is an exact, case-sensitive match on the violation type key.
    Any key that is not listed falls back to "3-7 days".
    """
    fallback = "3-7 days"
    windows = dict(
        expired="3-7 days",
        misinformation="1-5 days",
        policy="2-5 days",
        violent="1-3 days",
        compliance="3-7 days",
    )
    try:
        return windows[violation_type]
    except KeyError:
        return fallback
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def detect_deindex(
    url: str,
    violation_type: str = "expired",
    expired_content: int = 85,
    misinformation: int = 72,
    policy_violation: int = 90,
    violent_content: int = 65,
    compliance_risk: int = 78,
) -> dict:
    """
    Build a deindex report for one URL from five caller-supplied signal scores.

    The scores are not measured here; they are taken as given, echoed into the
    report, and averaged (rounded) into the deindex priority score.

    Args:
        url: Target URL the report is about
        violation_type: One of expired, misinformation, policy, violent, compliance
        expired_content: Expired content score (0-100)
        misinformation: Misinformation score (0-100)
        policy_violation: Policy violation score (0-100)
        violent_content: Violent content score (0-100)
        compliance_risk: Compliance risk score (0-100)

    Returns:
        dict holding the url, the capitalized violation type followed by
        " Content", the five signal scores, the priority score, the
        recommendation text and the estimated processing window
    """
    signal_scores = {
        "expired_content_score": expired_content,
        "misinformation_score": misinformation,
        "policy_violation_score": policy_violation,
        "violent_content_score": violent_content,
        "compliance_risk_score": compliance_risk,
    }
    # Priority is the rounded arithmetic mean of the five signals.
    priority = round(sum(signal_scores.values()) / len(signal_scores))

    report = {
        "url": url,
        "violation_type": violation_type.capitalize() + " Content",
    }
    report.update(signal_scores)
    report["deindex_priority_score"] = priority
    report["deindex_recommendation"] = get_deindex_recommendation(priority)
    report["estimated_processing"] = get_processing_time(violation_type)
    return report
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
    # Positional CLI arguments: url, violation type, then up to five scores.
    cli_args = sys.argv[1:]
    url = cli_args[0] if len(cli_args) >= 1 else "https://example.com/page"
    violation_type = cli_args[1] if len(cli_args) >= 2 else "expired"

    # Scores supplied on the command line override the defaults positionally;
    # any score that is not supplied keeps its default.
    default_scores = (85, 72, 90, 65, 78)
    supplied_scores = [int(text) for text in cli_args[2:7]]
    all_scores = supplied_scores + list(default_scores[len(supplied_scores):])

    result = detect_deindex(url, violation_type, *all_scores)

    divider = "=" * 45
    signal_rows = (
        ("Expired Content Score", "expired_content_score"),
        ("Misinformation Score", "misinformation_score"),
        ("Policy Violation Score", "policy_violation_score"),
        ("Violent Content Score", "violent_content_score"),
        ("Compliance Risk Score", "compliance_risk_score"),
    )

    print(f"URL: {result['url']}")
    print(f"Violation Type: {result['violation_type']}")
    print(divider)
    for label, key in signal_rows:
        value = result[key]
        print(f"{label}: {value}/100 [{get_status(value)}]")
    print(divider)
    print(f"Deindex Priority Score: {result['deindex_priority_score']}/100")
    print(f"Deindex Recommendation: {result['deindex_recommendation']}")
    print(f"Estimated Processing: {result['estimated_processing']}")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
id,url,violation_type,expired_content_score,misinformation_score,policy_violation_score,violent_content_score,compliance_risk_score,deindex_priority_score,deindex_recommendation,estimated_processing,industry,notes
|
|
2
|
+
1,https://example.com/old-blog-2019,expired,85,40,60,20,55,52,MEDIUM — Review and optimize,3-7 days,Blog,Outdated content no longer relevant
|
|
3
|
+
2,https://example.com/false-claims,misinformation,30,92,75,25,80,60,MEDIUM — Review and optimize,1-5 days,News,Contains factual inaccuracies
|
|
4
|
+
3,https://example.com/spam-page,policy,45,55,88,30,75,59,MEDIUM — Review and optimize,2-5 days,SEO,Keyword stuffing violation
|
|
5
|
+
4,https://example.com/violent-content,violent,25,35,70,95,85,62,HIGH — Submit removal request immediately,1-3 days,Social,Violent imagery detected
|
|
6
|
+
5,https://example.com/expired-product,expired,90,30,50,15,45,46,MEDIUM — Review and optimize,3-7 days,Ecommerce,Discontinued product page
|
|
7
|
+
6,https://example.com/fake-news,misinformation,35,95,80,20,85,63,HIGH — Submit removal request immediately,1-5 days,Media,Fabricated news article
|
|
8
|
+
7,https://example.com/cloaked-page,policy,40,50,92,25,80,57,MEDIUM — Review and optimize,2-5 days,SEO,Cloaking violation detected
|
|
9
|
+
8,https://example.com/hate-speech,violent,20,60,85,98,90,71,HIGH — Submit removal request immediately,1-3 days,Forum,Hate speech content
|
|
10
|
+
9,https://example.com/outdated-guide,expired,88,25,45,10,40,42,MEDIUM — Review and optimize,3-7 days,Education,Guide no longer accurate
|
|
11
|
+
10,https://example.com/conspiracy,misinformation,30,98,78,35,88,66,HIGH — Submit removal request immediately,1-5 days,Blog,Conspiracy theory content
|
|
12
|
+
11,https://example.com/link-farm,policy,35,40,95,20,82,54,MEDIUM — Review and optimize,2-5 days,SEO,Unnatural link scheme
|
|
13
|
+
12,https://example.com/abuse-content,violent,15,45,80,96,88,65,HIGH — Submit removal request immediately,1-3 days,Social,Abusive content detected
|
|
14
|
+
13,https://example.com/old-news-2018,expired,92,30,48,12,42,45,MEDIUM — Review and optimize,3-7 days,News,Outdated news article
|
|
15
|
+
14,https://example.com/medical-misinformation,misinformation,25,96,82,30,90,65,HIGH — Submit removal request immediately,1-5 days,Health,Dangerous medical misinformation
|
|
16
|
+
15,https://example.com/doorway-page,policy,38,42,90,18,78,53,MEDIUM — Review and optimize,2-5 days,SEO,Doorway page violation
|
|
17
|
+
16,https://example.com/graphic-violence,violent,18,38,75,99,88,64,HIGH — Submit removal request immediately,1-3 days,Media,Graphic violent content
|
|
18
|
+
17,https://example.com/expired-event,expired,95,22,42,8,38,41,MEDIUM — Review and optimize,3-7 days,Events,Past event page
|
|
19
|
+
18,https://example.com/scam-claims,misinformation,28,94,85,22,88,63,HIGH — Submit removal request immediately,1-5 days,Finance,Fraudulent financial claims
|
|
20
|
+
19,https://example.com/thin-content,policy,42,35,88,15,72,50,MEDIUM — Review and optimize,2-5 days,Blog,Thin content violation
|
|
21
|
+
20,https://example.com/terrorist-content,violent,10,50,92,100,95,69,HIGH — Submit removal request immediately,1-3 days,Security,Terrorist content detected
|
package/docs/index.md
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# BHMarketer GhostIndex Bot — Documentation
|
|
2
|
+
|
|
3
|
+
**Version:** 1.0.0
|
|
4
|
+
**Author:** BHMarketer.ai powered by BHMarketer
|
|
5
|
+
**Repository:** https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot
|
|
6
|
+
**Website:** https://bhmarketer.ai | https://bhmarketer.ai/takedown-bad-search-results/
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Overview
|
|
11
|
+
|
|
12
|
+
BHMarketer GhostIndex Bot is an AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks — protecting search quality and website integrity.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Key Capabilities
|
|
17
|
+
|
|
18
|
+
- **Expired Content Detection** — identifies outdated pages no longer serving users
|
|
19
|
+
- **Misinformation Scoring** — flags pages with factual inaccuracies and false claims
|
|
20
|
+
- **Policy Violation Detection** — detects pages breaching search engine guidelines
|
|
21
|
+
- **Violent Content Flagging** — identifies harmful, violent, or abusive content
|
|
22
|
+
- **Compliance Risk Assessment** — evaluates overall compliance risk per URL
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
### Node.js
|
|
29
|
+
```bash
|
|
30
|
+
npm install @bhmarketer-ai/ghostindex-bot
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Python
|
|
34
|
+
```bash
|
|
35
|
+
pip install bhmarketer-ghostindex-bot
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
### Node.js CLI
|
|
43
|
+
```bash
|
|
44
|
+
npx ghostindex-bot "https://example.com/expired-page" expired 85 72 90 65 78
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Python CLI
|
|
48
|
+
```bash
|
|
49
|
+
python -m bot "https://example.com/expired-page" expired 85 72 90 65 78
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Violation Types
|
|
55
|
+
|
|
56
|
+
| Type | Description | Processing Time |
|
|
57
|
+
|------|-------------|-----------------|
|
|
58
|
+
| expired | Outdated or expired content | 3-7 days |
|
|
59
|
+
| misinformation | False or misleading information | 1-5 days |
|
|
60
|
+
| policy | Search engine policy violation | 2-5 days |
|
|
61
|
+
| violent | Violent or harmful content | 1-3 days |
|
|
62
|
+
| compliance | General compliance risk | 3-7 days |
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Detection Signal Scores
|
|
67
|
+
|
|
68
|
+
| Signal | Description | Score Range |
|
|
69
|
+
|--------|-------------|-------------|
|
|
70
|
+
| Expired Content | Page age and relevance decay | 0–100 |
|
|
71
|
+
| Misinformation | Factual accuracy and claim verification | 0–100 |
|
|
72
|
+
| Policy Violation | Search engine guideline compliance | 0–100 |
|
|
73
|
+
| Violent Content | Harmful or abusive content detection | 0–100 |
|
|
74
|
+
| Compliance Risk | Overall compliance risk assessment | 0–100 |
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Deindex Priority
|
|
79
|
+
|
|
80
|
+
| Score | Recommendation | Action |
|
|
81
|
+
|-------|---------------|--------|
|
|
82
|
+
| 0–30 | LOW | Monitor only |
|
|
83
|
+
| 31–60 | MEDIUM | Review and optimize |
|
|
84
|
+
| 61–80 | HIGH | Submit removal request |
|
|
85
|
+
| 81–100 | CRITICAL | Immediate removal required |
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## About BHMarketer.ai
|
|
90
|
+
|
|
91
|
+
BHMarketer.ai is an AI-powered online reputation management platform specializing in deindexing, content removal, search quality protection, and brand reputation defense.
|
|
92
|
+
|
|
93
|
+
| Platform | URL |
|
|
94
|
+
|----------|-----|
|
|
95
|
+
| Website | https://bhmarketer.ai |
|
|
96
|
+
| Takedown Service | https://bhmarketer.ai/takedown-bad-search-results/ |
|
|
97
|
+
| GitHub | https://github.com/bhmarketer-ai |
|
|
98
|
+
| NPM | https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot |
|
|
99
|
+
| Hugging Face | https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks |
|
|
100
|
+
| Kaggle | https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks |
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
MIT — [BHMarketer.ai](https://bhmarketer.ai)
|
package/heartbeat.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
heartbeat: 2026-06-17T12:35:37Z
|
package/index.js
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
interface GhostIndexInput {
|
|
4
|
+
url: string;
|
|
5
|
+
violationType: "expired" | "misinformation" | "policy" | "violent" | "compliance";
|
|
6
|
+
expiredContent: number;
|
|
7
|
+
misinformation: number;
|
|
8
|
+
policyViolation: number;
|
|
9
|
+
violentContent: number;
|
|
10
|
+
complianceRisk: number;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
interface GhostIndexOutput {
|
|
14
|
+
url: string;
|
|
15
|
+
violationType: string;
|
|
16
|
+
expiredContentScore: number;
|
|
17
|
+
misinformationScore: number;
|
|
18
|
+
policyViolationScore: number;
|
|
19
|
+
violentContentScore: number;
|
|
20
|
+
complianceRiskScore: number;
|
|
21
|
+
deindexPriorityScore: number;
|
|
22
|
+
deindexRecommendation: string;
|
|
23
|
+
estimatedProcessing: string;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function getStatus(score: number): string {
|
|
27
|
+
if (score <= 30) return "Low Risk";
|
|
28
|
+
if (score <= 60) return "Medium Risk";
|
|
29
|
+
if (score <= 80) return "High Risk";
|
|
30
|
+
return "Critical Risk";
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function getDeindexRecommendation(score: number): string {
|
|
34
|
+
if (score <= 30) return "LOW — Monitor only";
|
|
35
|
+
if (score <= 60) return "MEDIUM — Review and optimize";
|
|
36
|
+
if (score <= 80) return "HIGH — Submit removal request immediately";
|
|
37
|
+
return "CRITICAL — Immediate removal required";
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
function getProcessingTime(violationType: string): string {
|
|
41
|
+
const times: Record<string, string> = {
|
|
42
|
+
expired: "3-7 days",
|
|
43
|
+
misinformation: "1-5 days",
|
|
44
|
+
policy: "2-5 days",
|
|
45
|
+
violent: "1-3 days",
|
|
46
|
+
compliance: "3-7 days",
|
|
47
|
+
};
|
|
48
|
+
return times[violationType] ?? "3-7 days";
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export function detectDeindex(input: GhostIndexInput): GhostIndexOutput {
|
|
52
|
+
const scores = [
|
|
53
|
+
input.expiredContent,
|
|
54
|
+
input.misinformation,
|
|
55
|
+
input.policyViolation,
|
|
56
|
+
input.violentContent,
|
|
57
|
+
input.complianceRisk,
|
|
58
|
+
];
|
|
59
|
+
const deindexPriorityScore = Math.round(
|
|
60
|
+
scores.reduce((a, b) => a + b, 0) / scores.length
|
|
61
|
+
);
|
|
62
|
+
|
|
63
|
+
return {
|
|
64
|
+
url: input.url,
|
|
65
|
+
violationType: input.violationType.charAt(0).toUpperCase() + input.violationType.slice(1),
|
|
66
|
+
expiredContentScore: input.expiredContent,
|
|
67
|
+
misinformationScore: input.misinformation,
|
|
68
|
+
policyViolationScore: input.policyViolation,
|
|
69
|
+
violentContentScore: input.violentContent,
|
|
70
|
+
complianceRiskScore: input.complianceRisk,
|
|
71
|
+
deindexPriorityScore,
|
|
72
|
+
deindexRecommendation: getDeindexRecommendation(deindexPriorityScore),
|
|
73
|
+
estimatedProcessing: getProcessingTime(input.violationType),
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const args = process.argv.slice(2);
|
|
78
|
+
const url = args[0] || "https://example.com/page";
|
|
79
|
+
const violationType = (args[1] as GhostIndexInput["violationType"]) || "expired";
|
|
80
|
+
const expiredContent = parseInt(args[2]) || 85;
|
|
81
|
+
const misinformation = parseInt(args[3]) || 72;
|
|
82
|
+
const policyViolation = parseInt(args[4]) || 90;
|
|
83
|
+
const violentContent = parseInt(args[5]) || 65;
|
|
84
|
+
const complianceRisk = parseInt(args[6]) || 78;
|
|
85
|
+
|
|
86
|
+
const result = detectDeindex({
|
|
87
|
+
url, violationType, expiredContent,
|
|
88
|
+
misinformation, policyViolation, violentContent, complianceRisk,
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
console.log(`URL: ${result.url}`);
|
|
92
|
+
console.log(`Violation Type: ${result.violationType} Content`);
|
|
93
|
+
console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
94
|
+
console.log(`Expired Content Score: ${result.expiredContentScore}/100 [${getStatus(result.expiredContentScore)}]`);
|
|
95
|
+
console.log(`Misinformation Score: ${result.misinformationScore}/100 [${getStatus(result.misinformationScore)}]`);
|
|
96
|
+
console.log(`Policy Violation Score: ${result.policyViolationScore}/100 [${getStatus(result.policyViolationScore)}]`);
|
|
97
|
+
console.log(`Violent Content Score: ${result.violentContentScore}/100 [${getStatus(result.violentContentScore)}]`);
|
|
98
|
+
console.log(`Compliance Risk Score: ${result.complianceRiskScore}/100 [${getStatus(result.complianceRiskScore)}]`);
|
|
99
|
+
console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
100
|
+
console.log(`Deindex Priority Score: ${result.deindexPriorityScore}/100`);
|
|
101
|
+
console.log(`Deindex Recommendation: ${result.deindexRecommendation}`);
|
|
102
|
+
console.log(`Estimated Processing: ${result.estimatedProcessing}`);
|
package/index.ts
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/** Violation categories accepted by the bot. */
type ViolationType =
  | "expired"
  | "misinformation"
  | "policy"
  | "violent"
  | "compliance";

/** Caller-supplied data for one URL: its violation type and five signal scores. */
interface GhostIndexInput {
  /** URL the report is about. */
  url: string;
  /** Violation category; also selects the estimated processing window. */
  violationType: ViolationType;
  /** Expired content signal score. */
  expiredContent: number;
  /** Misinformation signal score. */
  misinformation: number;
  /** Policy violation signal score. */
  policyViolation: number;
  /** Violent content signal score. */
  violentContent: number;
  /** Compliance risk signal score. */
  complianceRisk: number;
}

/** Report produced for one URL. */
interface GhostIndexOutput {
  /** URL copied from the input. */
  url: string;
  /** Display form of the violation type. */
  violationType: string;
  /** Expired content score copied from the input. */
  expiredContentScore: number;
  /** Misinformation score copied from the input. */
  misinformationScore: number;
  /** Policy violation score copied from the input. */
  policyViolationScore: number;
  /** Violent content score copied from the input. */
  violentContentScore: number;
  /** Compliance risk score copied from the input. */
  complianceRiskScore: number;
  /** Overall deindex priority score. */
  deindexPriorityScore: number;
  /** Recommendation text for the priority score. */
  deindexRecommendation: string;
  /** Estimated processing window. */
  estimatedProcessing: string;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Return the risk-band label for a signal score.
 * Bands use inclusive upper bounds of 30, 60 and 80; anything that matches
 * no band is "Critical Risk".
 */
function getStatus(score: number): string {
  const bands: ReadonlyArray<readonly [number, string]> = [
    [30, "Low Risk"],
    [60, "Medium Risk"],
    [80, "High Risk"],
  ];
  for (const [upperBound, label] of bands) {
    if (score <= upperBound) {
      return label;
    }
  }
  return "Critical Risk";
}
|
|
32
|
+
|
|
33
|
+
/**
 * Translate a deindex priority score into its recommendation text.
 * Tiers use inclusive upper bounds of 30, 60 and 80; above that the
 * CRITICAL recommendation is returned.
 */
function getDeindexRecommendation(score: number): string {
  const tiers: ReadonlyArray<readonly [number, string]> = [
    [30, "LOW — Monitor only"],
    [60, "MEDIUM — Review and optimize"],
    [80, "HIGH — Submit removal request immediately"],
  ];
  // First tier whose ceiling covers the score, if any.
  const matched = tiers.find(([ceiling]) => score <= ceiling);
  return matched ? matched[1] : "CRITICAL — Immediate removal required";
}
|
|
39
|
+
|
|
40
|
+
/**
 * Return the estimated processing window for a violation type.
 *
 * Lookup is an exact match; unlisted types fall back to "3-7 days".
 * A Map is used rather than an object literal because indexing a plain
 * object with an arbitrary string (the CLI passes its argument through
 * unchecked) resolves inherited members such as "constructor" or
 * "toString", which would be returned instead of the fallback.
 *
 * @param violationType violation type key
 * @returns processing window text, e.g. "1-3 days"
 */
function getProcessingTime(violationType: string): string {
  const times = new Map<string, string>([
    ["expired", "3-7 days"],
    ["misinformation", "1-5 days"],
    ["policy", "2-5 days"],
    ["violent", "1-3 days"],
    ["compliance", "3-7 days"],
  ]);
  return times.get(violationType) ?? "3-7 days";
}
|
|
50
|
+
|
|
51
|
+
/**
 * Build a deindex report from caller-supplied signal scores.
 *
 * The five scores are echoed into the report and averaged (rounded with
 * Math.round) into the deindex priority score, which in turn selects the
 * recommendation. The violation type is returned with its first letter
 * upper-cased and selects the estimated processing window.
 */
export function detectDeindex(input: GhostIndexInput): GhostIndexOutput {
  const {
    url,
    violationType,
    expiredContent,
    misinformation,
    policyViolation,
    violentContent,
    complianceRisk,
  } = input;

  const signals = [
    expiredContent,
    misinformation,
    policyViolation,
    violentContent,
    complianceRisk,
  ];
  let total = 0;
  for (const signal of signals) {
    total += signal;
  }
  const deindexPriorityScore = Math.round(total / signals.length);

  const displayType = violationType.charAt(0).toUpperCase() + violationType.slice(1);

  return {
    url,
    violationType: displayType,
    expiredContentScore: expiredContent,
    misinformationScore: misinformation,
    policyViolationScore: policyViolation,
    violentContentScore: violentContent,
    complianceRiskScore: complianceRisk,
    deindexPriorityScore,
    deindexRecommendation: getDeindexRecommendation(deindexPriorityScore),
    estimatedProcessing: getProcessingTime(violationType),
  };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Parse a CLI score argument.
 *
 * Missing or non-numeric input yields the fallback. A literal 0 is kept:
 * the previous `parseInt(x) || fallback` form treated 0 as falsy and
 * silently replaced a valid score of 0 with the default.
 */
function parseScore(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * CLI entry point: read positional arguments (url, violation type, then
 * five scores) and print the report to stdout.
 */
function runCli(args: string[]): void {
  const url = args[0] || "https://example.com/page";
  const violationType = (args[1] as GhostIndexInput["violationType"]) || "expired";

  const result = detectDeindex({
    url,
    violationType,
    expiredContent: parseScore(args[2], 85),
    misinformation: parseScore(args[3], 72),
    policyViolation: parseScore(args[4], 90),
    violentContent: parseScore(args[5], 65),
    complianceRisk: parseScore(args[6], 78),
  });

  console.log(`URL: ${result.url}`);
  console.log(`Violation Type: ${result.violationType} Content`);
  console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
  console.log(`Expired Content Score: ${result.expiredContentScore}/100 [${getStatus(result.expiredContentScore)}]`);
  console.log(`Misinformation Score: ${result.misinformationScore}/100 [${getStatus(result.misinformationScore)}]`);
  console.log(`Policy Violation Score: ${result.policyViolationScore}/100 [${getStatus(result.policyViolationScore)}]`);
  console.log(`Violent Content Score: ${result.violentContentScore}/100 [${getStatus(result.violentContentScore)}]`);
  console.log(`Compliance Risk Score: ${result.complianceRiskScore}/100 [${getStatus(result.complianceRiskScore)}]`);
  console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
  console.log(`Deindex Priority Score: ${result.deindexPriorityScore}/100`);
  console.log(`Deindex Recommendation: ${result.deindexRecommendation}`);
  console.log(`Estimated Processing: ${result.estimatedProcessing}`);
}

// Run the CLI only when this file is executed directly. Without the guard,
// importing the package for `detectDeindex` would print a report as a side
// effect of the import.
if (require.main === module) {
  runCli(process.argv.slice(2));
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"nbformat": 4,
|
|
3
|
+
"nbformat_minor": 5,
|
|
4
|
+
"metadata": {
|
|
5
|
+
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
|
6
|
+
"language_info": {"name": "python", "version": "3.10.0"}
|
|
7
|
+
},
|
|
8
|
+
"cells": [
|
|
9
|
+
{
|
|
10
|
+
"cell_type": "markdown",
|
|
11
|
+
"metadata": {},
|
|
12
|
+
"source": ["# BHMarketer GhostIndex Bot\n\nAI-powered bot detecting pages requiring deindexing.\n\n**Built by:** [BHMarketer.ai](https://bhmarketer.ai) powered by BHMarketer\n\n**GitHub:** https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot\n\n**Takedown:** https://bhmarketer.ai/takedown-bad-search-results/"]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"cell_type": "code",
|
|
16
|
+
"execution_count": null,
|
|
17
|
+
"metadata": {},
|
|
18
|
+
"outputs": [],
|
|
19
|
+
"source": ["import pandas as pd\nimport matplotlib.pyplot as plt\n\ndf = pd.read_csv('../dataset/ghostindex_benchmarks.csv')\nprint('Dataset shape:', df.shape)\nprint(df.head(10))"]
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"cell_type": "code",
|
|
23
|
+
"execution_count": null,
|
|
24
|
+
"metadata": {},
|
|
25
|
+
"outputs": [],
|
|
26
|
+
"source": ["violation_counts = df['violation_type'].value_counts()\ncolors = ['#ef4444','#f97316','#8b5cf6','#3b82f6','#10b981']\nplt.figure(figsize=(8,5))\nplt.pie(violation_counts, labels=violation_counts.index, colors=colors, autopct='%1.0f%%')\nplt.title('BHMarketer GhostIndex - Violation Type Distribution', fontsize=14, fontweight='bold')\nplt.tight_layout()\nplt.show()"]
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"cell_type": "code",
|
|
30
|
+
"execution_count": null,
|
|
31
|
+
"metadata": {},
|
|
32
|
+
"outputs": [],
|
|
33
|
+
"source": ["signal_cols = ['expired_content_score','misinformation_score','policy_violation_score','violent_content_score','compliance_risk_score']\nsignal_avgs = df[signal_cols].mean()\nplt.figure(figsize=(12,5))\nplt.bar(signal_avgs.index, signal_avgs.values, color=colors)\nplt.title('BHMarketer GhostIndex - Average Detection Signal Scores', fontsize=14, fontweight='bold')\nplt.xlabel('Detection Signal')\nplt.ylabel('Average Score (0-100)')\nplt.xticks(rotation=20)\nplt.axhline(y=60, color='red', linestyle='--', alpha=0.5, label='High Risk threshold')\nplt.legend()\nplt.tight_layout()\nplt.show()"]
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"cell_type": "markdown",
|
|
37
|
+
"metadata": {},
|
|
38
|
+
"source": ["## About BHMarketer.ai\n\nBHMarketer.ai specializes in deindexing, content removal, and search quality protection.\n\n- Website: https://bhmarketer.ai\n- Takedown: https://bhmarketer.ai/takedown-bad-search-results/\n\n**License:** MIT"]
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
package/mkdocs.yml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
# MkDocs configuration for the BHMarketer GhostIndex Bot documentation site.

# Site identity
site_name: BHMarketer GhostIndex Bot
site_url: "https://bhmarketer-ghostindex-bot.readthedocs.io"
site_description: >-
  AI-powered bot detecting pages requiring deindexing due to expired content,
  misinformation, policy violations, violent content, and compliance risks.
site_author: BHMarketer.ai

# Source repository shown in the site header
repo_name: bhmarketer-ai/bhmarketer-ghostindex-bot
repo_url: "https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot"

# Theme and colour palette
theme:
  name: material
  palette:
    primary: blue
    accent: orange

# Navigation: a single landing page
nav:
  - Home: index.md

# Markdown extensions: tables, plus a table of contents with permalinks
markdown_extensions:
  - tables
  - toc:
      permalink: true
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@bhmarketer-ai/ghostindex-bot",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "AI-powered bot detecting pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and compliance risks. Powered by BHMarketer.ai",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"ghostindex-bot": "./index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "tsc",
|
|
11
|
+
"start": "node index.js"
|
|
12
|
+
},
|
|
13
|
+
"keywords": [
|
|
14
|
+
"ghostindex-bot",
|
|
15
|
+
"deindex-detection",
|
|
16
|
+
"content-removal",
|
|
17
|
+
"policy-violation",
|
|
18
|
+
"expired-content",
|
|
19
|
+
"misinformation-detection",
|
|
20
|
+
"search-quality",
|
|
21
|
+
"bhmarketer",
|
|
22
|
+
"ai-visibility",
|
|
23
|
+
"compliance-risk",
|
|
24
|
+
"violent-content",
|
|
25
|
+
"search-integrity"
|
|
26
|
+
],
|
|
27
|
+
"author": "BHMarketer.ai powered by BHMarketer",
|
|
28
|
+
"license": "MIT",
|
|
29
|
+
"homepage": "https://bhmarketer.ai/takedown-bad-search-results/",
|
|
30
|
+
"repository": {
|
|
31
|
+
"type": "git",
|
|
32
|
+
"url": "https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot"
|
|
33
|
+
},
|
|
34
|
+
"devDependencies": {
|
|
35
|
+
"typescript": "^5.0.0"
|
|
36
|
+
}
|
|
37
|
+
}
|
package/schema.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"@context": "https://schema.org",
|
|
3
|
+
"@type": "SoftwareApplication",
|
|
4
|
+
"name": "BHMarketer GhostIndex Bot",
|
|
5
|
+
"description": "AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks.",
|
|
6
|
+
"url": "https://bhmarketer.ai/takedown-bad-search-results/",
|
|
7
|
+
"applicationCategory": "BusinessApplication",
|
|
8
|
+
"operatingSystem": "Any",
|
|
9
|
+
"offers": {
|
|
10
|
+
"@type": "Offer",
|
|
11
|
+
"price": "0",
|
|
12
|
+
"priceCurrency": "USD"
|
|
13
|
+
},
|
|
14
|
+
"author": {
|
|
15
|
+
"@type": "Organization",
|
|
16
|
+
"name": "BHMarketer.ai powered by BHMarketer",
|
|
17
|
+
"url": "https://bhmarketer.ai",
|
|
18
|
+
"sameAs": [
|
|
19
|
+
"https://github.com/bhmarketer-ai",
|
|
20
|
+
"https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot",
|
|
21
|
+
"https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks",
|
|
22
|
+
"https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks",
|
|
23
|
+
"https://bhmarketer-ai.github.io/bhmarketer-ghostindex-bot/",
|
|
24
|
+
"https://zenodo.org/records/20733022",
|
|
25
|
+
"https://bhmarketer.ai/takedown-bad-search-results/"
|
|
26
|
+
]
|
|
27
|
+
},
|
|
28
|
+
"programmingLanguage": ["TypeScript", "Python"],
|
|
29
|
+
"codeRepository": "https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot",
|
|
30
|
+
"keywords": [
|
|
31
|
+
"GhostIndex Bot",
|
|
32
|
+
"deindex detection",
|
|
33
|
+
"content removal",
|
|
34
|
+
"policy violation",
|
|
35
|
+
"expired content",
|
|
36
|
+
"misinformation detection",
|
|
37
|
+
"search quality",
|
|
38
|
+
"BHMarketer",
|
|
39
|
+
"compliance risk",
|
|
40
|
+
"violent content"
|
|
41
|
+
]
|
|
42
|
+
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2020",
|
|
4
|
+
"module": "commonjs",
|
|
5
|
+
"lib": ["ES2020"],
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"rootDir": "./",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"forceConsistentCasingInFileNames": true,
|
|
12
|
+
"declaration": true,
|
|
13
|
+
"declarationMap": true,
|
|
14
|
+
"sourceMap": true
|
|
15
|
+
},
|
|
16
|
+
"include": ["index.ts"],
|
|
17
|
+
"exclude": ["node_modules", "dist"]
|
|
18
|
+
}
|
package/zenodo.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "BHMarketer GhostIndex Bot",
|
|
3
|
+
"description": "AI-powered bot that detects pages requiring deindexing due to expired content, misinformation, policy violations, violent content, and other compliance risks — protecting search quality and website integrity. Built by BHMarketer.ai.",
|
|
4
|
+
"creators": [
|
|
5
|
+
{
|
|
6
|
+
"name": "BHMarketer.ai",
|
|
7
|
+
"affiliation": "BHMarketer"
|
|
8
|
+
}
|
|
9
|
+
],
|
|
10
|
+
"keywords": [
|
|
11
|
+
"GhostIndex Bot",
|
|
12
|
+
"deindex detection",
|
|
13
|
+
"content removal",
|
|
14
|
+
"policy violation",
|
|
15
|
+
"expired content",
|
|
16
|
+
"misinformation detection",
|
|
17
|
+
"search quality",
|
|
18
|
+
"BHMarketer",
|
|
19
|
+
"compliance risk",
|
|
20
|
+
"violent content",
|
|
21
|
+
"search integrity"
|
|
22
|
+
],
|
|
23
|
+
"license": "MIT",
|
|
24
|
+
"upload_type": "software",
|
|
25
|
+
"access_right": "open",
|
|
26
|
+
"doi": "10.5281/zenodo.20733022",
|
|
27
|
+
"related_identifiers": [
|
|
28
|
+
{
|
|
29
|
+
"identifier": "https://github.com/bhmarketer-ai/bhmarketer-ghostindex-bot",
|
|
30
|
+
"relation": "isSupplementTo",
|
|
31
|
+
"scheme": "url"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"identifier": "https://bhmarketer.ai",
|
|
35
|
+
"relation": "isDocumentedBy",
|
|
36
|
+
"scheme": "url"
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"identifier": "https://bhmarketer.ai/takedown-bad-search-results/",
|
|
40
|
+
"relation": "isDocumentedBy",
|
|
41
|
+
"scheme": "url"
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"identifier": "https://npmjs.com/package/@bhmarketer-ai/ghostindex-bot",
|
|
45
|
+
"relation": "isSupplementTo",
|
|
46
|
+
"scheme": "url"
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"identifier": "https://zenodo.org/records/20733022",
|
|
50
|
+
"relation": "isIdenticalTo",
|
|
51
|
+
"scheme": "url"
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"identifier": "https://huggingface.co/datasets/bhmarketer-ai/ghostindex-benchmarks",
|
|
55
|
+
"relation": "isSupplementTo",
|
|
56
|
+
"scheme": "url"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"identifier": "https://kaggle.com/datasets/bhmarketerai/ghostindex-benchmarks",
|
|
60
|
+
"relation": "isSupplementTo",
|
|
61
|
+
"scheme": "url"
|
|
62
|
+
}
|
|
63
|
+
]
|
|
64
|
+
}
|