videonut 1.0.1 → 1.1.0
This diff shows the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
- package/.antigravity/config.toml +8 -0
- package/.claude/commands/archivist.toml +12 -0
- package/.claude/commands/director.toml +12 -0
- package/.claude/commands/eic.toml +12 -0
- package/.claude/commands/investigator.toml +12 -0
- package/.claude/commands/prompt.toml +12 -0
- package/.claude/commands/scavenger.toml +12 -0
- package/.claude/commands/scout.toml +12 -0
- package/.claude/commands/scriptwriter.toml +12 -0
- package/.claude/commands/seo.toml +12 -0
- package/.claude/commands/thumbnail.toml +12 -0
- package/.claude/commands/topic_scout.toml +12 -0
- package/.gemini/commands/archivist.toml +12 -0
- package/.gemini/commands/director.toml +12 -0
- package/.gemini/commands/eic.toml +12 -0
- package/.gemini/commands/investigator.toml +12 -0
- package/.gemini/commands/prompt.toml +12 -0
- package/.gemini/commands/scavenger.toml +12 -0
- package/.gemini/commands/scout.toml +12 -0
- package/.gemini/commands/scriptwriter.toml +12 -0
- package/.gemini/commands/seo.toml +12 -0
- package/.gemini/commands/thumbnail.toml +12 -0
- package/.gemini/commands/topic_scout.toml +12 -0
- package/.qwen/commands/archivist.toml +12 -0
- package/.qwen/commands/director.toml +12 -0
- package/.qwen/commands/eic.toml +12 -0
- package/.qwen/commands/investigator.toml +12 -0
- package/.qwen/commands/prompt.toml +12 -0
- package/.qwen/commands/scavenger.toml +12 -0
- package/.qwen/commands/scout.toml +12 -0
- package/.qwen/commands/scriptwriter.toml +12 -0
- package/.qwen/commands/seo.toml +12 -0
- package/.qwen/commands/thumbnail.toml +12 -0
- package/.qwen/commands/topic_scout.toml +12 -0
- package/USER_GUIDE.md +90 -0
- package/agents/core/eic.md +772 -0
- package/agents/core/prompt_agent.md +264 -0
- package/agents/core/self_review_protocol.md +143 -0
- package/agents/creative/director.md +247 -0
- package/agents/creative/scriptwriter.md +208 -0
- package/agents/creative/seo.md +316 -0
- package/agents/creative/thumbnail.md +285 -0
- package/agents/research/investigator.md +395 -0
- package/agents/research/topic_scout.md +419 -0
- package/agents/technical/archivist.md +289 -0
- package/agents/technical/scavenger.md +248 -0
- package/bin/videonut.js +389 -107
- package/config.yaml +62 -0
- package/docs/AUDIT_REPORT.md +364 -0
- package/docs/LIFECYCLE.md +651 -0
- package/docs/scriptwriter.md +43 -0
- package/file_validator.py +187 -0
- package/memory/short_term/asset_manifest.md +64 -0
- package/memory/short_term/investigation_dossier.md +31 -0
- package/memory/short_term/master_script.md +51 -0
- package/package.json +16 -3
- package/requirements.txt +9 -0
- package/scripts/setup.js +8 -0
- package/tools/check_env.py +77 -0
- package/tools/downloaders/__pycache__/caption_reader.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/image_grabber.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/pdf_reader.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/screenshotter.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/web_reader.cpython-312.pyc +0 -0
- package/tools/downloaders/article_screenshotter.py +388 -0
- package/tools/downloaders/caption_reader.py +238 -0
- package/tools/downloaders/clip_grabber.py +83 -0
- package/tools/downloaders/image_grabber.py +106 -0
- package/tools/downloaders/pdf_reader.py +163 -0
- package/tools/downloaders/pdf_screenshotter.py +240 -0
- package/tools/downloaders/screenshotter.py +58 -0
- package/tools/downloaders/web_reader.py +69 -0
- package/tools/downloaders/youtube_search.py +174 -0
- package/tools/logging/search_logger.py +334 -0
- package/tools/validators/__pycache__/archive_url.cpython-312.pyc +0 -0
- package/tools/validators/__pycache__/link_checker.cpython-312.pyc +0 -0
- package/tools/validators/archive_url.py +269 -0
- package/tools/validators/link_checker.py +45 -0
- package/workflow_orchestrator.py +337 -0
package/tools/downloaders/web_reader.py
@@ -0,0 +1,69 @@
+import sys
+import argparse
+import time
+from random import uniform
+from playwright.sync_api import sync_playwright
+
+def read_webpage(url):
+    try:
+        # Add random delay to implement rate limiting
+        delay = uniform(1, 3)  # Random delay between 1-3 seconds
+        print(f"Rate limiting: Waiting {delay:.2f} seconds before accessing {url}")
+        time.sleep(delay)
+
+        with sync_playwright() as p:
+            # Launch browser (headless by default)
+            browser = p.chromium.launch()
+            page = browser.new_page()
+
+            # Set additional headers to appear more like a real user
+            page.set_extra_http_headers({
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.5",
+                "Accept-Encoding": "gzip, deflate",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1",
+            })
+
+            # Navigate with a reasonable timeout
+            page.goto(url, timeout=30000)
+
+            # Wait for content to load (basic heuristic)
+            page.wait_for_load_state("domcontentloaded")
+
+            # Get the text content
+            # We use evaluate to get innerText which mimics what a user sees (hidden text is ignored)
+            text = page.evaluate("document.body.innerText")
+
+            # Basic cleanup: Remove excessive newlines
+            clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()])
+
+            # Smart truncation: Preserve intro AND conclusion (critical for research)
+            MAX_TOTAL = 40000  # Increased from 25000
+            INTRO_SIZE = 8000  # First portion (hook/summary)
+            OUTRO_SIZE = 8000  # Last portion (conclusion/recommendations)
+
+            if len(clean_text) > MAX_TOTAL:
+                intro = clean_text[:INTRO_SIZE]
+                outro = clean_text[-OUTRO_SIZE:]
+                truncated_chars = len(clean_text) - MAX_TOTAL
+
+                print(f"--- CONTENT START (First {INTRO_SIZE} chars) ---")
+                print(intro)
+                print(f"\n\n[... {truncated_chars:,} CHARACTERS TRUNCATED - Middle section omitted to preserve intro and conclusion ...]\n\n")
+                print(f"--- CONTENT END (Last {OUTRO_SIZE} chars) ---")
+                print(outro)
+            else:
+                print(clean_text)
+
+            browser.close()
+
+    except Exception as e:
+        print(f"Error reading webpage: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--url", required=True)
+    args = parser.parse_args()
+    read_webpage(args.url)
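For orientation, a minimal sketch of how another VideoNut tool might drive the new web_reader.py. The script only prints to stdout, so the subprocess capture shown here is an assumed consumption pattern, not something the package itself ships:

import subprocess

# Hypothetical caller: run the reader and capture its printed page text.
result = subprocess.run(
    ["python", "package/tools/downloaders/web_reader.py",
     "--url", "https://example.com/article"],
    capture_output=True, text=True,
)
if result.returncode == 0:
    article_text = result.stdout  # cleaned text, possibly with the truncation notice
else:
    print("web_reader failed:", result.stderr)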
package/tools/downloaders/youtube_search.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+YouTube Search Tool for VideoNut
+Searches YouTube for videos matching a query and returns structured results.
+Uses youtube-search-python library for searching without API key.
+"""
+
+import sys
+import argparse
+import json
+from datetime import datetime
+
+try:
+    from youtubesearchpython import VideosSearch, Video
+except ImportError:
+    print("Error: youtube-search-python not installed. Install with: pip install youtube-search-python")
+    sys.exit(1)
+
+
+def search_youtube(query, max_results=10, filter_year=None):
+    """
+    Search YouTube for videos matching the query.
+
+    Args:
+        query: Search query string
+        max_results: Maximum number of results to return (default 10)
+        filter_year: Optional year to filter results (e.g., 2018 for videos from 2018)
+
+    Returns:
+        List of video dictionaries with title, url, duration, views, upload_date, channel
+    """
+    try:
+        videos_search = VideosSearch(query, limit=max_results * 2)  # Get extra to filter
+        results = videos_search.result()
+
+        videos = []
+        for video in results.get('result', []):
+            video_data = {
+                'title': video.get('title', 'Unknown'),
+                'url': video.get('link', ''),
+                'video_id': video.get('id', ''),
+                'duration': video.get('duration', 'Unknown'),
+                'views': video.get('viewCount', {}).get('text', 'Unknown'),
+                'upload_date': video.get('publishedTime', 'Unknown'),
+                'channel': video.get('channel', {}).get('name', 'Unknown'),
+                'description': video.get('descriptionSnippet', [{}])[0].get('text', '') if video.get('descriptionSnippet') else '',
+                'thumbnail': video.get('thumbnails', [{}])[0].get('url', '') if video.get('thumbnails') else ''
+            }
+
+            # Filter by year if specified
+            if filter_year:
+                upload_text = video_data['upload_date'].lower()
+                # Check if it contains year info
+                if str(filter_year) in upload_text or f"{filter_year}" in video_data['title']:
+                    videos.append(video_data)
+                elif 'year' in upload_text:
+                    # Try to parse "X years ago"
+                    try:
+                        years_ago = int(upload_text.split()[0])
+                        current_year = datetime.now().year
+                        video_year = current_year - years_ago
+                        if video_year <= filter_year:
+                            videos.append(video_data)
+                    except:
+                        pass
+            else:
+                videos.append(video_data)
+
+            if len(videos) >= max_results:
+                break
+
+        return videos
+
+    except Exception as e:
+        print(f"Error searching YouTube: {str(e)}", file=sys.stderr)
+        return []
+
+
+def format_results(videos, output_format='text'):
+    """Format results for display"""
+    if output_format == 'json':
+        return json.dumps(videos, indent=2, ensure_ascii=False)
+
+    # Text format
+    output = []
+    output.append(f"\n🎬 YouTube Search Results ({len(videos)} videos found)\n")
+    output.append("=" * 60)
+
+    for i, video in enumerate(videos, 1):
+        output.append(f"\n📹 Result {i}:")
+        output.append(f"   Title: {video['title']}")
+        output.append(f"   URL: {video['url']}")
+        output.append(f"   Duration: {video['duration']}")
+        output.append(f"   Views: {video['views']}")
+        output.append(f"   Uploaded: {video['upload_date']}")
+        output.append(f"   Channel: {video['channel']}")
+        if video['description']:
+            output.append(f"   Description: {video['description'][:100]}...")
+        output.append("-" * 40)
+
+    return '\n'.join(output)
+
+
+def get_video_details(video_url):
+    """Get detailed information about a specific video"""
+    try:
+        video_info = Video.getInfo(video_url)
+        return {
+            'title': video_info.get('title', 'Unknown'),
+            'duration_seconds': video_info.get('duration', {}).get('secondsText', 'Unknown'),
+            'views': video_info.get('viewCount', {}).get('text', 'Unknown'),
+            'upload_date': video_info.get('publishDate', 'Unknown'),
+            'channel': video_info.get('channel', {}).get('name', 'Unknown'),
+            'description': video_info.get('description', '')[:500],
+            'is_live': video_info.get('isLiveNow', False),
+            'category': video_info.get('category', 'Unknown')
+        }
+    except Exception as e:
+        print(f"Error getting video details: {str(e)}", file=sys.stderr)
+        return None
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Search YouTube for videos. Returns video titles, URLs, and metadata.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python youtube_search.py --query "electoral bonds interview"
+  python youtube_search.py --query "electoral bonds" --max 5 --year 2018
+  python youtube_search.py --query "Raghuram Rajan interview" --json
+  python youtube_search.py --video-url "https://youtube.com/watch?v=xxx" --details
+"""
+    )
+
+    parser.add_argument("--query", "-q", help="Search query for YouTube videos")
+    parser.add_argument("--max", "-m", type=int, default=10, help="Maximum number of results (default: 10)")
+    parser.add_argument("--year", "-y", type=int, help="Filter videos from a specific year (e.g., 2018)")
+    parser.add_argument("--json", "-j", action="store_true", help="Output results as JSON")
+    parser.add_argument("--video-url", help="Get details for a specific video URL")
+    parser.add_argument("--details", "-d", action="store_true", help="Get detailed info for video URL")
+
+    args = parser.parse_args()
+
+    if args.video_url and args.details:
+        # Get details for specific video
+        details = get_video_details(args.video_url)
+        if details:
+            if args.json:
+                print(json.dumps(details, indent=2, ensure_ascii=False))
+            else:
+                print("\n📺 Video Details:")
+                for key, value in details.items():
+                    print(f"   {key}: {value}")
+        else:
+            print("Error: Could not retrieve video details")
+            sys.exit(1)
+    elif args.query:
+        # Search for videos
+        videos = search_youtube(args.query, args.max, args.year)
+
+        if not videos:
+            print(f"No videos found for query: {args.query}")
+            sys.exit(0)
+
+        output_format = 'json' if args.json else 'text'
+        print(format_results(videos, output_format))
+    else:
+        parser.print_help()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
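Because the tool can emit JSON, other agents can consume it programmatically. A minimal sketch of that pattern, assuming the script is invoked from the package root; the duration-based filter at the end is purely illustrative and not part of the package:

import json
import subprocess

# Run the search tool in JSON mode and parse its stdout.
proc = subprocess.run(
    ["python", "package/tools/downloaders/youtube_search.py",
     "--query", "electoral bonds interview", "--max", "5", "--json"],
    capture_output=True, text=True, check=True,
)
videos = json.loads(proc.stdout)

# Hypothetical downstream filter: keep videos under an hour
# (durations like "12:34" have one colon, "1:02:34" has two).
short_videos = [v for v in videos if v["duration"].count(":") == 1]
for v in short_videos:
    print(v["title"], "->", v["url"])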
package/tools/logging/search_logger.py
@@ -0,0 +1,334 @@
+#!/usr/bin/env python3
+"""
+Search Logger for VideoNut
+
+Logs all search queries made by agents, with special focus on regional language searches.
+This helps track:
+1. What searches are being performed
+2. Whether regional languages (Hindi, Telugu, Tamil, Marathi) are being used
+3. Success/failure rates of searches
+4. Search patterns over time
+
+Usage:
+    # Log a search
+    python search_logger.py --log --query "electoral bonds scam" --language "en" --agent "investigator"
+
+    # Log with regional language
+    python search_logger.py --log --query "इलेक्टोरल बॉन्ड घोटाला" --language "hi" --agent "investigator"
+
+    # View search history
+    python search_logger.py --view --project "electoral_bonds"
+
+    # Get regional language statistics
+    python search_logger.py --stats
+"""
+
+import sys
+import os
+import argparse
+import json
+from datetime import datetime
+from pathlib import Path
+
+
+# Regional language codes and names
+REGIONAL_LANGUAGES = {
+    'hi': 'Hindi',
+    'te': 'Telugu',
+    'ta': 'Tamil',
+    'mr': 'Marathi',
+    'bn': 'Bengali',
+    'gu': 'Gujarati',
+    'kn': 'Kannada',
+    'ml': 'Malayalam',
+    'pa': 'Punjabi',
+    'or': 'Odia'
+}
+
+# Default log file location
+DEFAULT_LOG_DIR = Path(__file__).parent.parent.parent / "logs"
+DEFAULT_LOG_FILE = DEFAULT_LOG_DIR / "search_history.jsonl"
+
+
+def ensure_log_dir():
+    """Create log directory if it doesn't exist."""
+    DEFAULT_LOG_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def log_search(query, language='en', agent='unknown', project=None,
+               success=True, results_count=0, notes=None):
+    """
+    Log a search query to the search history.
+
+    Args:
+        query: The search query string
+        language: Language code (en, hi, te, etc.)
+        agent: Which agent performed the search
+        project: Project name/folder
+        success: Whether the search returned results
+        results_count: Number of results found
+        notes: Optional notes
+
+    Returns:
+        Dict with the logged entry
+    """
+    ensure_log_dir()
+
+    entry = {
+        'timestamp': datetime.now().isoformat(),
+        'query': query,
+        'language': language,
+        'language_name': REGIONAL_LANGUAGES.get(language, 'English' if language == 'en' else language),
+        'is_regional': language in REGIONAL_LANGUAGES,
+        'agent': agent,
+        'project': project,
+        'success': success,
+        'results_count': results_count,
+        'notes': notes
+    }
+
+    # Append to log file (JSONL format - one JSON per line)
+    with open(DEFAULT_LOG_FILE, 'a', encoding='utf-8') as f:
+        f.write(json.dumps(entry, ensure_ascii=False) + '\n')
+
+    # Also log to console
+    lang_indicator = f"🇮🇳 {entry['language_name']}" if entry['is_regional'] else f"🌐 {entry['language_name']}"
+    status = "✅" if success else "❌"
+
+    print(f"{status} [{agent}] {lang_indicator}: \"{query}\" ({results_count} results)")
+
+    return entry
+
+
+def load_search_history(project=None, agent=None, language=None, limit=100):
+    """
+    Load search history with optional filters.
+
+    Args:
+        project: Filter by project name
+        agent: Filter by agent name
+        language: Filter by language code
+        limit: Maximum entries to return
+
+    Returns:
+        List of search entries
+    """
+    if not DEFAULT_LOG_FILE.exists():
+        return []
+
+    entries = []
+    with open(DEFAULT_LOG_FILE, 'r', encoding='utf-8') as f:
+        for line in f:
+            try:
+                entry = json.loads(line.strip())
+
+                # Apply filters
+                if project and entry.get('project') != project:
+                    continue
+                if agent and entry.get('agent') != agent:
+                    continue
+                if language and entry.get('language') != language:
+                    continue
+
+                entries.append(entry)
+            except json.JSONDecodeError:
+                continue
+
+    # Return most recent entries
+    return entries[-limit:]
+
+
+def get_statistics():
+    """
+    Calculate search statistics.
+
+    Returns:
+        Dict with statistics
+    """
+    entries = load_search_history(limit=10000)
+
+    if not entries:
+        return {'total': 0, 'message': 'No search history found'}
+
+    stats = {
+        'total_searches': len(entries),
+        'by_language': {},
+        'by_agent': {},
+        'regional_percentage': 0,
+        'success_rate': 0,
+        'average_results': 0
+    }
+
+    regional_count = 0
+    success_count = 0
+    total_results = 0
+
+    for entry in entries:
+        # By language
+        lang = entry.get('language', 'unknown')
+        stats['by_language'][lang] = stats['by_language'].get(lang, 0) + 1
+
+        # By agent
+        agent = entry.get('agent', 'unknown')
+        stats['by_agent'][agent] = stats['by_agent'].get(agent, 0) + 1
+
+        # Regional count
+        if entry.get('is_regional'):
+            regional_count += 1
+
+        # Success rate
+        if entry.get('success'):
+            success_count += 1
+
+        # Results count
+        total_results += entry.get('results_count', 0)
+
+    # Calculate percentages
+    stats['regional_percentage'] = round((regional_count / len(entries)) * 100, 1)
+    stats['regional_count'] = regional_count
+    stats['success_rate'] = round((success_count / len(entries)) * 100, 1)
+    stats['average_results'] = round(total_results / len(entries), 1)
+
+    return stats
+
+
+def display_history(entries):
+    """Display search history in a readable format."""
+    if not entries:
+        print("No search history found.")
+        return
+
+    print(f"\n📋 Search History ({len(entries)} entries)\n")
+    print("=" * 80)
+
+    for entry in entries:
+        timestamp = entry.get('timestamp', 'Unknown')[:19]  # Trim to date+time
+        agent = entry.get('agent', 'unknown')
+        lang = entry.get('language_name', entry.get('language', '?'))
+        query = entry.get('query', '')[:50]  # Truncate long queries
+        results = entry.get('results_count', 0)
+        status = "✅" if entry.get('success') else "❌"
+        regional = "🇮🇳" if entry.get('is_regional') else " "
+
+        print(f"{status} {regional} [{timestamp}] {agent:12} | {lang:10} | {query}")
+
+    print("=" * 80)
+
+
+def display_statistics(stats):
+    """Display statistics in a readable format."""
+    print("\n📊 Search Statistics\n")
+    print("=" * 60)
+
+    print(f"Total Searches: {stats.get('total_searches', 0)}")
+    print(f"Success Rate: {stats.get('success_rate', 0)}%")
+    print(f"Average Results: {stats.get('average_results', 0)}")
+    print(f"\n🇮🇳 Regional Language Usage: {stats.get('regional_percentage', 0)}%")
+    print(f"   ({stats.get('regional_count', 0)} out of {stats.get('total_searches', 0)} searches)")
+
+    print("\n📈 By Language:")
+    for lang, count in sorted(stats.get('by_language', {}).items(), key=lambda x: -x[1]):
+        lang_name = REGIONAL_LANGUAGES.get(lang, 'English' if lang == 'en' else lang)
+        bar = "█" * min(count, 30)
+        regional_marker = "🇮🇳" if lang in REGIONAL_LANGUAGES else " "
+        print(f"   {regional_marker} {lang_name:15} {bar} ({count})")
+
+    print("\n🤖 By Agent:")
+    for agent, count in sorted(stats.get('by_agent', {}).items(), key=lambda x: -x[1]):
+        bar = "█" * min(count, 30)
+        print(f"   {agent:15} {bar} ({count})")
+
+    print("=" * 60)
+
+    # Recommendations
+    regional_pct = stats.get('regional_percentage', 0)
+    if regional_pct < 20:
+        print("\n💡 Recommendation: Try more regional language searches!")
+        print("   Hindi: Add 'हिंदी में' or translate key terms")
+        print("   Telugu: Add 'తెలుగులో' for Telugu sources")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Log and analyze search queries for VideoNut agents.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Log a search
+  python search_logger.py --log --query "electoral bonds" --language "en" --agent "investigator"
+
+  # Log a Hindi search
+  python search_logger.py --log --query "इलेक्टोरल बॉन्ड" --language "hi" --agent "investigator"
+
+  # View search history
+  python search_logger.py --view
+
+  # View history for specific project
+  python search_logger.py --view --project "electoral_bonds"
+
+  # Get statistics
+  python search_logger.py --stats
+
+Regional Language Codes:
+  hi = Hindi      te = Telugu     ta = Tamil
+  mr = Marathi    bn = Bengali    gu = Gujarati
+  kn = Kannada    ml = Malayalam  pa = Punjabi
+"""
+    )
+
+    # Actions
+    parser.add_argument("--log", "-l", action="store_true", help="Log a new search")
+    parser.add_argument("--view", "-v", action="store_true", help="View search history")
+    parser.add_argument("--stats", "-s", action="store_true", help="Show statistics")
+
+    # Log parameters
+    parser.add_argument("--query", "-q", help="Search query to log")
+    parser.add_argument("--language", default="en", help="Language code (en, hi, te, etc.)")
+    parser.add_argument("--agent", default="unknown", help="Agent name")
+    parser.add_argument("--project", "-p", help="Project name")
+    parser.add_argument("--success", type=bool, default=True, help="Whether search succeeded")
+    parser.add_argument("--results", type=int, default=0, help="Number of results")
+    parser.add_argument("--notes", help="Optional notes")
+
+    # View parameters
+    parser.add_argument("--limit", type=int, default=50, help="Max entries to show")
+
+    args = parser.parse_args()
+
+    if not (args.log or args.view or args.stats):
+        parser.print_help()
+        print("\nError: Use --log, --view, or --stats")
+        sys.exit(1)
+
+    if args.log:
+        if not args.query:
+            print("Error: --query is required for logging")
+            sys.exit(1)
+
+        log_search(
+            query=args.query,
+            language=args.language,
+            agent=args.agent,
+            project=args.project,
+            success=args.success,
+            results_count=args.results,
+            notes=args.notes
+        )
+        print("\n✅ Search logged successfully")
+
+    elif args.view:
+        entries = load_search_history(
+            project=args.project,
+            agent=args.agent,
+            language=args.language,
+            limit=args.limit
+        )
+        display_history(entries)
+
+    elif args.stats:
+        stats = get_statistics()
+        display_statistics(stats)


+if __name__ == "__main__":
+    main()
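Since the log is plain JSONL, downstream analysis does not have to go through the CLI. A minimal sketch, assuming the default logs/search_history.jsonl location defined above; grouping regional searches by agent is our illustrative twist, not a feature of the package:

import json
from pathlib import Path

LOG_FILE = Path("logs/search_history.jsonl")  # default location from search_logger.py

# Count regional-language searches per agent.
regional_by_agent = {}
with open(LOG_FILE, encoding="utf-8") as f:
    for line in f:
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip corrupt lines, as load_search_history() does
        if entry.get("is_regional"):
            agent = entry.get("agent", "unknown")
            regional_by_agent[agent] = regional_by_agent.get(agent, 0) + 1

for agent, count in sorted(regional_by_agent.items(), key=lambda kv: -kv[1]):
    print(f"{agent}: {count} regional searches")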
Binary files differ (compiled .pyc caches listed above; contents not shown).