kite-agent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kite/__init__.py +46 -0
- kite/ab_testing.py +384 -0
- kite/agent.py +556 -0
- kite/agents/__init__.py +3 -0
- kite/agents/plan_execute.py +191 -0
- kite/agents/react_agent.py +509 -0
- kite/agents/reflective_agent.py +90 -0
- kite/agents/rewoo.py +119 -0
- kite/agents/tot.py +151 -0
- kite/conversation.py +125 -0
- kite/core.py +974 -0
- kite/data_loaders.py +111 -0
- kite/embedding_providers.py +372 -0
- kite/llm_providers.py +1278 -0
- kite/memory/__init__.py +6 -0
- kite/memory/advanced_rag.py +333 -0
- kite/memory/graph_rag.py +719 -0
- kite/memory/session_memory.py +423 -0
- kite/memory/vector_memory.py +579 -0
- kite/monitoring.py +611 -0
- kite/observers.py +107 -0
- kite/optimization/__init__.py +9 -0
- kite/optimization/resource_router.py +80 -0
- kite/persistence.py +42 -0
- kite/pipeline/__init__.py +5 -0
- kite/pipeline/deterministic_pipeline.py +323 -0
- kite/pipeline/reactive_pipeline.py +171 -0
- kite/pipeline_manager.py +15 -0
- kite/routing/__init__.py +6 -0
- kite/routing/aggregator_router.py +325 -0
- kite/routing/llm_router.py +149 -0
- kite/routing/semantic_router.py +228 -0
- kite/safety/__init__.py +6 -0
- kite/safety/circuit_breaker.py +360 -0
- kite/safety/guardrails.py +82 -0
- kite/safety/idempotency_manager.py +304 -0
- kite/safety/kill_switch.py +75 -0
- kite/tool.py +183 -0
- kite/tool_registry.py +87 -0
- kite/tools/__init__.py +21 -0
- kite/tools/code_execution.py +53 -0
- kite/tools/contrib/__init__.py +19 -0
- kite/tools/contrib/calculator.py +26 -0
- kite/tools/contrib/datetime_utils.py +20 -0
- kite/tools/contrib/linkedin.py +428 -0
- kite/tools/contrib/web_search.py +30 -0
- kite/tools/mcp/__init__.py +31 -0
- kite/tools/mcp/database_mcp.py +267 -0
- kite/tools/mcp/gdrive_mcp_server.py +503 -0
- kite/tools/mcp/gmail_mcp_server.py +601 -0
- kite/tools/mcp/postgres_mcp_server.py +490 -0
- kite/tools/mcp/slack_mcp_server.py +538 -0
- kite/tools/mcp/stripe_mcp_server.py +219 -0
- kite/tools/search.py +90 -0
- kite/tools/system_tools.py +54 -0
- kite/tools_manager.py +27 -0
- kite_agent-0.1.0.dist-info/METADATA +621 -0
- kite_agent-0.1.0.dist-info/RECORD +61 -0
- kite_agent-0.1.0.dist-info/WHEEL +5 -0
- kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
- kite_agent-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Python Execution Tool
|
|
3
|
+
====================
|
|
4
|
+
Allows agents to run Python code for data analysis, math, or visualization.
|
|
5
|
+
WARNING: This uses `exec`. In a real production environment, this should run in a Docker container or Firecracker VM.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict
|
|
9
|
+
import sys
|
|
10
|
+
import io
|
|
11
|
+
import traceback
|
|
12
|
+
from kite.tool import Tool
|
|
13
|
+
|
|
14
|
+
class PythonReplTool(Tool):
    """Tool that executes arbitrary Python code in a persistent namespace.

    State (variables, imports) survives across calls because the same
    ``globals``/``locals`` dicts are reused for every ``exec``.

    WARNING: This uses ``exec`` with no sandboxing. In a real production
    environment, run the code inside a Docker container or Firecracker VM.
    """

    def __init__(self):
        super().__init__(
            name="python",
            func=self.execute,
            description="Executes Python code. Use this for data analysis (pandas), visualization (matplotlib), or complex calculations. Input must be valid python code string."
        )
        # Shared namespaces so state persists between execute() calls.
        self.globals = {}
        self.locals = {}

    async def execute(self, code: str, **kwargs) -> str:
        """
        Executes the provided Python code and returns its captured stdout.

        Args:
            code: Python source to run with ``exec``.
            **kwargs: Ignored; accepted for tool-call signature compatibility.

        Returns:
            The captured stdout, a placeholder message when nothing was
            printed, or the full traceback text when execution raised.
        """
        # Capture stdout so print() output can be returned to the agent.
        # NOTE(review): swapping sys.stdout is process-global; concurrent
        # execute() calls would interleave output. Acceptable for a
        # single-agent local demo.
        old_stdout = sys.stdout
        redirected_output = sys.stdout = io.StringIO()

        try:
            # Dangerous in real prod, but okay for local demo with user consent
            # We strictly namespace it

            # Pre-import common libs if available, so agent code can use
            # pd/plt without writing its own imports.
            try:
                import pandas as pd
                import matplotlib.pyplot as plt
                self.globals['pd'] = pd
                self.globals['plt'] = plt
            except ImportError:
                pass

            exec(code, self.globals, self.locals)

            output = redirected_output.getvalue()
            return output if output.strip() else "[Code executed successfully with no output]"

        except Exception:
            # Return the traceback as the tool result instead of raising,
            # so the agent can observe and react to the failure.
            return f"Error executing code:\n{traceback.format_exc()}"
        finally:
            # Always restore the real stdout, even if exec() blew up.
            sys.stdout = old_stdout
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .datetime_utils import get_current_datetime
# Bug fix: web_search and calculator were listed in __all__ but never
# imported, which broke `from kite.tools.contrib import *` and attribute
# access on the package.
from .web_search import web_search
from .calculator import calculator

# LinkedIn tools (optional - require linkedin_scraper package)
# from .linkedin import (
#     search_linkedin_posts,
#     get_linkedin_profile_details,
#     get_linkedin_company_details,
#     create_linkedin_session
# )

__all__ = [
    'web_search',
    'calculator',
    'get_current_datetime',
    # 'search_linkedin_posts',
    # 'get_linkedin_profile_details',
    # 'get_linkedin_company_details',
    # 'create_linkedin_session'
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Calculator Tool - Simple arithmetic operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
def calculator(expression: str):
    """
    Evaluate a mathematical expression.

    Args:
        expression: Mathematical expression (e.g. "2 + 2", "15 * 4")

    Returns:
        Dict with "expression" and "result" keys on success, or a dict
        with a single "error" key describing the failure.
    """
    # Security fix: the original used eval() behind a character whitelist.
    # We keep the whitelist as a first gate, but evaluate via an AST walk
    # that only permits numeric literals and arithmetic operators, so no
    # Python code can ever run.
    import ast
    import operator

    # Supported operators; covers everything the whitelist can express
    # (including ** and //, which are built from allowed characters).
    ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    allowed_chars = "0123456789+-*/(). "
    if not all(c in allowed_chars for c in expression):
        return {"error": "Invalid characters in expression"}

    def _eval(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError("Unsupported expression")

    try:
        result = _eval(ast.parse(expression, mode="eval"))
        return {"expression": expression, "result": result}
    except Exception as e:
        return {"error": str(e)}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DateTime Utilities - Get current date and time.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
def get_current_datetime():
    """
    Get the current date and time.

    Returns:
        Dict with the current local time rendered four ways:
        "iso" (ISO-8601 string), "date" (YYYY-MM-DD),
        "time" (HH:MM:SS) and "weekday" (full weekday name).
    """
    current = datetime.now()
    return dict(
        iso=current.isoformat(),
        date=current.strftime("%Y-%m-%d"),
        time=current.strftime("%H:%M:%S"),
        weekday=current.strftime("%A"),
    )
|
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
|
+
import asyncio
|
|
3
|
+
import os
|
|
4
|
+
import random
|
|
5
|
+
import urllib.parse
|
|
6
|
+
from linkedin_scraper import BrowserManager
|
|
7
|
+
|
|
8
|
+
async def is_really_logged_in(page):
    """A bulletproof check for LinkedIn login state using multiple signals.

    Args:
        page: Playwright-style page object (provides ``.url`` and
            ``.locator()``) — assumed; confirm against BrowserManager.

    Returns:
        True when either the URL path or a logged-in-only DOM element
        indicates an authenticated session; False otherwise.
    """
    url = page.url.lower()

    # Signal 1: URL path — being on a members-only page (and not on a
    # login/signup/authwall page) implies an authenticated session.
    logged_in_paths = ["/feed", "/mynetwork", "/messaging", "/notifications", "/me", "/company"]
    if any(path in url for path in logged_in_paths):
        if "/login" not in url and "/signup" not in url and "authwall" not in url:
            return True

    # Signal 2: Common modern UI elements
    selectors = [
        '.global-nav', '#global-nav', '.nav-item',
        'button.global-nav__primary-link', 'img.global-nav__me-photo',
        '.feed-identity-module', '.share-box-feed-entry__trigger'
    ]

    for selector in selectors:
        try:
            if await page.locator(selector).count() > 0:
                return True
        except Exception:
            # Fix: this was a bare `except:`, which in an async function
            # also swallows asyncio.CancelledError (and KeyboardInterrupt),
            # preventing task cancellation. A failed probe just means this
            # selector is inconclusive — try the next one.
            continue

    return False
|
|
33
|
+
|
|
34
|
+
async def create_linkedin_session(session_path: str = "linkedin_session.json", **kwargs):
    """Create or renew a LinkedIn session file with smart detection.

    Opens a visible (non-headless) browser, loads any existing session
    file, and verifies login state.  If the session is invalid, the user
    logs in manually while this coroutine polls for up to 5 minutes; on
    success the session cookies are saved back to ``session_path``.

    Args:
        session_path: Where to load/save the session JSON file.
        **kwargs: Ignored; accepted for tool-call signature compatibility.

    Returns:
        True if a logged-in session was confirmed and saved, else False.
    """
    print("="*60)
    print("LinkedIn Session Manager (Smart Renewal)")
    print("="*60)

    # Fixed desktop UA string — LinkedIn serves a different DOM to
    # unknown/headless agents.
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    # Use headless=False so the user can see and interact
    async with BrowserManager(headless=False, user_agent=ua) as browser:
        # 1. Attempt to load existing session
        if os.path.exists(session_path):
            print(f"📂 Found existing session: {session_path}. Loading...")
            await browser.load_session(session_path)
            await browser.page.goto("https://www.linkedin.com/feed/")
            # Give the SPA time to hydrate before checking login state.
            await asyncio.sleep(4)
        else:
            print("No existing session found. Starting fresh...")
            await browser.page.goto("https://www.linkedin.com/login")
            await asyncio.sleep(2)

        # 2. Check if already logged in
        if await is_really_logged_in(browser.page):
            print("\n✅ SUCCESS: You are already logged in via valid session.")
        else:
            print("\n🔐 Session invalid or missing. Please log in manually in the opened browser.")
            print("   I will auto-detect once you are on the Feed or any main page.")

        # 3. Wait for login / Detection — poll every 2s until logged in or
        # the 5-minute timeout expires.
        print("\n⏳ Monitoring login status (timeout 5 mins)...\n")
        start_time = asyncio.get_event_loop().time()
        logged_in = False

        while (asyncio.get_event_loop().time() - start_time) < 300:  # 5 minutes
            if await is_really_logged_in(browser.page):
                logged_in = True
                break
            await asyncio.sleep(2)

        if logged_in:
            print(f"\n✅ VERIFIED: Login state confirmed!")
            print(f"💾 Saving/Updating session to {session_path}...")
            await asyncio.sleep(3)
            # Ensure we are on a safe page before saving to avoid capturing transient states
            await browser.save_session(session_path)
            print("\n✅ DONE! Session is now current.")
            return True
        else:
            print("\n❌ FAILED: Login not detected within 5 minutes.")
            return False
|
|
84
|
+
|
|
85
|
+
def _resolve_session_path(provided_path: str) -> Optional[str]:
|
|
86
|
+
"""Robust resolution for session file in multiple locations."""
|
|
87
|
+
# 1. Project Root (Preferred)
|
|
88
|
+
cwd = os.getcwd()
|
|
89
|
+
root_path = os.path.join(cwd, "linkedin_session_v2.json")
|
|
90
|
+
if os.path.exists(root_path): return root_path
|
|
91
|
+
if os.path.exists(provided_path): return provided_path
|
|
92
|
+
|
|
93
|
+
fallbacks = [
|
|
94
|
+
"linkedin_session_v2.json",
|
|
95
|
+
"linkedin_session.json",
|
|
96
|
+
os.path.join(cwd, "linkedin_session.json")
|
|
97
|
+
]
|
|
98
|
+
for f in fallbacks:
|
|
99
|
+
if os.path.exists(f): return f
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
async def search_linkedin_posts(query: str, limit: int = 30, session_path: str = "linkedin_session.json", **kwargs) -> List[Dict]:
    """Search for LinkedIn posts.

    Scrapes LinkedIn's content-search results page with a headless
    browser, repeatedly scrolling and extracting posts via injected
    JavaScript until `limit` unique posts are collected or scrolling
    stops producing new ones.

    Args:
        query: Search keywords (extended by kwargs["keywords"] if given).
        limit: Maximum number of posts to return.
        session_path: Preferred session-file path; resolved through
            _resolve_session_path, which also checks fallback locations.
        **kwargs: Optional extras — 'framework' (emits events on its
            event_bus), 'keywords' (str or list appended to the query),
            'date_posted' (LinkedIn date filter, defaults "past-week").

    Returns:
        List of dicts {"content", "author": {"name", "title",
        "profile_link"}, "post_link"}; or a single-element list with an
        {"error": "SESSION_MISSING", ...} dict when no session exists.
    """
    fw = kwargs.get('framework')
    # Log through the framework's event bus when available, else stdout.
    def tool_log(msg):
        if fw: fw.event_bus.emit("tool:log", {"agent": "LinkedInTool", "message": msg})
        else: print(f"   [Tool] {msg}")

    tool_log(f"Starting search for: {query} (limit={limit})")
    print(f"\n📡 Running LinkedIn search for: '{query}' (Target: {limit} posts)")

    # Agents sometimes pass extra keywords separately; fold them into the query.
    if "keywords" in kwargs:
        if isinstance(kwargs["keywords"], list): query += " " + " ".join(kwargs["keywords"])
        elif isinstance(kwargs["keywords"], str): query += " " + kwargs["keywords"]

    # Fixed desktop UA — LinkedIn serves a different DOM to headless agents.
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    results = []
    # Dedup keys of "author:first-100-chars-of-content" seen so far.
    all_content = set()

    async with BrowserManager(headless=True, user_agent=ua) as bm:
        active_session = _resolve_session_path(session_path)
        if active_session:
            tool_log(f"Loading session: {active_session}")
            await bm.load_session(active_session)
        else:
            tool_log("CRITICAL: No session file found!")
            return [{"error": "SESSION_MISSING", "message": "LinkedIn session file not found. Please run 'python3 create_new_session.py'."}]

        encoded_query = urllib.parse.quote(query)
        date_posted = kwargs.get("date_posted", "past-week")
        search_url = f"https://www.linkedin.com/search/results/content/?keywords={encoded_query}&origin=GLOBAL_SEARCH_HEADER"

        if date_posted:
            # Common LinkedIn date filters: "past-24h", "past-week", "past-month"
            search_url += f"&datePosted=%5B%22{date_posted}%22%5D"

        tool_log(f"Navigating to: {search_url}")
        await bm.page.goto(search_url, wait_until="domcontentloaded", timeout=90000)
        await asyncio.sleep(5)

        # Best-effort: click the "Posts" filter tab if it is visible.
        # NOTE(review): bare except silently ignores any failure here —
        # acceptable for an optional UI nicety, but it also swallows
        # CancelledError; consider `except Exception`.
        try:
            posts_filter = bm.page.locator('button:has-text("Posts")').first
            if await posts_filter.is_visible():
                await posts_filter.click()
                tool_log("Applied 'Posts' filter.")
                await asyncio.sleep(3)
        except: pass

        # Initial wait for rehydration
        await asyncio.sleep(10.0)

        # Candidate containers for a single post, newest LinkedIn DOM first.
        post_selectors = [
            '[data-view-name="feed-full-update"]',
            '[role="listitem"]',
            '.reusable-search__result-container',
            '.search-results-container [role="listitem"]',
            '.feed-shared-update-v2',
            '.occludable-update'
        ]
        # NOTE(review): this local is never used — the JS below carries its
        # own (larger) copy of these content selectors. Kept for reference.
        content_selectors = [
            '[data-view-name="feed-commentary"]',
            '[data-testid="expandable-text-box"]',
            '.feed-shared-update-v2__description',
            '.update-components-text-view'
        ]

        # Roughly 3 posts surface per scroll, plus slack.
        max_scrolls = (limit // 3) + 3
        consecutive_empty_scrolls = 0

        for i in range(max_scrolls):
            count_before = len(results)
            for post_selector in post_selectors:
                # Injected extractor: scrolls recent items into view (to
                # force lazy hydration), expands "see more", then pulls
                # content/author/link per post. Runs entirely in-page.
                js_code = r"""(selector) => {
    const posts = [];
    const items = document.querySelectorAll(selector);

    // Sequential scroll for each item to ensure hydration
    // We only scroll the most recent items to trigger lazy loading
    const startIndex = Math.max(0, items.length - 12);
    for (let i = startIndex; i < items.length; i++) {
        try {
            items[i].scrollIntoView({block: 'center'});
        } catch(e) {}
    }

    for (const item of items) {
        try {
            const seeMore = item.querySelector('.feed-shared-inline-show-more-text__button');
            if (seeMore) seeMore.click();
        } catch(e) {}

        // Try standard content selectors
        let content = "";
        const contentSels = [
            '[data-view-name="feed-commentary"]',
            '[data-testid="expandable-text-box"]',
            '.feed-shared-update-v2__description',
            '.update-components-text-view',
            '.feed-shared-text',
            '.feed-shared-update-v2__commentary'
        ];
        for (const sel of contentSels) {
            const el = item.querySelector(sel);
            if (el && el.innerText.trim().length > 10) {
                content = el.innerText.trim();
                break;
            }
        }

        if (!content || content.length < 15) {
            continue;
        }

        let sanitizedContent = content
            .replace(/[\r\n]+/g, ' ')
            .replace(/"/g, "'")
            .replace(String.fromCharCode(92), '/')
            .trim();

        if (sanitizedContent.length > 1500) {
            sanitizedContent = sanitizedContent.substring(0, 1500) + "... [TRUNCATED FOR AGENT]";
        }

        let name = "Unknown";
        let profile = "";
        let title = "LinkedIn User";

        const links = Array.from(item.querySelectorAll('a[href*="/in/"]'));
        for (const a of links) {
            if (a.getAttribute('data-view-name') === 'feed-actor-image') continue;
            const textLines = a.innerText.split('\n').map(l => l.trim()).filter(l => l);
            if (textLines.length > 0) {
                name = textLines[0].split('•')[0].trim();
                if (name === "LinkedIn Member") {
                    // Try another link or fallback
                    continue;
                }
                profile = a.href;
                if (textLines.length > 1) title = textLines[1].split('•')[0].trim();
                break;
            }
        }

        // Fallback name if still unknown or "LinkedIn Member"
        if (name === "Unknown" || name === "LinkedIn Member" || name.length < 2) {
            const nameEl = item.querySelector('[data-view-name="feed-actor-name"], .update-components-actor__name, .hoverable-link-text');
            if (nameEl) {
                let foundName = (nameEl.getAttribute('title') || nameEl.innerText).split('•')[0].trim();
                if (foundName && foundName.length > 1) name = foundName;
            }
        }

        if (title === "LinkedIn User") {
            const titleEl = item.querySelector('.update-components-actor__description, .update-components-actor__headline, .t-14.t-black--light.t-normal');
            if (titleEl) title = titleEl.innerText.trim();
        }

        let postLink = "";
        const pLinkEl = item.querySelector('a[href*="/feed/update/urn:li:"]');
        if (pLinkEl) postLink = pLinkEl.href;

        posts.push({
            content: sanitizedContent,
            author: {
                name: name.replace(/"/g, "'").replace(String.fromCharCode(92), '/'),
                title: title.replace(/[\r\n]+/g, ' ').replace(/"/g, "'").replace(String.fromCharCode(92), '/').trim(),
                profile_link: profile
            },
            post_link: postLink
        });
    }
    return posts;
}"""

                raw_posts = await bm.page.evaluate(js_code, post_selector)

                if not raw_posts: continue

                tool_log(f"Extracted {len(raw_posts)} items via selector '{post_selector}'")
                for post_data in raw_posts:
                    author_name = post_data["author"]["name"]
                    content_text = post_data["content"]

                    # Dedup on author + leading content, since the same post
                    # reappears across scrolls/selectors.
                    unique_key = f"{author_name}:{content_text[:100]}"
                    if unique_key not in all_content:
                        # Log more clearly for the user ONLY for new posts
                        content_preview = content_text[:60].replace('\n', ' ') + "..."
                        print(f"   [LinkedIn] Found: {author_name} - {content_preview}")

                        all_content.add(unique_key)
                        results.append(post_data)
                        if fw:
                            fw.event_bus.emit("scraper:post_discovered", {"post": post_data})
                            fw.event_bus.emit("tool:log", {"message": f"Extracted: {author_name}"})

                # If we found posts with one selector, don't try others in this scroll
                break

            new_posts = len(results) - count_before
            if new_posts == 0:
                consecutive_empty_scrolls += 1
            else:
                consecutive_empty_scrolls = 0

            if len(results) >= limit: break

            if consecutive_empty_scrolls >= 2 and len(results) == 0 and i > 0:
                # Debug Check: If we found nothing after scrolling twice, capture HTML
                tool_log("WARNING: Found 0 results after 2 scrolls. Capturing debug HTML...")
                debug_path = f"debug_search_fail_{int(asyncio.get_event_loop().time())}.html"
                try:
                    html = await bm.page.content()
                    with open(debug_path, "w") as f: f.write(html)
                    tool_log(f"Debug HTML saved to: {debug_path}")
                except: pass

            if consecutive_empty_scrolls >= 3:
                tool_log(f"Stopping search: 3 consecutive scrolls found no new posts.")
                break

            tool_log(f"Scroll {i+1}: Found {new_posts} new posts. Total: {len(results)}")

            # More persistent scroll jump — LinkedIn sometimes scrolls an
            # inner container rather than the window, so nudge all three.
            await bm.page.evaluate('''() => {
                window.scrollBy(0, 1500);
                const main = document.querySelector('.scaffold-layout__main') || document.querySelector('main');
                if (main) main.scrollBy(0, 1500);
                const lazyColumn = document.querySelector('[data-testid="lazy-column"]');
                if (lazyColumn) lazyColumn.scrollBy(0, 1500);
            }''')

            # Use PageDown multiple times
            for _ in range(3):
                await bm.page.keyboard.press("PageDown")
                await asyncio.sleep(0.5)

            # Randomized pause to look less bot-like and let results load.
            await asyncio.sleep(random.uniform(5.0, 8.0))

            if new_posts == 0:
                # If still no results, maybe it's the very bottom or a slow load
                await bm.page.keyboard.press("End")
                await asyncio.sleep(3)

        tool_log(f"Completed search. Found {len(results)} posts.")
        return results[:limit]
|
|
346
|
+
|
|
347
|
+
async def get_linkedin_profile_details(profile_url: str = None, session_path: str = "linkedin_session.json", **kwargs) -> Dict:
    """Extract detailed information from a LinkedIn profile. Requires 'profile_url'.

    Args:
        profile_url: Full URL of the profile; if missing, common kwarg
            aliases ('query', 'url', 'profile', 'link') are tried.
        session_path: Preferred session-file path (fallbacks also checked).
        **kwargs: 'framework' for event-bus logging, plus URL aliases.

    Returns:
        Dict with "name", "headline", "about", "experience" (up to three
        entries); adds "error" on scraping failure, or returns an
        INVALID_URL / SESSION_MISSING error dict early.
    """
    fw = kwargs.get('framework')
    # Log through the framework's event bus when available, else stdout.
    def tool_log(msg):
        if fw: fw.event_bus.emit("tool:log", {"agent": "LinkedInTool", "message": msg})
        else: print(f"   [Tool] {msg}")

    # Resilience: If agent uses 'query' or other names instead of 'profile_url'
    if not profile_url:
        profile_url = kwargs.get('query') or kwargs.get('url') or kwargs.get('profile') or kwargs.get('link')

    if not profile_url or not str(profile_url).startswith('http'):
        return {"error": "INVALID_URL", "message": f"'{profile_url}' is not a valid LinkedIn URL. Please provide a full URL."}

    tool_log(f"Visiting profile: {profile_url}")
    # Fixed desktop UA — LinkedIn serves a different DOM to headless agents.
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    async with BrowserManager(headless=True, user_agent=ua) as bm:
        active_session = _resolve_session_path(session_path)
        if active_session: await bm.load_session(active_session)
        else: return {"error": "SESSION_MISSING", "message": "Session file not found."}

        await bm.page.goto(profile_url, wait_until="domcontentloaded")
        # Give the SPA time to hydrate before querying selectors.
        await asyncio.sleep(5)

        details = {"name": "Unknown", "headline": "Unknown", "about": "", "experience": []}
        try:
            # Selectors below match LinkedIn's current profile DOM; they
            # are brittle by nature and fall back to the defaults above.
            name_el = bm.page.locator('h1.text-heading-xlarge').first
            if await name_el.count() > 0: details["name"] = (await name_el.inner_text()).strip()
            headline_el = bm.page.locator('.text-body-medium.break-words').first
            if await headline_el.count() > 0: details["headline"] = (await headline_el.inner_text()).strip()
            # '#about' anchors the About header; '..' walks to its section.
            about_section = bm.page.locator('#about').locator('..').locator('.display-flex.ph5.pv3').first
            if await about_section.count() > 0: details["about"] = (await about_section.inner_text()).strip()
            exp_items = await bm.page.locator('.experience-group-positions, .pvs-list__paged-list-item').all()
            # Only the first three experience entries are captured.
            for item in exp_items[:3]:
                text = await item.inner_text()
                if text.strip(): details["experience"].append(text.strip().replace('\n', ' | '))
        except Exception as e:
            # Best-effort: return whatever was scraped plus the error text.
            tool_log(f"Error scraping profile: {e}")
            details["error"] = str(e)
        return details
|
|
387
|
+
|
|
388
|
+
async def get_linkedin_company_details(company_url: str = None, session_path: str = "linkedin_session.json", **kwargs) -> Dict:
    """Extract detailed information from a LinkedIn Company page. Requires 'company_url'.

    Args:
        company_url: Full URL of the company page; if missing, common
            kwarg aliases ('query', 'url', 'company', 'link') are tried.
            The "/about/" subpage is appended automatically when absent.
        session_path: Preferred session-file path (fallbacks also checked).
        **kwargs: 'framework' for event-bus logging, plus URL aliases.

    Returns:
        Dict with "name", "industry", "size", "description"; adds "error"
        on scraping failure, or returns an INVALID_URL / SESSION_MISSING
        error dict early.
    """
    fw = kwargs.get('framework')
    # Log through the framework's event bus when available, else stdout.
    def tool_log(msg):
        if fw: fw.event_bus.emit("tool:log", {"agent": "LinkedInTool", "message": msg})
        else: print(f"   [Tool] {msg}")

    # Resilience: If agent uses 'query' or other names instead of 'company_url'
    if not company_url:
        company_url = kwargs.get('query') or kwargs.get('url') or kwargs.get('company') or kwargs.get('link')

    if not company_url or not str(company_url).startswith('http'):
        return {"error": "INVALID_URL", "message": f"'{company_url}' is not a valid LinkedIn URL. Please provide a full URL."}

    # The "/about/" subpage holds the description + details grid we scrape.
    if "/about" not in company_url: company_url = company_url.rstrip('/') + "/about/"
    tool_log(f"Visiting company: {company_url}")
    # Fixed desktop UA — LinkedIn serves a different DOM to headless agents.
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    async with BrowserManager(headless=True, user_agent=ua) as bm:
        active_session = _resolve_session_path(session_path)
        if active_session: await bm.load_session(active_session)
        else: return {"error": "SESSION_MISSING", "message": "Session file not found."}

        await bm.page.goto(company_url, wait_until="domcontentloaded")
        # Give the SPA time to hydrate before querying selectors.
        await asyncio.sleep(5)

        details = {"name": "Unknown", "industry": "Unknown", "size": "Unknown", "description": ""}
        try:
            name_el = bm.page.locator('h1.org-top-card-summary__title').first
            if await name_el.count() > 0: details["name"] = (await name_el.inner_text()).strip()
            desc_el = bm.page.locator('.org-about-us-organization-description__text').first
            if await desc_el.count() > 0: details["description"] = (await desc_el.inner_text()).strip()
            # The details grid is term/value pairs; pair the i-th term with
            # the i-th value (assumes both lists stay aligned — confirm
            # against LinkedIn's current markup).
            grid_items = await bm.page.locator('.org-page-details__definition-term').all()
            for i, item in enumerate(grid_items):
                term = (await item.inner_text()).lower()
                value_el = bm.page.locator('.org-page-details__definition-text').nth(i)
                if "industry" in term: details["industry"] = (await value_el.inner_text()).strip()
                elif "company size" in term: details["size"] = (await value_el.inner_text()).strip()
        except Exception as e:
            # Best-effort: return whatever was scraped plus the error text.
            tool_log(f"Error scraping company: {e}")
            details["error"] = str(e)
        return details
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Web Search Tool - Generic wrapper for web search.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
def web_search(query: str):
    """
    Search the web for information.

    Args:
        query: Search query

    Returns:
        Dict with search results.
    """
    # Placeholder for real search API (e.g. Tavily, Serper, etc.)
    # For now, providing a mock implementation for demonstration.
    mock_hit = {
        'title': f'Result for {query}',
        'snippet': f'This is a mock search result for the query: {query}',
        'url': 'https://example.com'
    }
    return {'success': True, 'query': query, 'results': [mock_hit]}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MCP (Model Context Protocol) Server Implementations
|
|
3
|
+
|
|
4
|
+
This package contains MCP server implementations for external integrations.
|
|
5
|
+
MCP servers provide standardized interfaces to external services that can be
|
|
6
|
+
used by AI agents through the Kite framework.
|
|
7
|
+
|
|
8
|
+
Available MCP Servers:
|
|
9
|
+
- SlackMCPServer: Slack workspace integration
|
|
10
|
+
- StripeMCPServer: Stripe payment processing
|
|
11
|
+
- GmailMCPServer: Gmail email management
|
|
12
|
+
- GDriveMCPServer: Google Drive file management
|
|
13
|
+
- PostgresMCPServer: PostgreSQL database access
|
|
14
|
+
- DatabaseMCP: Multi-database connector (SQLite, MySQL, Redis, Neo4j, etc.)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .slack_mcp_server import SlackMCPServer
|
|
18
|
+
from .stripe_mcp_server import StripeMCPServer
|
|
19
|
+
from .gmail_mcp_server import GmailMCPServer
|
|
20
|
+
from .gdrive_mcp_server import GDriveMCPServer
|
|
21
|
+
from .postgres_mcp_server import PostgresMCPServer
|
|
22
|
+
from .database_mcp import DatabaseMCP
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
'SlackMCPServer',
|
|
26
|
+
'StripeMCPServer',
|
|
27
|
+
'GmailMCPServer',
|
|
28
|
+
'GDriveMCPServer',
|
|
29
|
+
'PostgresMCPServer',
|
|
30
|
+
'DatabaseMCP'
|
|
31
|
+
]
|