PyPI - pipulate - Versions diffs - 1.0.0__py3-none-any.whl - Mend

pipulate 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

browser_automation/google_search_automation_demo.py +294 -0
browser_automation/google_search_example.py +294 -0
browser_automation/interactive_google_search.py +314 -0
browser_automation/temp_scripts/ai_seo_search.py +109 -0
browser_automation/temp_scripts/ai_seo_search_stealth.py +273 -0
browser_automation/temp_scripts/enhanced_stealth_demo.py +261 -0
cli.py +141 -0
client/api.py +1 -0
client/ariat/robots/analyze.py +860 -0
client/client.py +40 -0
client/customizations.py +76 -0
client/deltagalil/404_buster.py +8 -0
client/deltagalil/url_status_checker.py +180 -0
client/deltagalil/view_results.py +52 -0
pipulate-1.0.0.dist-info/METADATA +1287 -0
pipulate-1.0.0.dist-info/RECORD +20 -0
pipulate-1.0.0.dist-info/WHEEL +5 -0
pipulate-1.0.0.dist-info/entry_points.txt +2 -0
pipulate-1.0.0.dist-info/licenses/LICENSE +7 -0
pipulate-1.0.0.dist-info/top_level.txt +3 -0

browser_automation/google_search_automation_demo.py ADDED Viewed

@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+"""
+🔍 GOOGLE SEARCH AUTOMATION DEMO - 2ND STAGE AUTOMATION
+This demonstrates the complete 2nd-stage automation process:
+1. Capture Google.com with redirect chain analysis
+2. Clean and analyze DOM structure
+3. Identify automation targets
+4. Execute search automation
+5. Extract and analyze results
+This is the "success guaranteed moment" - taking it all the way to actual search execution.
+"""
+import asyncio
+import json
+import os
+import sys
+from typing import Dict, List
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.options import Options
+from seleniumwire import webdriver as wire_webdriver
+import tempfile
+import time
+# Add current directory to path for imports
+sys.path.append('.')
+from server import _browser_scrape_page, _browser_interact_with_current_page
+from helpers.dom_processing.enhanced_dom_processor import EnhancedDOMProcessor
+class GoogleSearchAutomationDemo:
+    """Five-step Google search automation demonstration.
+    The steps are: capture the page, clean/analyze the DOM, verify the
+    automation targets, execute a search, and analyze the results.
+    ``run_complete_demo`` chains them and prints a summary.
+    """
+    def __init__(self):
+        """Create the DOM processor and initialise the demo state."""
+        self.processor = EnhancedDOMProcessor()
+        # Directory whose files (headers.json, *_dom*.html) steps 1-3 read.
+        self.looking_at_dir = "browser_automation/looking_at"
+        self.search_results = []
+        self.redirect_chain = []
+    async def step_1_capture_google(self) -> Dict:
+        """Step 1: Capture Google.com and report the URL recorded in headers.json.
+        Returns:
+            The result dict produced by ``_browser_scrape_page``, unmodified.
+        """
+        print("🔍 STEP 1: Capturing Google.com...")
+        result = await _browser_scrape_page({
+            'url': 'https://google.com',
+            'wait_seconds': 5,
+            'take_screenshot': True,
+            'update_looking_at': True
+        })
+        if not result.get('success'):
+            # Report the failure instead of returning silently.
+            print(f"❌ Capture failed: {result.get('error', 'unknown error')}")
+            return result
+        page_info = result['page_info']
+        print(f"✅ Captured: {page_info['title']}")
+        print(f"🌐 Final URL: {page_info['url']}")
+        print(f"📄 Files created: {len(result['looking_at_files'])}")
+        # Check for redirect chain in headers
+        headers_path = os.path.join(self.looking_at_dir, 'headers.json')
+        if os.path.exists(headers_path):
+            with open(headers_path, 'r', encoding='utf-8') as f:
+                headers_data = json.load(f)
+            print(f"📊 Redirect analysis: {headers_data.get('url', 'Unknown')}")
+        return result
+    def step_2_clean_and_analyze(self) -> Dict:
+        """Step 2: Process the captured files and report automation targets.
+        Returns:
+            The dict returned by ``process_looking_at_directory``.
+        """
+        print("\n🔧 STEP 2: Cleaning DOM and analyzing automation targets...")
+        results = self.processor.process_looking_at_directory(self.looking_at_dir)
+        ready_label = '✅ YES' if results['automation_ready'] else '❌ NO'
+        print(f"📁 Files processed: {len(results['files_processed'])}")
+        print(f"🎯 Automation ready: {ready_label}")
+        print(f"🌐 Strategy: {results['automation_hints'].get('automation_strategy', 'unknown')}")
+        search_box = results['google_targets'].get('search_box')
+        if search_box:
+            print(f"🔍 Search box found: {search_box['css_selector']}")
+            print(f"📍 XPath: {search_box['xpath']}")
+            print(f"⭐ Priority: {search_box['priority']}")
+        return results
+    def step_3_verify_targets(self) -> Dict:
+        """Step 3: Verify automation targets by substring search.
+        Records the size of each known DOM snapshot and checks the cleaned
+        DOM for the markers of the search box and the search button.
+        Returns:
+            Dict with ``search_box_found``, ``search_button_found``,
+            ``grep_results`` (marker -> bool) and ``file_sizes``
+            (filename -> bytes). Files that do not exist are skipped.
+        """
+        print("\n🔍 STEP 3: Verifying automation targets...")
+        verification_results = {
+            'search_box_found': False,
+            'search_button_found': False,
+            'grep_results': {},
+            'file_sizes': {}
+        }
+        # Check file sizes
+        for filename in ('simple_dom.html', 'simple_dom_cleaned.html', 'beautiful_dom.html'):
+            filepath = os.path.join(self.looking_at_dir, filename)
+            if os.path.exists(filepath):
+                size = os.path.getsize(filepath)
+                verification_results['file_sizes'][filename] = size
+                print(f"📄 {filename}: {size:,} bytes")
+        # Test grep targets
+        grep_targets = ['name="q"', 'btnK', 'aria-label="Search"']
+        grep_results = verification_results['grep_results']
+        cleaned_dom_path = os.path.join(self.looking_at_dir, 'simple_dom_cleaned.html')
+        if os.path.exists(cleaned_dom_path):
+            # Read as UTF-8 (not the locale default) and tolerate stray bytes
+            # so an oddly encoded capture cannot abort the verification.
+            with open(cleaned_dom_path, 'r', encoding='utf-8', errors='replace') as f:
+                content = f.read()
+            for target in grep_targets:
+                found = target in content
+                grep_results[target] = found
+                print(f"✅ Found: {target}" if found else f"❌ Missing: {target}")
+        # Derive both element flags from the grep results; the button flag
+        # was previously declared but never populated.
+        verification_results['search_box_found'] = grep_results.get('name="q"', False)
+        verification_results['search_button_found'] = grep_results.get('btnK', False)
+        return verification_results
+    async def step_4_execute_search(self, query: str = "AI automation tools") -> Dict:
+        """Step 4: Type the query, press Enter and screenshot the results.
+        Args:
+            query: Text typed into the ``textarea[name="q"]`` element.
+        Returns:
+            Dict with ``query``, ``success``, ``results_found``,
+            ``search_time`` (seconds, set only on success) and ``error``
+            (``None`` on success, otherwise a short failure description).
+        """
+        print(f"\n🚀 STEP 4: Executing Google search for '{query}'...")
+        search_results = {
+            'query': query,
+            'success': False,
+            'results_found': 0,
+            'search_time': 0,
+            'error': None
+        }
+        try:
+            start_time = time.time()
+            # Use browser_interact_with_current_page to type in search box
+            type_result = await _browser_interact_with_current_page({
+                'action': 'type',
+                'selector': 'textarea[name="q"]',
+                'selector_type': 'css',
+                'text': query
+            })
+            if not type_result.get('success'):
+                search_results['error'] = "Failed to type in search box"
+                return search_results
+            print(f"✅ Typed query: {query}")
+            # Press Enter to search
+            submit_result = await _browser_interact_with_current_page({
+                'action': 'key',
+                'selector': 'textarea[name="q"]',
+                'selector_type': 'css',
+                'key': 'ENTER'
+            })
+            if not submit_result.get('success'):
+                search_results['error'] = "Failed to submit search"
+                return search_results
+            print("✅ Search submitted")
+            # Wait a moment for results to load
+            await asyncio.sleep(3)
+            # Take screenshot of results
+            screenshot_result = await _browser_interact_with_current_page({
+                'action': 'screenshot'
+            })
+            if not screenshot_result.get('success'):
+                # Previously this path returned success=False with no error.
+                search_results['error'] = "Failed to capture results screenshot"
+                return search_results
+            print("✅ Results screenshot captured")
+            search_results['success'] = True
+            search_results['search_time'] = time.time() - start_time
+            # Placeholder: no result counting is implemented yet, so this
+            # number does not reflect the actual results page.
+            search_results['results_found'] = 10
+        except Exception as e:
+            search_results['error'] = str(e)
+            print(f"❌ Search failed: {e}")
+        return search_results
+    def step_5_analyze_results(self) -> Dict:
+        """Step 5: Describe the planned result analysis (stub).
+        Returns:
+            Dict of counters, all zero: no extraction is performed yet.
+        """
+        print("\n📊 STEP 5: Analyzing search results...")
+        analysis = {
+            'results_extracted': 0,
+            'links_found': 0,
+            'titles_found': 0,
+            'snippets_found': 0
+        }
+        # This would involve parsing the results page DOM
+        # For now, we'll simulate the analysis
+        print("📈 Results analysis would extract:")
+        for item in ("Search result titles", "URLs and snippets",
+                     "Related searches", "Search statistics"):
+            print(f"  - {item}")
+        return analysis
+    async def run_complete_demo(self, search_query: str = "AI automation tools") -> Dict:
+        """Run steps 1-5 in order and print a summary.
+        Step 4 runs only if step 3 found the search box, and step 5 only if
+        step 4 succeeded. Any exception stops the pipeline and is recorded
+        under ``error``.
+        Args:
+            search_query: Query passed to ``step_4_execute_search``.
+        Returns:
+            Dict with ``steps_completed``, ``total_time`` (seconds),
+            ``success``, ``step_results`` and, on failure, ``error``.
+        """
+        print("🎯 GOOGLE SEARCH AUTOMATION DEMO - COMPLETE PIPELINE")
+        print("=" * 60)
+        demo_results = {
+            'steps_completed': 0,
+            'total_time': 0,
+            'success': False,
+            'step_results': {}
+        }
+        step_results = demo_results['step_results']
+        start_time = time.time()
+        try:
+            # Step 1: Capture Google
+            step_results['capture'] = await self.step_1_capture_google()
+            demo_results['steps_completed'] = 1
+            # Step 2: Clean and analyze
+            step_results['analyze'] = self.step_2_clean_and_analyze()
+            demo_results['steps_completed'] = 2
+            # Step 3: Verify targets
+            step3_result = self.step_3_verify_targets()
+            step_results['verify'] = step3_result
+            demo_results['steps_completed'] = 3
+            # Step 4: Execute search (only if verification passed)
+            if step3_result['search_box_found']:
+                step4_result = await self.step_4_execute_search(search_query)
+                step_results['search'] = step4_result
+                demo_results['steps_completed'] = 4
+                # Step 5: Analyze results
+                if step4_result['success']:
+                    step_results['analyze_results'] = self.step_5_analyze_results()
+                    demo_results['steps_completed'] = 5
+                    demo_results['success'] = True
+        except Exception as e:
+            print(f"❌ Demo failed at step {demo_results['steps_completed']}: {e}")
+            demo_results['error'] = str(e)
+        finally:
+            # Record elapsed time on the failure path too, so the summary
+            # does not report 0.00s for a run that crashed midway.
+            demo_results['total_time'] = time.time() - start_time
+        # Summary
+        print("\n🎯 DEMO SUMMARY")
+        print("=" * 30)
+        print(f"Steps completed: {demo_results['steps_completed']}/5")
+        print(f"Total time: {demo_results['total_time']:.2f}s")
+        print(f"Success: {'✅ YES' if demo_results['success'] else '❌ NO'}")
+        return demo_results
+async def main():
+    """Run the demo once and save its results as JSON.
+    Returns:
+        The results dict from ``run_complete_demo``.
+    """
+    demo = GoogleSearchAutomationDemo()
+    # Run the complete demo
+    results = await demo.run_complete_demo("AI automation tools")
+    # Save results
+    results_file = "browser_automation/looking_at/demo_results.json"
+    # Create the output directory if it is missing so the save cannot fail
+    # with FileNotFoundError.
+    os.makedirs(os.path.dirname(results_file), exist_ok=True)
+    with open(results_file, 'w', encoding='utf-8') as f:
+        # default=str stringifies non-serializable objects in a single pass;
+        # no dumps/loads round trip is needed before writing.
+        json.dump(results, f, indent=2, default=str)
+    print(f"\n📄 Demo results saved to: {results_file}")
+    return results
+if __name__ == "__main__":
+    asyncio.run(main())

browser_automation/google_search_example.py ADDED Viewed

@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+"""
+🔍 GOOGLE SEARCH AUTOMATION EXAMPLE
+Demonstrates the power of the AI DOM Beautifier and Automation Registry
+for real-world browser automation tasks.
+This example shows how an AI assistant can:
+1. Capture and beautify Google's search page
+2. Build a comprehensive automation registry
+3. Perform searches using multiple selector strategies
+4. Extract and analyze search results
+Usage:
+    python google_search_example.py "AI automation tools"
+"""
+import sys
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from helpers.dom_processing.ai_dom_beautifier import AIDOMBeautifier
+class GoogleSearchAutomator:
+    """Google search automation driven by Selenium and ``AIDOMBeautifier``.
+    ``demonstrate_ai_automation`` is the entry point: it starts Chrome,
+    captures the homepage, performs a search, extracts the top results and
+    always closes the browser afterwards.
+    """
+    def __init__(self):
+        """Initialise without a browser; ``setup_driver`` creates it."""
+        self.driver = None
+        self.beautifier = AIDOMBeautifier()
+        # Targets returned by the most recent ``beautify_dom`` call.
+        self.automation_registry = []
+    def setup_driver(self):
+        """Create the Chrome driver, store it on ``self.driver`` and return it."""
+        chrome_options = Options()
+        for argument in ('--no-sandbox', '--disable-dev-shm-usage',
+                         '--disable-gpu', '--window-size=1920,1080'):
+            chrome_options.add_argument(argument)
+        self.driver = webdriver.Chrome(options=chrome_options)
+        return self.driver
+    def capture_and_beautify_page(self, url: str, save_prefix: str = "google", navigate: bool = True):
+        """Capture the page DOM, beautify it and write the export files.
+        Args:
+            url: Page to load. Only used when ``navigate`` is true.
+            save_prefix: Filename prefix for the files written to ``looking_at/``.
+            navigate: When false, capture the page the browser is already
+                showing instead of loading ``url`` again.
+        Returns:
+            Tuple ``(beautiful_dom, automation_registry)`` from
+            ``AIDOMBeautifier.beautify_dom``.
+        """
+        import os
+        if navigate:
+            print(f"🌐 Navigating to: {url}")
+            self.driver.get(url)
+            time.sleep(2)
+        # Get page source after JavaScript execution
+        dom_html = self.driver.execute_script("return document.documentElement.outerHTML;")
+        print("🎨 Creating beautiful DOM and automation registry...")
+        beautiful_dom, self.automation_registry = self.beautifier.beautify_dom(dom_html)
+        # Save files to proper looking_at directory
+        looking_at_dir = 'looking_at'
+        os.makedirs(looking_at_dir, exist_ok=True)
+        # (filename suffix, content producer); producers run only once their
+        # file is open, in the same order as the files are written.
+        exports = (
+            ('beautiful_dom.html', lambda: beautiful_dom),
+            ('automation_registry.json', lambda: self.beautifier.export_automation_registry('json')),
+            ('automation_targets.py', self.beautifier._export_python_registry),
+            ('automation_summary.txt', self.beautifier._export_summary),
+        )
+        for suffix, render in exports:
+            with open(f"{looking_at_dir}/{save_prefix}_{suffix}", 'w', encoding='utf-8') as f:
+                f.write(render())
+        print(f"📊 Found {len(self.automation_registry)} automation targets")
+        high_priority_count = sum(1 for t in self.automation_registry if t.priority_score >= 70)
+        print(f"🎯 High priority targets: {high_priority_count}")
+        return beautiful_dom, self.automation_registry
+    def find_search_box(self):
+        """Find the search box, trying fixed selectors and then the registry.
+        Returns:
+            The first matching element.
+        Raises:
+            RuntimeError: If no strategy finds a search box.
+        """
+        print("🔍 Looking for Google search box...")
+        locators = {'name': By.NAME, 'css': By.CSS_SELECTOR}
+        # Strategy 1: Try common Google search selectors
+        search_selectors = [
+            ('name', 'q'),  # Most reliable for Google
+            ('css', 'input[name="q"]'),
+            ('css', 'input[type="search"]'),
+            ('css', 'textarea[name="q"]'),  # Google sometimes uses textarea
+        ]
+        for selector_type, selector in search_selectors:
+            try:
+                element = self.driver.find_element(locators[selector_type], selector)
+            except Exception:
+                # Best effort: this selector did not match, try the next.
+                # Unlike a bare except, Ctrl-C still propagates.
+                continue
+            print(f"✅ Found search box using {selector_type}: {selector}")
+            return element
+        # Strategy 2: Use automation registry if available
+        for target in self.automation_registry:
+            if target.tag != 'input' or target.priority_score < 70:
+                continue
+            try:
+                looks_like_search = ('search' in (target.text or '').lower()
+                                     or target.automation_attributes.get('name') == 'q')
+                if looks_like_search:
+                    # Only the last step of the selector path is used.
+                    element = self.driver.find_element(By.CSS_SELECTOR, target.css_selector.split(' > ')[-1])
+                    print(f"✅ Found search box using automation registry: {target.css_selector}")
+                    return element
+            except Exception:
+                continue
+        raise RuntimeError("❌ Could not find Google search box")
+    def find_search_button(self):
+        """Find the search button, trying fixed selectors and then the registry.
+        Returns:
+            The first matching element, or ``None`` if nothing matched (the
+            caller then submits with the Enter key).
+        """
+        print("🔍 Looking for Google search button...")
+        # Strategy 1: Try common Google search button selectors
+        button_selectors = [
+            ('css', 'input[name="btnK"]'),  # Google Search button
+            ('css', 'input[value*="Google Search"]'),
+            ('css', 'input[value*="Search"]'),
+            ('css', 'button[type="submit"]'),
+        ]
+        for selector_type, selector in button_selectors:
+            try:
+                element = self.driver.find_element(By.CSS_SELECTOR, selector)
+            except Exception:
+                continue
+            print(f"✅ Found search button using {selector_type}: {selector}")
+            return element
+        # Strategy 2: Use automation registry
+        for target in self.automation_registry:
+            if target.tag not in ('button', 'input') or target.priority_score < 50:
+                continue
+            try:
+                # The text check sits inside the try and guards against a
+                # missing text value, so one odd target cannot abort the scan.
+                looks_like_submit = ('search' in (target.text or '').lower()
+                                     or 'submit' in target.automation_attributes.get('type', ''))
+                if looks_like_submit:
+                    element = self.driver.find_element(By.CSS_SELECTOR, target.css_selector.split(' > ')[-1])
+                    print(f"✅ Found search button using automation registry: {target.css_selector}")
+                    return element
+            except Exception:
+                continue
+        print("⚠️ Could not find search button, will use Enter key instead")
+        return None
+    def perform_search(self, query: str):
+        """Type ``query`` into the search box, submit it and wait for results.
+        Raises:
+            RuntimeError: If the search box cannot be found.
+            Exception: Whatever ``WebDriverWait.until`` raises if ``#search``
+                does not appear within 10 seconds.
+        """
+        print(f"🔍 Performing search for: '{query}'")
+        # Find and interact with search box
+        search_box = self.find_search_box()
+        search_box.clear()
+        search_box.send_keys(query)
+        # Try to find and click search button, or use Enter
+        search_button = self.find_search_button()
+        submitted = False
+        if search_button is not None:
+            try:
+                search_button.click()
+                submitted = True
+            except Exception as e:
+                # A located button may still refuse the click (for example
+                # when hidden or covered); fall back to Enter, not abort.
+                print(f"⚠️ Search button not clickable ({type(e).__name__}), using Enter key instead")
+        if not submitted:
+            from selenium.webdriver.common.keys import Keys
+            search_box.send_keys(Keys.RETURN)
+        # Wait for results to load
+        print("⏳ Waiting for search results...")
+        WebDriverWait(self.driver, 10).until(
+            EC.presence_of_element_located((By.CSS_SELECTOR, "#search"))
+        )
+        print("✅ Search completed successfully!")
+    def analyze_search_results(self):
+        """Capture the current results page and extract the top five results.
+        Returns:
+            List of dicts with ``position``, ``title``, ``url`` and
+            ``snippet`` (snippets longer than 200 characters are cut and
+            suffixed with ``...``). Results that cannot be parsed are skipped.
+        """
+        print("📊 Analyzing search results...")
+        # Capture and beautify results page. The browser is already on it,
+        # so skip navigation rather than reload the same URL.
+        _, results_registry = self.capture_and_beautify_page(
+            self.driver.current_url,
+            "google_results",
+            navigate=False
+        )
+        # Count likely result elements in the automation registry
+        result_targets = [
+            target for target in results_registry
+            if target.tag in ('div', 'a', 'h3')
+            and target.priority_score >= 30
+            and len(target.text) > 10
+        ]
+        print(f"🎯 Found {len(result_targets)} potential result elements")
+        # Extract actual search results
+        results = []
+        try:
+            # Common Google result selectors
+            result_elements = self.driver.find_elements(By.CSS_SELECTOR, "div.g")
+            for position, element in enumerate(result_elements[:5], start=1):  # Top 5 results
+                try:
+                    title_elem = element.find_element(By.CSS_SELECTOR, "h3")
+                    link_elem = element.find_element(By.CSS_SELECTOR, "a")
+                    result = {
+                        'position': position,
+                        'title': title_elem.text,
+                        'url': link_elem.get_attribute('href'),
+                        'snippet': ''
+                    }
+                except Exception:
+                    # No title or link in this container: skip it.
+                    continue
+                # Try to get snippet
+                try:
+                    # Read the text once; each access asks the browser again.
+                    snippet_text = element.find_element(By.CSS_SELECTOR, "span").text
+                    if len(snippet_text) > 200:
+                        snippet_text = snippet_text[:200] + "..."
+                    result['snippet'] = snippet_text
+                except Exception:
+                    pass  # The snippet is optional.
+                results.append(result)
+        except Exception as e:
+            print(f"⚠️ Error extracting results: {e}")
+        return results
+    def demonstrate_ai_automation(self, search_query: str):
+        """Run the full demo for ``search_query`` and print the results.
+        Errors are caught and printed; the browser is closed in every case.
+        """
+        print("🤖 AI GOOGLE SEARCH AUTOMATION DEMO")
+        print("=" * 50)
+        try:
+            # 1. Set up browser
+            self.setup_driver()
+            # 2. Navigate to Google and analyze page
+            self.capture_and_beautify_page("https://www.google.com", "google_homepage")
+            # 3. Perform search using AI-identified elements
+            self.perform_search(search_query)
+            # 4. Analyze results
+            results = self.analyze_search_results()
+            # 5. Display results
+            print("\n🎉 SEARCH RESULTS:")
+            print("=" * 30)
+            for result in results:
+                print(f"{result['position']}. {result['title']}")
+                print(f"   URL: {result['url']}")
+                if result['snippet']:
+                    print(f"   Snippet: {result['snippet']}")
+                print()
+            print(f"✅ Successfully automated Google search for '{search_query}'")
+            print(f"📊 Total automation targets identified: {len(self.automation_registry)}")
+        except Exception as e:
+            print(f"❌ Error during automation: {e}")
+        finally:
+            if self.driver:
+                self.driver.quit()
+                print("🔚 Browser closed")
+def main():
+    """Run the demo for the query in ``sys.argv[1]`` (or a default query).
+    Afterwards list the export files that actually exist under
+    ``looking_at/``.
+    """
+    import os
+    search_query = sys.argv[1] if len(sys.argv) > 1 else "AI browser automation tools"
+    automator = GoogleSearchAutomator()
+    automator.demonstrate_ai_automation(search_query)
+    # The demo catches its own errors, so a failed run returns normally.
+    # Listing only files that exist avoids claiming output that was never
+    # written.
+    expected_files = [
+        f"looking_at/{prefix}_{suffix}"
+        for prefix in ("google_homepage", "google_results")
+        for suffix in ("beautiful_dom.html", "automation_registry.json",
+                       "automation_targets.py", "automation_summary.txt")
+    ]
+    generated = [path for path in expected_files if os.path.exists(path)]
+    print("\n📁 Generated files in looking_at/ directory:")
+    for path in generated:
+        print(f"- {path}")
+    if not generated:
+        print("⚠️ No files were generated")
+if __name__ == "__main__":
+    main()