moriarty-project 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moriarty/__init__.py +5 -0
- moriarty/adapters/__init__.py +0 -0
- moriarty/agent/__init__.py +0 -0
- moriarty/assets/modules/.gitkeep +0 -0
- moriarty/assets/modules/asia/douban.yaml +19 -0
- moriarty/assets/modules/asia/kakao.yaml +19 -0
- moriarty/assets/modules/asia/line.yaml +19 -0
- moriarty/assets/modules/asia/mixi.yaml +19 -0
- moriarty/assets/modules/asia/naver.yaml +19 -0
- moriarty/assets/modules/asia/qq.yaml +19 -0
- moriarty/assets/modules/asia/vk.yaml +19 -0
- moriarty/assets/modules/asia/wechat.yaml +19 -0
- moriarty/assets/modules/asia/weibo.yaml +19 -0
- moriarty/assets/modules/asia/xiaohongshu.yaml +19 -0
- moriarty/assets/modules/behance.yaml +47 -0
- moriarty/assets/modules/business/crunchbase.yaml +27 -0
- moriarty/assets/modules/business/fiverr.yaml +32 -0
- moriarty/assets/modules/business/freelancer.yaml +27 -0
- moriarty/assets/modules/business/glassdoor.yaml +27 -0
- moriarty/assets/modules/business/guru.yaml +26 -0
- moriarty/assets/modules/business/indeed.yaml +25 -0
- moriarty/assets/modules/business/monster.yaml +25 -0
- moriarty/assets/modules/business/peopleperhour.yaml +26 -0
- moriarty/assets/modules/business/toptal.yaml +28 -0
- moriarty/assets/modules/business/upwork.yaml +27 -0
- moriarty/assets/modules/business/ziprecruiter.yaml +25 -0
- moriarty/assets/modules/content/buymeacoffee.yaml +27 -0
- moriarty/assets/modules/content/gumroad.yaml +27 -0
- moriarty/assets/modules/content/ko-fi.yaml +32 -0
- moriarty/assets/modules/content/onlyfans.yaml +27 -0
- moriarty/assets/modules/content/patreon.yaml +33 -0
- moriarty/assets/modules/content/substack.yaml +32 -0
- moriarty/assets/modules/creative/500px.yaml +31 -0
- moriarty/assets/modules/creative/artstation.yaml +33 -0
- moriarty/assets/modules/creative/deviantart.yaml +32 -0
- moriarty/assets/modules/creative/flickr.yaml +31 -0
- moriarty/assets/modules/creative/pexels.yaml +26 -0
- moriarty/assets/modules/creative/unsplash.yaml +26 -0
- moriarty/assets/modules/creative/vimeo.yaml +31 -0
- moriarty/assets/modules/crypto/binance.yaml +27 -0
- moriarty/assets/modules/crypto/bitcointalk.yaml +33 -0
- moriarty/assets/modules/crypto/coinbase.yaml +26 -0
- moriarty/assets/modules/crypto/etherscan.yaml +32 -0
- moriarty/assets/modules/crypto/foundation.yaml +28 -0
- moriarty/assets/modules/crypto/kraken.yaml +27 -0
- moriarty/assets/modules/crypto/mirror.yaml +27 -0
- moriarty/assets/modules/crypto/niftygateway.yaml +26 -0
- moriarty/assets/modules/crypto/opensea.yaml +32 -0
- moriarty/assets/modules/crypto/rarible.yaml +27 -0
- moriarty/assets/modules/crypto/superrare.yaml +29 -0
- moriarty/assets/modules/dating/bumble.yaml +25 -0
- moriarty/assets/modules/dating/grindr.yaml +27 -0
- moriarty/assets/modules/dating/happn.yaml +25 -0
- moriarty/assets/modules/dating/her.yaml +27 -0
- moriarty/assets/modules/dating/hinge.yaml +25 -0
- moriarty/assets/modules/dating/match.yaml +25 -0
- moriarty/assets/modules/dating/meetme.yaml +27 -0
- moriarty/assets/modules/dating/okcupid.yaml +25 -0
- moriarty/assets/modules/dating/pof.yaml +25 -0
- moriarty/assets/modules/dating/tinder.yaml +25 -0
- moriarty/assets/modules/dating-nsfw/adultfriendfinder.yaml +28 -0
- moriarty/assets/modules/dating-nsfw/ashley-madison.yaml +26 -0
- moriarty/assets/modules/design/adobe-portfolio.yaml +27 -0
- moriarty/assets/modules/design/carbonmade.yaml +27 -0
- moriarty/assets/modules/design/cgsociety.yaml +27 -0
- moriarty/assets/modules/design/coroflot.yaml +27 -0
- moriarty/assets/modules/design/figma.yaml +27 -0
- moriarty/assets/modules/design/sketch.yaml +26 -0
- moriarty/assets/modules/dev/bitbucket.yaml +35 -0
- moriarty/assets/modules/dev/codeforces.yaml +32 -0
- moriarty/assets/modules/dev/codepen.yaml +34 -0
- moriarty/assets/modules/dev/hackerone.yaml +32 -0
- moriarty/assets/modules/dev/hackthebox.yaml +27 -0
- moriarty/assets/modules/dev/huggingface.yaml +27 -0
- moriarty/assets/modules/dev/kaggle.yaml +32 -0
- moriarty/assets/modules/dev/leetcode.yaml +32 -0
- moriarty/assets/modules/dev/replit.yaml +31 -0
- moriarty/assets/modules/dribbble.yaml +53 -0
- moriarty/assets/modules/ecommerce/etsy.yaml +32 -0
- moriarty/assets/modules/education/duolingo.yaml +32 -0
- moriarty/assets/modules/education/edx.yaml +26 -0
- moriarty/assets/modules/education/khanacademy.yaml +26 -0
- moriarty/assets/modules/education/lynda.yaml +27 -0
- moriarty/assets/modules/education/memrise.yaml +27 -0
- moriarty/assets/modules/education/pluralsight.yaml +27 -0
- moriarty/assets/modules/education/skillshare.yaml +27 -0
- moriarty/assets/modules/education/udacity.yaml +27 -0
- moriarty/assets/modules/email/github_email.yaml +40 -0
- moriarty/assets/modules/email/gravatar.yaml +23 -0
- moriarty/assets/modules/europe/badoo.yaml +19 -0
- moriarty/assets/modules/europe/lovoo.yaml +19 -0
- moriarty/assets/modules/europe/myspace.yaml +19 -0
- moriarty/assets/modules/europe/netlog.yaml +19 -0
- moriarty/assets/modules/europe/ok.yaml +19 -0
- moriarty/assets/modules/europe/skyrock.yaml +19 -0
- moriarty/assets/modules/europe/studivz.yaml +19 -0
- moriarty/assets/modules/europe/tuenti.yaml +19 -0
- moriarty/assets/modules/europe/viadeo.yaml +19 -0
- moriarty/assets/modules/europe/xing.yaml +19 -0
- moriarty/assets/modules/fitness/fitbit.yaml +27 -0
- moriarty/assets/modules/fitness/garmin.yaml +27 -0
- moriarty/assets/modules/fitness/myfitnesspal.yaml +27 -0
- moriarty/assets/modules/fitness/strava.yaml +33 -0
- moriarty/assets/modules/fitness/zwift.yaml +28 -0
- moriarty/assets/modules/food/allrecipes.yaml +27 -0
- moriarty/assets/modules/food/tasty.yaml +27 -0
- moriarty/assets/modules/food/yelp.yaml +32 -0
- moriarty/assets/modules/food/zomato.yaml +28 -0
- moriarty/assets/modules/forums/4chan.yaml +26 -0
- moriarty/assets/modules/forums/8kun.yaml +26 -0
- moriarty/assets/modules/forums/9gag.yaml +26 -0
- moriarty/assets/modules/forums/discourse.yaml +26 -0
- moriarty/assets/modules/forums/disqus.yaml +31 -0
- moriarty/assets/modules/forums/hackernews.yaml +32 -0
- moriarty/assets/modules/forums/launchpad.yaml +27 -0
- moriarty/assets/modules/forums/phpbb.yaml +25 -0
- moriarty/assets/modules/forums/quora.yaml +32 -0
- moriarty/assets/modules/forums/serverfault.yaml +27 -0
- moriarty/assets/modules/forums/slashdot.yaml +28 -0
- moriarty/assets/modules/forums/stackexchange.yaml +32 -0
- moriarty/assets/modules/forums/superuser.yaml +27 -0
- moriarty/assets/modules/forums/vbulletin.yaml +25 -0
- moriarty/assets/modules/forums/xenforo.yaml +25 -0
- moriarty/assets/modules/forums-nsfw/kiwifarms.yaml +25 -0
- moriarty/assets/modules/forums-nsfw/lolcow.yaml +26 -0
- moriarty/assets/modules/gaming/apextracker.yaml +27 -0
- moriarty/assets/modules/gaming/battlenet.yaml +26 -0
- moriarty/assets/modules/gaming/chess.yaml +30 -0
- moriarty/assets/modules/gaming/discord-public.yaml +27 -0
- moriarty/assets/modules/gaming/dotabuff.yaml +32 -0
- moriarty/assets/modules/gaming/epicgames.yaml +25 -0
- moriarty/assets/modules/gaming/faceit.yaml +33 -0
- moriarty/assets/modules/gaming/fortnitetracker.yaml +32 -0
- moriarty/assets/modules/gaming/gog.yaml +26 -0
- moriarty/assets/modules/gaming/itch.yaml +32 -0
- moriarty/assets/modules/gaming/kongregate.yaml +25 -0
- moriarty/assets/modules/gaming/minecraft.yaml +31 -0
- moriarty/assets/modules/gaming/opgg.yaml +32 -0
- moriarty/assets/modules/gaming/origin.yaml +26 -0
- moriarty/assets/modules/gaming/playstation.yaml +30 -0
- moriarty/assets/modules/gaming/roblox.yaml +31 -0
- moriarty/assets/modules/gaming/xbox.yaml +25 -0
- moriarty/assets/modules/github.yaml +68 -0
- moriarty/assets/modules/gitlab.yaml +60 -0
- moriarty/assets/modules/instagram.yaml +48 -0
- moriarty/assets/modules/latam/fotolog.yaml +27 -0
- moriarty/assets/modules/latam/orkut.yaml +26 -0
- moriarty/assets/modules/latam/taringa.yaml +27 -0
- moriarty/assets/modules/learning/coursera.yaml +26 -0
- moriarty/assets/modules/learning/udemy.yaml +26 -0
- moriarty/assets/modules/linkedin.yaml +40 -0
- moriarty/assets/modules/marketplaces/depop.yaml +28 -0
- moriarty/assets/modules/marketplaces/ebay.yaml +32 -0
- moriarty/assets/modules/marketplaces/grailed.yaml +27 -0
- moriarty/assets/modules/marketplaces/mercari.yaml +26 -0
- moriarty/assets/modules/marketplaces/poshmark.yaml +27 -0
- moriarty/assets/modules/marketplaces/reverb.yaml +27 -0
- moriarty/assets/modules/marketplaces/vinted.yaml +28 -0
- moriarty/assets/modules/medium.yaml +44 -0
- moriarty/assets/modules/music/audiomack.yaml +26 -0
- moriarty/assets/modules/music/bandcamp.yaml +30 -0
- moriarty/assets/modules/music/beatport.yaml +28 -0
- moriarty/assets/modules/music/deezer.yaml +26 -0
- moriarty/assets/modules/music/discogs.yaml +32 -0
- moriarty/assets/modules/music/genius.yaml +26 -0
- moriarty/assets/modules/music/lastfm.yaml +30 -0
- moriarty/assets/modules/music/mixcloud.yaml +26 -0
- moriarty/assets/modules/music/reverbnation.yaml +31 -0
- moriarty/assets/modules/music/soundcloud.yaml +31 -0
- moriarty/assets/modules/music/spotify.yaml +26 -0
- moriarty/assets/modules/music/tidal.yaml +26 -0
- moriarty/assets/modules/nsfw/adultwork.yaml +27 -0
- moriarty/assets/modules/nsfw/bongacams.yaml +28 -0
- moriarty/assets/modules/nsfw/cam4.yaml +28 -0
- moriarty/assets/modules/nsfw/chaturbate.yaml +28 -0
- moriarty/assets/modules/nsfw/clips4sale.yaml +27 -0
- moriarty/assets/modules/nsfw/extralunchmoney.yaml +27 -0
- moriarty/assets/modules/nsfw/fansly.yaml +28 -0
- moriarty/assets/modules/nsfw/fetlife.yaml +28 -0
- moriarty/assets/modules/nsfw/iwantclips.yaml +27 -0
- moriarty/assets/modules/nsfw/justforfans.yaml +28 -0
- moriarty/assets/modules/nsfw/loyalfans.yaml +28 -0
- moriarty/assets/modules/nsfw/manyvids.yaml +27 -0
- moriarty/assets/modules/nsfw/myfreecams.yaml +28 -0
- moriarty/assets/modules/nsfw/niteflirt.yaml +26 -0
- moriarty/assets/modules/nsfw/pornhub.yaml +32 -0
- moriarty/assets/modules/nsfw/redtube.yaml +27 -0
- moriarty/assets/modules/nsfw/stripchat.yaml +28 -0
- moriarty/assets/modules/nsfw/xhamster.yaml +27 -0
- moriarty/assets/modules/nsfw/xvideos.yaml +27 -0
- moriarty/assets/modules/nsfw/youporn.yaml +27 -0
- moriarty/assets/modules/photography/eyeem.yaml +25 -0
- moriarty/assets/modules/photography/fotki.yaml +25 -0
- moriarty/assets/modules/photography/photobucket.yaml +26 -0
- moriarty/assets/modules/photography/smugmug.yaml +25 -0
- moriarty/assets/modules/photography/vsco.yaml +27 -0
- moriarty/assets/modules/pinterest.yaml +40 -0
- moriarty/assets/modules/podcasts/anchor.yaml +26 -0
- moriarty/assets/modules/podcasts/castbox.yaml +26 -0
- moriarty/assets/modules/podcasts/podbean.yaml +26 -0
- moriarty/assets/modules/professional/about.yaml +31 -0
- moriarty/assets/modules/professional/academia.yaml +27 -0
- moriarty/assets/modules/professional/angellist.yaml +27 -0
- moriarty/assets/modules/professional/calendly.yaml +26 -0
- moriarty/assets/modules/professional/issuu.yaml +27 -0
- moriarty/assets/modules/professional/mendeley.yaml +27 -0
- moriarty/assets/modules/professional/notion.yaml +27 -0
- moriarty/assets/modules/professional/orcid.yaml +27 -0
- moriarty/assets/modules/professional/producthunt.yaml +31 -0
- moriarty/assets/modules/professional/researchgate.yaml +32 -0
- moriarty/assets/modules/professional/scribd.yaml +27 -0
- moriarty/assets/modules/professional/slideshare.yaml +31 -0
- moriarty/assets/modules/professional/trello.yaml +26 -0
- moriarty/assets/modules/professional/typeform.yaml +27 -0
- moriarty/assets/modules/reddit.yaml +46 -0
- moriarty/assets/modules/regional/amino.yaml +27 -0
- moriarty/assets/modules/regional/ask-fm.yaml +32 -0
- moriarty/assets/modules/regional/babycenter.yaml +26 -0
- moriarty/assets/modules/regional/cafemom.yaml +27 -0
- moriarty/assets/modules/regional/care2.yaml +27 -0
- moriarty/assets/modules/regional/diaspora.yaml +26 -0
- moriarty/assets/modules/regional/ello.yaml +27 -0
- moriarty/assets/modules/regional/gaia.yaml +27 -0
- moriarty/assets/modules/regional/habbo.yaml +27 -0
- moriarty/assets/modules/regional/imvu.yaml +27 -0
- moriarty/assets/modules/regional/lemmy.yaml +27 -0
- moriarty/assets/modules/regional/peertube.yaml +26 -0
- moriarty/assets/modules/regional/pixelfed.yaml +27 -0
- moriarty/assets/modules/regional/plurk.yaml +26 -0
- moriarty/assets/modules/regional/recroom.yaml +27 -0
- moriarty/assets/modules/regional/secondlife.yaml +26 -0
- moriarty/assets/modules/regional/vine-archive.yaml +27 -0
- moriarty/assets/modules/regional/vrchat.yaml +27 -0
- moriarty/assets/modules/regional/weheartit.yaml +27 -0
- moriarty/assets/modules/social/anilist.yaml +27 -0
- moriarty/assets/modules/social/beacons.yaml +26 -0
- moriarty/assets/modules/social/blogger.yaml +27 -0
- moriarty/assets/modules/social/crunchyroll.yaml +27 -0
- moriarty/assets/modules/social/discord.yaml +27 -0
- moriarty/assets/modules/social/dreamwidth.yaml +26 -0
- moriarty/assets/modules/social/facebook.yaml +34 -0
- moriarty/assets/modules/social/goodreads.yaml +32 -0
- moriarty/assets/modules/social/imdb.yaml +27 -0
- moriarty/assets/modules/social/kitsu.yaml +27 -0
- moriarty/assets/modules/social/letterboxd.yaml +32 -0
- moriarty/assets/modules/social/linktree.yaml +26 -0
- moriarty/assets/modules/social/livejournal.yaml +27 -0
- moriarty/assets/modules/social/mastodon.yaml +30 -0
- moriarty/assets/modules/social/minds.yaml +25 -0
- moriarty/assets/modules/social/myanimelist.yaml +32 -0
- moriarty/assets/modules/social/ravelry.yaml +27 -0
- moriarty/assets/modules/social/snapchat.yaml +25 -0
- moriarty/assets/modules/social/telegram.yaml +35 -0
- moriarty/assets/modules/social/tiktok.yaml +35 -0
- moriarty/assets/modules/social/trakt.yaml +28 -0
- moriarty/assets/modules/social/wattpad.yaml +32 -0
- moriarty/assets/modules/social/wordpress-com.yaml +26 -0
- moriarty/assets/modules/sports/espn.yaml +26 -0
- moriarty/assets/modules/sports/untappd.yaml +32 -0
- moriarty/assets/modules/stackoverflow.yaml +47 -0
- moriarty/assets/modules/steam.yaml +47 -0
- moriarty/assets/modules/streaming/caffeine.yaml +25 -0
- moriarty/assets/modules/streaming/dlive.yaml +27 -0
- moriarty/assets/modules/streaming/trovo.yaml +25 -0
- moriarty/assets/modules/travel/airbnb.yaml +26 -0
- moriarty/assets/modules/travel/booking.yaml +26 -0
- moriarty/assets/modules/travel/couchsurfing.yaml +27 -0
- moriarty/assets/modules/travel/tripadvisor.yaml +32 -0
- moriarty/assets/modules/tumblr.yaml +40 -0
- moriarty/assets/modules/twitch.yaml +48 -0
- moriarty/assets/modules/twitter.yaml +39 -0
- moriarty/assets/modules/youtube.yaml +42 -0
- moriarty/assets/templates/cves/CVE-2017-5638.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2018-7600.yaml +30 -0
- moriarty/assets/templates/cves/CVE-2019-11510.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2019-19781.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2020-14882.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2020-14883.yaml +29 -0
- moriarty/assets/templates/cves/CVE-2020-3452.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2020-5902.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2021-21972.yaml +31 -0
- moriarty/assets/templates/cves/CVE-2021-21985.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2021-26084.yaml +30 -0
- moriarty/assets/templates/cves/CVE-2021-41773.yaml +25 -0
- moriarty/assets/templates/cves/CVE-2021-42013.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2021-44228.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2022-0185.yaml +21 -0
- moriarty/assets/templates/cves/CVE-2022-1388.yaml +36 -0
- moriarty/assets/templates/cves/CVE-2022-22954.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2022-22965.yaml +31 -0
- moriarty/assets/templates/cves/CVE-2022-26134.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2023-22515.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2023-22527.yaml +29 -0
- moriarty/assets/templates/cves/CVE-2023-23752.yaml +33 -0
- moriarty/assets/templates/cves/CVE-2023-27350.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2023-2868.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2023-34362.yaml +27 -0
- moriarty/assets/templates/cves/CVE-2023-3519.yaml +28 -0
- moriarty/assets/templates/cves/CVE-2023-4966.yaml +27 -0
- moriarty/assets/templates/default-logins/admin-weak.yaml +40 -0
- moriarty/assets/templates/default-logins/wordpress-default.yaml +38 -0
- moriarty/assets/templates/exposures/aws-credentials.yaml +35 -0
- moriarty/assets/templates/exposures/backup-files.yaml +36 -0
- moriarty/assets/templates/exposures/database-files.yaml +34 -0
- moriarty/assets/templates/exposures/docker-exposed.yaml +31 -0
- moriarty/assets/templates/exposures/env-exposed.yaml +41 -0
- moriarty/assets/templates/exposures/git-exposed.yaml +41 -0
- moriarty/assets/templates/exposures/phpinfo.yaml +36 -0
- moriarty/assets/templates/exposures/svn-exposed.yaml +28 -0
- moriarty/assets/templates/fuzzing/api-endpoints.yaml +39 -0
- moriarty/assets/templates/fuzzing/common-files.yaml +37 -0
- moriarty/assets/templates/fuzzing/open-redirect-fuzz.yaml +35 -0
- moriarty/assets/templates/fuzzing/xss-search-fuzz.yaml +29 -0
- moriarty/assets/templates/git-config.yaml +18 -0
- moriarty/assets/templates/misconfigurations/cors-misconfiguration.yaml +30 -0
- moriarty/assets/templates/misconfigurations/debug-enabled.yaml +29 -0
- moriarty/assets/templates/misconfigurations/directory-listing.yaml +33 -0
- moriarty/assets/templates/misconfigurations/jwt-none-algo.yaml +30 -0
- moriarty/assets/templates/misconfigurations/ssl-tls-weak.yaml +23 -0
- moriarty/assets/templates/vulnerabilities/lfi-basic.yaml +31 -0
- moriarty/assets/templates/vulnerabilities/open-redirect.yaml +31 -0
- moriarty/assets/templates/vulnerabilities/rce-basic.yaml +34 -0
- moriarty/assets/templates/vulnerabilities/sqli-error.yaml +39 -0
- moriarty/assets/templates/vulnerabilities/ssrf-basic.yaml +31 -0
- moriarty/assets/templates/vulnerabilities/xss-reflected.yaml +38 -0
- moriarty/assets/templates/vulnerabilities/xxe-basic.yaml +30 -0
- moriarty/assets/wordlists/subdomains-1000.txt +1063 -0
- moriarty/cli/__init__.py +3 -0
- moriarty/cli/app.py +120 -0
- moriarty/cli/async_utils.py +19 -0
- moriarty/cli/dns.py +83 -0
- moriarty/cli/domain_cmd.py +572 -0
- moriarty/cli/email.py +383 -0
- moriarty/cli/email_investigate.py +224 -0
- moriarty/cli/intelligence.py +329 -0
- moriarty/cli/output.py +62 -0
- moriarty/cli/rdap.py +94 -0
- moriarty/cli/state.py +38 -0
- moriarty/cli/tls.py +91 -0
- moriarty/cli/user.py +227 -0
- moriarty/core/cache_backend.py +223 -0
- moriarty/core/config_manager.py +303 -0
- moriarty/correlator/__init__.py +0 -0
- moriarty/data/__init__.py +81 -0
- moriarty/data/ioc/__init__.py +142 -0
- moriarty/data/ioc/matcher.py +254 -0
- moriarty/data/ioc/types.py +267 -0
- moriarty/data/local_intelligence.py +507 -0
- moriarty/data/signature_loaders/__init__.py +103 -0
- moriarty/data/signature_loaders/base.py +54 -0
- moriarty/data/signature_loaders/ioc_feed.py +356 -0
- moriarty/data/signature_loaders/wappalyzer.py +112 -0
- moriarty/dsl/__init__.py +0 -0
- moriarty/dsl/loader.py +99 -0
- moriarty/dsl/schema.py +47 -0
- moriarty/export/__init__.py +0 -0
- moriarty/intelligence/__init__.py +27 -0
- moriarty/intelligence/__main__.py +150 -0
- moriarty/intelligence/config.py +395 -0
- moriarty/intelligence/ioc.py +267 -0
- moriarty/intelligence/signatures.py +550 -0
- moriarty/intelligence/storage.py +501 -0
- moriarty/interop/__init__.py +0 -0
- moriarty/logging/__init__.py +0 -0
- moriarty/logging/config.py +47 -0
- moriarty/models/__init__.py +16 -0
- moriarty/models/assertion.py +24 -0
- moriarty/models/entity.py +22 -0
- moriarty/models/evidence.py +37 -0
- moriarty/models/relation.py +24 -0
- moriarty/models/types.py +28 -0
- moriarty/modules/__init__.py +0 -0
- moriarty/modules/avatar_hash.py +184 -0
- moriarty/modules/directory_fuzzer.py +322 -0
- moriarty/modules/dns_scan.py +40 -0
- moriarty/modules/domain_scanner.py +620 -0
- moriarty/modules/email_check.py +98 -0
- moriarty/modules/email_investigate.py +267 -0
- moriarty/modules/email_security.py +274 -0
- moriarty/modules/googlemaps_lookup.py +106 -0
- moriarty/modules/headless_executor.py +201 -0
- moriarty/modules/orchestrator.py +60 -0
- moriarty/modules/passive_recon.py +444 -0
- moriarty/modules/phone_extractor.py +151 -0
- moriarty/modules/pipeline_orchestrator.py +726 -0
- moriarty/modules/port_scanner.py +129 -0
- moriarty/modules/rdap.py +61 -0
- moriarty/modules/rdap_extended.py +188 -0
- moriarty/modules/stealth_mode.py +610 -0
- moriarty/modules/subdomain_discovery.py +595 -0
- moriarty/modules/technology_profiler.py +361 -0
- moriarty/modules/template_executor.py +239 -0
- moriarty/modules/template_scanner.py +1048 -0
- moriarty/modules/tls_scan.py +46 -0
- moriarty/modules/tls_validator.py +188 -0
- moriarty/modules/vuln_scanner.py +483 -0
- moriarty/modules/waf_detector.py +585 -0
- moriarty/modules/wayback_discovery.py +234 -0
- moriarty/modules/web_crawler.py +163 -0
- moriarty/net/__init__.py +0 -0
- moriarty/net/dns_cache.py +175 -0
- moriarty/net/dns_client.py +188 -0
- moriarty/net/rdap_client.py +52 -0
- moriarty/net/smtp_client.py +114 -0
- moriarty/net/tls_client.py +111 -0
- moriarty/parsers/__init__.py +0 -0
- moriarty/parsers/html_parser.py +136 -0
- moriarty/tests/__init__.py +0 -0
- moriarty/tests/test_email_service.py +17 -0
- moriarty/tests/test_models.py +46 -0
- moriarty/tests/test_orchestrator.py +30 -0
- moriarty/tests/test_tls_client.py +18 -0
- moriarty_project-0.1.6.dist-info/METADATA +388 -0
- moriarty_project-0.1.6.dist-info/RECORD +418 -0
- moriarty_project-0.1.6.dist-info/WHEEL +4 -0
- moriarty_project-0.1.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,361 @@
|
|
1
|
+
"""HTTP fingerprinting and lightweight technology detection (Wappalyzer-style)."""
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
import asyncio
|
5
|
+
import re
|
6
|
+
from dataclasses import dataclass, field, asdict
|
7
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
8
|
+
from urllib.parse import urljoin
|
9
|
+
|
10
|
+
import httpx
|
11
|
+
import structlog
|
12
|
+
|
13
|
+
if TYPE_CHECKING: # pragma: no cover - type checking only
|
14
|
+
from moriarty.modules.stealth_mode import StealthMode
|
15
|
+
|
16
|
+
|
17
|
+
# Module-level structlog logger bound to this module's name; used for the
# debug events emitted while profiling.
logger = structlog.get_logger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
@dataclass
class TechnologyDetection:
    """One inferred technology together with the clues that support it."""

    # Name of the detected technology (e.g. "WordPress", or a header value).
    name: str
    # Integer score assigned by the detector that produced this record;
    # higher values win when detections with the same name are aggregated.
    confidence: int
    # Human-readable descriptions of what triggered the detection.
    evidence: List[str] = field(default_factory=list)
    # Coarse classification labels such as "server", "cms" or "cdn".
    categories: List[str] = field(default_factory=list)
    # Free-form lowercase tags attached by the detector.
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the detection as a plain dictionary built by ``asdict``."""
        as_mapping: Dict[str, Any] = asdict(self)
        return as_mapping
|
32
|
+
|
33
|
+
|
34
|
+
# Paths probed by ``profile_domain`` when the caller supplies no endpoint list.
DEFAULT_ENDPOINTS = [
    "/", "/robots.txt", "/manifest.json",
    "/.well-known/security.txt", "/wp-json/", "/graphql",
]
|
42
|
+
|
43
|
+
|
44
|
+
async def profile_domain(
    domain: str,
    session: Optional[httpx.AsyncClient] = None,
    *,
    base_url: Optional[str] = None,
    stealth: Optional["StealthMode"] = None,
    timeout: float = 10.0,
    endpoints: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Collect headers, probe interesting endpoints and infer running technologies.

    Parameters
    ----------
    domain: str
        Domain to fingerprint (without scheme).
    session: Optional[httpx.AsyncClient]
        Optional client to reuse. If omitted a temporary client is created
        and closed before returning.
    base_url: Optional[str]
        Override base URL (defaults to ``https://<domain>``).
    stealth: Optional[StealthMode]
        When provided, its ``get_random_headers()`` output is used for every
        request (see ``_build_headers``). No timing changes are applied here.
    timeout: float
        Per-request timeout when creating a temporary client.
    endpoints: Optional[List[str]]
        Overrides the default endpoint list. ``None`` or an empty list falls
        back to ``DEFAULT_ENDPOINTS``.

    Returns
    -------
    Dict[str, Any]
        ``headers``: headers of the base response (empty if it failed);
        ``endpoints``: per-path record with ``status`` plus ``json`` or
        ``snippet``; ``detections``: aggregated detections as dictionaries;
        ``components``: result of ``_build_component_index``.
    """

    created_client = False
    if session is None:
        session = httpx.AsyncClient(timeout=timeout, follow_redirects=True)
        created_client = True

    profile: Dict[str, Any] = {"headers": {}, "endpoints": {}, "detections": []}
    detections: List[TechnologyDetection] = []
    targets = endpoints or DEFAULT_ENDPOINTS
    root_url = base_url or f"https://{domain}"

    try:
        base_headers = _build_headers(stealth)
        logger.debug("techprofiler.fetch.base", url=root_url)
        try:
            response = await session.get(root_url, headers=base_headers)
        except Exception as exc:
            # Best effort: a failing base request must not abort the profile.
            logger.debug("techprofiler.base.error", url=root_url, error=str(exc))
            response = None

        if response is not None:
            profile["headers"] = dict(response.headers)
            detections.extend(_detect_from_headers(response.headers))
            cookie_header = response.headers.get("set-cookie")
            if cookie_header:
                detections.extend(_detect_from_cookies(cookie_header))

        async def fetch(path: str) -> None:
            url = urljoin(root_url, path)
            headers = _build_headers(stealth)
            try:
                resp = await session.get(url, headers=headers)
            except Exception as exc:  # pragma: no cover - network errors tolerated
                logger.debug("techprofiler.endpoint.error", url=url, error=str(exc))
                return

            if resp.status_code >= 400:
                logger.debug("techprofiler.endpoint.skip", url=url, status=resp.status_code)
                return

            record: Dict[str, Any] = {"status": resp.status_code}
            content_type = resp.headers.get("content-type", "").lower()
            # Always keep a text sample for the detectors. Previously a valid
            # JSON body left the sample empty, so snippet-based checks (for
            # example on /manifest.json) could never match JSON responses.
            body_sample = resp.text[:400]

            store_snippet = True
            if "json" in content_type:
                try:
                    record["json"] = resp.json()
                    store_snippet = False
                except Exception:
                    # Body advertised as JSON but not parseable: keep the raw
                    # text sample instead.
                    pass

            if store_snippet and body_sample:
                record["snippet"] = body_sample
            profile["endpoints"][path] = record

            detections.extend(_detect_from_endpoint(path, resp, body_sample))

        await asyncio.gather(*(fetch(ep) for ep in targets))

        aggregated = _merge_detections(detections)
        profile["detections"] = [det.to_dict() for det in aggregated]
        profile["components"] = _build_component_index(profile["detections"])
        return profile

    finally:
        if created_client:
            await session.aclose()


def _merge_detections(detections: List[TechnologyDetection]) -> List[TechnologyDetection]:
    """Collapse detections sharing a name into the highest-confidence record.

    Evidence, categories and tags of every duplicate are merged into the
    surviving record regardless of the order in which detections arrived, so
    the aggregated content does not depend on which concurrent fetch finished
    first. On a confidence tie the earlier record is kept.
    """
    aggregated: Dict[str, TechnologyDetection] = {}
    for detection in detections:
        existing = aggregated.get(detection.name)
        if existing is None:
            aggregated[detection.name] = detection
            continue

        if detection.confidence > existing.confidence:
            winner, loser = detection, existing
            aggregated[detection.name] = winner
        else:
            winner, loser = existing, detection

        for bucket in ("evidence", "categories", "tags"):
            merged = getattr(winner, bucket)
            for item in getattr(loser, bucket):
                if item not in merged:
                    merged.append(item)

    return list(aggregated.values())
|
154
|
+
|
155
|
+
|
156
|
+
def _build_headers(stealth: Optional["StealthMode"]) -> Dict[str, str]:
|
157
|
+
headers: Dict[str, str] = {}
|
158
|
+
if stealth:
|
159
|
+
headers.update(stealth.get_random_headers())
|
160
|
+
headers.setdefault("User-Agent", "Mozilla/5.0 (Moriarty Recon)")
|
161
|
+
headers.setdefault("Accept", "*/*")
|
162
|
+
return headers
|
163
|
+
|
164
|
+
|
165
|
+
def _detect_from_headers(headers: httpx.Headers) -> List[TechnologyDetection]:
|
166
|
+
detections: List[TechnologyDetection] = []
|
167
|
+
server = headers.get("server")
|
168
|
+
if server:
|
169
|
+
name = server.split(" ")[0].split("/")[0]
|
170
|
+
detections.append(
|
171
|
+
TechnologyDetection(
|
172
|
+
name=name,
|
173
|
+
confidence=65,
|
174
|
+
evidence=[f"Server header: {server}"],
|
175
|
+
categories=["server"],
|
176
|
+
tags=[name.lower()],
|
177
|
+
)
|
178
|
+
)
|
179
|
+
|
180
|
+
powered = headers.get("x-powered-by")
|
181
|
+
if powered:
|
182
|
+
detections.append(
|
183
|
+
TechnologyDetection(
|
184
|
+
name=powered,
|
185
|
+
confidence=80,
|
186
|
+
evidence=[f"X-Powered-By: {powered}"],
|
187
|
+
categories=["platform"],
|
188
|
+
tags=[powered.split("/")[0].lower()],
|
189
|
+
)
|
190
|
+
)
|
191
|
+
|
192
|
+
asp = headers.get("x-aspnet-version")
|
193
|
+
if asp:
|
194
|
+
detections.append(
|
195
|
+
TechnologyDetection(
|
196
|
+
name="ASP.NET",
|
197
|
+
confidence=90,
|
198
|
+
evidence=[f"X-AspNet-Version: {asp}"],
|
199
|
+
categories=["framework"],
|
200
|
+
tags=["aspnet", "dotnet", "microsoft"],
|
201
|
+
)
|
202
|
+
)
|
203
|
+
|
204
|
+
if headers.get("cf-ray") or headers.get("cf-cache-status"):
|
205
|
+
detections.append(
|
206
|
+
TechnologyDetection(
|
207
|
+
name="Cloudflare",
|
208
|
+
confidence=90,
|
209
|
+
evidence=["Cloudflare headers present"],
|
210
|
+
categories=["cdn", "waf"],
|
211
|
+
tags=["cloudflare"],
|
212
|
+
)
|
213
|
+
)
|
214
|
+
|
215
|
+
if headers.get("akamai-ghost"):
|
216
|
+
detections.append(
|
217
|
+
TechnologyDetection(
|
218
|
+
name="Akamai",
|
219
|
+
confidence=80,
|
220
|
+
evidence=["Akamai edge headers"],
|
221
|
+
categories=["cdn"],
|
222
|
+
tags=["akamai"],
|
223
|
+
)
|
224
|
+
)
|
225
|
+
|
226
|
+
return detections
|
227
|
+
|
228
|
+
|
229
|
+
def _detect_from_cookies(cookie_header: str) -> List[TechnologyDetection]:
|
230
|
+
detections: List[TechnologyDetection] = []
|
231
|
+
cookie_header = cookie_header.lower()
|
232
|
+
if "wordpress_logged_in" in cookie_header or "wp-settings" in cookie_header:
|
233
|
+
detections.append(
|
234
|
+
TechnologyDetection(
|
235
|
+
name="WordPress",
|
236
|
+
confidence=95,
|
237
|
+
evidence=["WordPress session cookies"],
|
238
|
+
categories=["cms"],
|
239
|
+
tags=["wordpress", "php", "cms"],
|
240
|
+
)
|
241
|
+
)
|
242
|
+
if "laravel_session" in cookie_header:
|
243
|
+
detections.append(
|
244
|
+
TechnologyDetection(
|
245
|
+
name="Laravel",
|
246
|
+
confidence=85,
|
247
|
+
evidence=["Laravel session cookie"],
|
248
|
+
categories=["framework"],
|
249
|
+
tags=["laravel", "php"],
|
250
|
+
)
|
251
|
+
)
|
252
|
+
if "mage-cache-sessid" in cookie_header or "mage-cache-storage" in cookie_header:
|
253
|
+
detections.append(
|
254
|
+
TechnologyDetection(
|
255
|
+
name="Magento",
|
256
|
+
confidence=80,
|
257
|
+
evidence=["Magento session cookies"],
|
258
|
+
categories=["ecommerce"],
|
259
|
+
tags=["magento", "php", "ecommerce"],
|
260
|
+
)
|
261
|
+
)
|
262
|
+
return detections
|
263
|
+
|
264
|
+
|
265
|
+
def _detect_from_endpoint(path: str, response: httpx.Response, snippet: Optional[str]) -> List[TechnologyDetection]:
    """Infer technologies from a probed endpoint and a snippet of its body.

    Path comparisons are done on the lower-cased path. Body checks run
    against ``snippet`` (treated as an empty string when missing); the last
    three checks are content-only and apply to whatever path was probed.

    Args:
        path: Path of the endpoint that was requested.
        response: HTTP response for that endpoint; only its status code is
            read, and only for the ``/graphql`` check.
        snippet: Excerpt of the response body, or ``None``.

    Returns:
        Detections in the order the checks are evaluated. The same technology
        may appear more than once when several checks match.
    """
    route = path.lower()
    body = snippet or ""
    found: List[TechnologyDetection] = []

    def record(name, confidence, evidence, categories, tags):
        # Every detection produced here carries exactly one evidence string.
        found.append(
            TechnologyDetection(
                name=name,
                confidence=confidence,
                evidence=[evidence],
                categories=categories,
                tags=tags,
            )
        )

    # --- checks tied to a specific path -----------------------------------
    if route.startswith("/wp-json"):
        record("WordPress", 90, "/wp-json endpoint accessible", ["cms"], ["wordpress", "php", "cms"])

    if route == "/robots.txt" and "wp-admin" in body:
        record("WordPress", 70, "robots.txt contains wp-admin", ["cms"], ["wordpress", "cms"])

    # The status code is read only once the path has matched.
    if route == "/graphql" and response.status_code == 200:
        record("GraphQL API", 90, "/graphql responded with 200", ["api"], ["graphql"])

    if route == "/manifest.json" and "gcm_sender_id" in body:
        record("Progressive Web App", 60, "manifest.json contains PWA keys", ["frontend"], ["pwa"])

    # --- content-only checks (an empty body can never match these) --------
    if 'SetEnvIfNoCase Request_URI "/wp-' in body:
        record("ModSecurity", 55, "security.txt hints ModSecurity", ["waf"], ["modsecurity"])

    if re.search(r"Drupal\s*\d", body, re.IGNORECASE):
        record("Drupal", 60, "Endpoint content references Drupal", ["cms"], ["drupal", "php", "cms"])

    if "woocommerce" in body.lower():
        record("WooCommerce", 55, "Content references WooCommerce", ["ecommerce"], ["woocommerce", "wordpress", "ecommerce"])

    return found
|
348
|
+
|
349
|
+
|
350
|
+
def _build_component_index(detections: List[Dict[str, Any]]) -> Dict[str, List[str]]:
|
351
|
+
components: Dict[str, List[str]] = {}
|
352
|
+
for detection in detections:
|
353
|
+
for category in detection.get("categories", []):
|
354
|
+
components.setdefault(category, [])
|
355
|
+
name = detection.get("name")
|
356
|
+
if name and name not in components[category]:
|
357
|
+
components[category].append(name)
|
358
|
+
return components
|
359
|
+
|
360
|
+
|
361
|
+
# Names exported by a star-import of this module.
__all__ = ["profile_domain", "TechnologyDetection"]
|
@@ -0,0 +1,239 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import hashlib
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from time import perf_counter
|
6
|
+
from typing import Any, Dict, List, Optional
|
7
|
+
|
8
|
+
import httpx
|
9
|
+
import structlog
|
10
|
+
|
11
|
+
from ..dsl.schema import TemplateSpec
|
12
|
+
from ..parsers.html_parser import HTMLExtractor
|
13
|
+
|
14
|
+
# Module-level structlog logger, keyed by this module's import name.
logger = structlog.get_logger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass(slots=True)
|
18
|
+
class ExecutionResult:
|
19
|
+
"""Resultado da execução de um template."""
|
20
|
+
|
21
|
+
site: str
|
22
|
+
url: str
|
23
|
+
exists: bool
|
24
|
+
confidence: float
|
25
|
+
extracted: Dict[str, Any]
|
26
|
+
page_hash: str
|
27
|
+
latency_ms: float
|
28
|
+
status_code: int
|
29
|
+
error: Optional[str] = None
|
30
|
+
|
31
|
+
|
32
|
+
class TemplateExecutor:
    """Execute site templates against a target and score the outcome.

    Cookies returned by a site are remembered per host so that later
    executions against the same host send them back.
    """

    def __init__(
        self,
        timeout: float = 8.0,
        user_agent: Optional[str] = None,
        cookie_store: Optional[Dict[str, httpx.Cookies]] = None,
    ) -> None:
        """Configure the executor.

        Args:
            timeout: Timeout handed to the HTTP client.
            user_agent: ``User-Agent`` header value; a built-in default is
                used when omitted or empty.
            cookie_store: Mapping of host -> cookies. When given, that same
                mapping object is used and updated in place.
        """
        self._timeout = timeout
        self._user_agent = user_agent or "Moriarty/0.1.0 (OSINT Client)"
        # `is not None` rather than `or`: an empty dict passed by the caller
        # must be kept so the caller observes the cookies stored later.
        self._cookie_store = cookie_store if cookie_store is not None else {}

    @staticmethod
    def _cookie_fields(jar: Any) -> List[tuple]:
        """Return ``(name, value, domain, path)`` for every cookie in ``jar``.

        Iterating a cookie jar yields ``http.cookiejar.Cookie`` objects, which
        expose their data as attributes and cannot be indexed like tuples.
        """
        return [(c.name, c.value, c.domain, c.path) for c in jar]

    async def execute(
        self, template: TemplateSpec, variables: Dict[str, str]
    ) -> ExecutionResult:
        """Render the template URL, perform the request and evaluate the page.

        Args:
            template: Template describing the request, selectors and
                extractors.
            variables: Values substituted into ``template.url_template``.

        Returns:
            An ``ExecutionResult``. Any exception raised while performing the
            request is not propagated: it is logged and reported as
            ``exists=False``, ``status_code=0`` with the message in ``error``.
        """
        start = perf_counter()

        # Render the URL.
        url = template.url_template.format(**variables)

        logger.info(
            "template.execute.start",
            site=template.site,
            url=url,
            method=template.method,
        )

        # Template headers come last so they can override the User-Agent.
        headers = {
            "User-Agent": self._user_agent,
            **template.headers,
        }

        target_host = httpx.URL(url).host
        cookies = self._cookie_store.get(target_host)
        method = template.method.upper()

        # A dict body is sent as JSON unless a POST template explicitly asks
        # for form encoding; any other non-None body is passed through as-is.
        request_body = None
        request_json = None
        if isinstance(template.body, dict):
            content_type = template.headers.get("Content-Type", "").lower()
            if method == "POST" and content_type == "application/x-www-form-urlencoded":
                request_body = template.body
            else:
                request_json = template.body
        elif template.body is not None:
            request_body = template.body

        # Perform the request.
        try:
            async with httpx.AsyncClient(
                timeout=self._timeout,
                follow_redirects=True,
                http2=True,
                cookies=cookies,
            ) as client:
                if method in ("GET", "HEAD"):
                    # GET/HEAD never carry the template body.
                    response = await client.request(method, url, headers=headers)
                else:
                    response = await client.request(
                        method,
                        url,
                        headers=headers,
                        json=request_json,
                        data=request_body,
                    )

                # Store returned cookies, merged over the ones already known
                # for this host (response cookies are applied last).
                if response.cookies:
                    merged = httpx.Cookies()
                    jars = []
                    if cookies:
                        jars.append(cookies.jar)
                    jars.append(response.cookies.jar)
                    for jar in jars:
                        for name, value, domain, path in self._cookie_fields(jar):
                            merged.set(name, value, domain=domain, path=path)
                    self._cookie_store[target_host] = merged

                status_code = response.status_code
                html = response.text

        except Exception as exc:
            # Best-effort boundary: one failing site must not abort a scan.
            latency_ms = (perf_counter() - start) * 1000
            logger.warning(
                "template.execute.error",
                site=template.site,
                url=url,
                error=str(exc),
                latency_ms=round(latency_ms, 2),
            )
            return ExecutionResult(
                site=template.site,
                url=url,
                exists=False,
                confidence=0.0,
                extracted={},
                page_hash="",
                latency_ms=latency_ms,
                status_code=0,
                error=str(exc),
            )

        # Parse the page, then derive existence, extracted data and score.
        extractor = HTMLExtractor(html)
        exists = self._check_exists(extractor, template)
        extracted = self._extract_data(extractor, template)

        # Short fingerprint of the page: first 16 hex chars of its SHA-256.
        page_hash = hashlib.sha256(html.encode("utf-8")).hexdigest()[:16]

        confidence = self._calculate_confidence(exists, extracted, template, status_code)

        latency_ms = (perf_counter() - start) * 1000

        logger.info(
            "template.execute.success",
            site=template.site,
            url=url,
            exists=exists,
            confidence=confidence,
            status=status_code,
            latency_ms=round(latency_ms, 2),
        )

        return ExecutionResult(
            site=template.site,
            url=url,
            exists=exists,
            confidence=confidence,
            extracted=extracted,
            page_hash=page_hash,
            latency_ms=latency_ms,
            status_code=status_code,
        )

    def _check_exists(self, extractor: HTMLExtractor, template: TemplateSpec) -> bool:
        """Decide from the template's selectors whether the profile exists.

        A matching "not found" selector always wins. Otherwise the profile
        exists when no "exists" selectors are configured, or when at least
        one of them matches.
        """
        # Any "not found" marker means the profile does not exist.
        for selector in template.not_found_selectors:
            if extractor.exists(selector):
                return False

        # No positive selectors configured: treat the profile as existing.
        # The HTTP status is not consulted here; `_calculate_confidence`
        # lowers the score for non-200 responses.
        if not template.exists_selectors:
            return True

        # At least one "exists" selector has to match.
        return any(extractor.exists(selector) for selector in template.exists_selectors)

    def _extract_data(
        self, extractor: HTMLExtractor, template: TemplateSpec
    ) -> Dict[str, Any]:
        """Run the template's extractors and collect the values they yield.

        Returns:
            Mapping of extractor name -> value. Extractors that end up with
            ``None`` are left out.
        """
        data: Dict[str, Any] = {}

        for ext in template.extractors:
            value = extractor.extract(ext.selector)

            # The default is substituted only for extractors flagged as
            # required; optional extractors that match nothing are omitted.
            if value is None and ext.required:
                value = ext.default

            if value is not None:
                data[ext.name] = value

        return data

    def _calculate_confidence(
        self,
        exists: bool,
        extracted: Dict[str, Any],
        template: TemplateSpec,
        status_code: int,
    ) -> float:
        """Compute a confidence score between 0.0 and 1.0.

        Returns 0.0 when the profile does not exist and 0.3 for any non-200
        status. Otherwise starts at 0.7, adds up to 0.2 in proportion to the
        share of extractors that produced a value, adds 0.1 when the template
        defines "exists" selectors, and caps the total at 1.0.
        """
        if not exists:
            return 0.0

        if status_code != 200:
            return 0.3

        # Base confidence.
        confidence = 0.7

        # Bonus for extracted data; max(..., 1) guards against a template
        # without extractors.
        if extracted:
            confidence += 0.2 * (len(extracted) / max(len(template.extractors), 1))

        # Bonus when existence was confirmed through explicit selectors.
        if template.exists_selectors:
            confidence += 0.1

        return min(confidence, 1.0)
|
237
|
+
|
238
|
+
|
239
|
+
# Names exported by a star-import of this module.
__all__ = ["TemplateExecutor", "ExecutionResult"]
|