PyPI - polyswarm-engine - Versions diffs - 3.1.1__py2.py3-none-any.whl - Mend

polyswarm-engine 3.1.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

polyswarm_engine/__init__.py +49 -0
polyswarm_engine/backend.py +302 -0
polyswarm_engine/bidutils.py +69 -0
polyswarm_engine/bounty.py +387 -0
polyswarm_engine/celeryconfig.py +76 -0
polyswarm_engine/cli.py +316 -0
polyswarm_engine/command.py +34 -0
polyswarm_engine/constants.py +37 -0
polyswarm_engine/engine.py +123 -0
polyswarm_engine/exceptions.py +41 -0
polyswarm_engine/log_config.py +104 -0
polyswarm_engine/py.typed +0 -0
polyswarm_engine/settings.py +41 -0
polyswarm_engine/typing.py +125 -0
polyswarm_engine/utils.py +434 -0
polyswarm_engine/wine.py +125 -0
polyswarm_engine/wsgi.py +122 -0
polyswarm_engine-3.1.1.dist-info/METADATA +361 -0
polyswarm_engine-3.1.1.dist-info/RECORD +21 -0
polyswarm_engine-3.1.1.dist-info/WHEEL +6 -0
polyswarm_engine-3.1.1.dist-info/top_level.txt +1 -0

polyswarm_engine/cli.py ADDED Viewed

@@ -0,0 +1,316 @@
+import os
+import builtins
+import datetime as dt
+import functools
+import inspect
+import importlib
+import json
+import logging
+import pathlib
+import typing as t
+import click
+import polyswarm_engine.settings
+from polyswarm_engine.bounty import forge_local_bounty
+from polyswarm_engine.constants import (
+    ARTIFACT_TYPES,
+    BENIGN,
+    EICAR_CONTENT,
+    FILE_ARTIFACT,
+    MALICIOUS,
+    UNKNOWN,
+    URL_ARTIFACT,
+)
+from polyswarm_engine.utils import is_fifo
+# Module-level logger named after this module
+logger = logging.getLogger(__name__)
+# Artifact type for manually constructed bounties
+# (offered by `analyze --artifact-type` in addition to the regular ARTIFACT_TYPES)
+BOUNTY_ARTIFACT = "bounty"
+@click.group()
+def engine_cli():
+    from logging.config import dictConfig
+    from polyswarm_engine import log_config
+    dictConfig(log_config.get_logging(handler='click'))
+@engine_cli.command("devserver")
+@click.option('--port', '-p', help='Server port', type=int, default=8000, show_default=True)
+@click.option('--secret', '-s', help='Webhook secret  [env: PSENGINE_WEBHOOK_SECRET]', envvar='PSENGINE_WEBHOOK_SECRET')
+@click.pass_obj
+def devserver(engine, port, secret, **kwargs):
+    """
+    Simple HTTP server only usable during development
+    """
+    from logging.config import dictConfig
+    from wsgiref.simple_server import make_server
+    from polyswarm_engine import log_config
+    from polyswarm_engine.wsgi import ValidateSenderMiddleware, application as wsgi_app
+    dictConfig(log_config.get_logging())
+    wsgi_app = ValidateSenderMiddleware(wsgi_app, secret=secret)
+    with make_server('', port, wsgi_app) as httpd:
+        print("Serving {} on port {}, control-C to stop".format(engine.name, port))
+        httpd.serve_forever()
+@engine_cli.command('create-vhost', context_settings=dict(show_default=True))
+@click.option('--vhost', envvar='PSENGINE_BROKER_VHOST', default='engines')
+@click.option('--broker', envvar='PSENGINE_BROKER_URL', default='amqp://user:password@rabbitmq:5672')
+def create_vhost(vhost: str, broker: str):
+    """Ensure that a vhost exists in the RabbitMQ broker"""
+    from urllib import parse
+    import requests
+    from polyswarm_engine.celeryconfig import CeleryConfig
+    broker_url = CeleryConfig(broker=broker, vhost=vhost).broker_url
+    parsed_url = parse.urlparse(broker_url)
+    vhost = vhost or parsed_url.path.strip(' /')
+    if vhost:
+        logger.info("Creating '%s' vhost", vhost)
+        create_url = parse.urlunparse(
+            ('http', f'{parsed_url.hostname}:1{parsed_url.port}', f'/api/vhosts/{vhost}', '', '', '')
+        )
+        r = requests.put(create_url, auth=(parsed_url.username, parsed_url.password))
+        r.raise_for_status()
+        click.echo(f'Successfully create vhost {vhost}')
+    else:
+        click.echo('No vhost defined for the celery broker')
+def _gather_analyses(backend, artifacts, artifact_type):
+    futures = list()
+    for artifact in artifacts:
+        bounty = _make_bounty(artifact, artifact_type)
+        analysis = backend.analyze(bounty)
+        result = (artifact, bounty, analysis)
+        # If our result is already ready, print it immediately
+        if analysis.ready():
+            yield result
+        else:
+            futures.append(result)
+    yield from futures
+# `analyze` command: runs the engine's analyzer on the given artifacts and/or on
+# built-in self-check inputs (EICAR file, empty file, WICAR URL).
+# The help text comes from the explicit `help=` argument below.
+@engine_cli.command("analyze", help="Analyze artifacts")
+@click.option("-v", "--verbose", count=True)
+@click.option("--check-empty", help="Verify this engine can analyze an empty bounty", default=False, is_flag=True)
+@click.option("--check-eicar", help="Verify this engine can analyze EICAR test file", default=False, is_flag=True)
+@click.option(
+    '--check-wicar',
+    '--check-exploit-url',
+    help='Verify this engine can analyze the WICAR exploit kit URL',
+    default=False,
+    is_flag=True,
+)
+@click.option(
+    "--artifact-type",
+    "-t",
+    type=click.Choice([BOUNTY_ARTIFACT, *ARTIFACT_TYPES], case_sensitive=False),
+    default=FILE_ARTIFACT,
+    help="Artifact type to use when constructing bounties. "
+    f"'{BOUNTY_ARTIFACT}' loads manually constructed bounties, "
+    "treating each argument as the path to a JSON-encoded bounty object"
+)
+@click.argument("artifacts", nargs=-1)
+@click.pass_obj
+def analyze(engine, artifacts, artifact_type, verbose, check_eicar, check_empty, check_wicar, **kwargs):
+    # force celery backend to be eager when running local analyze
+    # (the variable must be set before the settings module is reloaded, and the
+    # reload must happen before the backend is created)
+    os.environ['PSENGINE_TASK_ALWAYS_EAGER'] = '1'
+    importlib.reload(polyswarm_engine.settings)
+    with engine.create_backend() as backend:
+        # Self-checks: each one analyzes a fixed input and verifies the verdict
+        if check_eicar:
+            analysis = backend.analyze(_make_bounty(EICAR_CONTENT, FILE_ARTIFACT)).get()
+            _check_analysis(analysis, expected={MALICIOUS})
+        if check_empty:
+            # An empty file may be reported as either benign or unknown
+            analysis = backend.analyze(_make_bounty(b'', FILE_ARTIFACT)).get()
+            _check_analysis(analysis, expected={BENIGN, UNKNOWN})
+        if check_wicar:
+            # MS05-054 Microsoft Internet Explorer JavaScript OnLoad Handler
+            url = "http://malware.wicar.org/data/ms05_054_onload.html"
+            analysis = backend.analyze(_make_bounty(url, URL_ARTIFACT)).get()
+            _check_analysis(analysis, expected={MALICIOUS})
+        # User-supplied artifacts: diagnostics go to stderr, the analysis itself to stdout
+        for artifact, bounty, future in _gather_analyses(backend, artifacts, artifact_type):
+            if artifact and len(artifacts) > 1:
+                # Separator line: the artifact centered in 80 columns of '-'
+                _echo(f"{artifact:-^80}", ostream="stderr")
+            if verbose:
+                _echo("Bounty: ", nl=False, ostream="stderr")
+                _echo(bounty, bold=True, ostream="stderr")
+                _echo("Analysis: ", nl=False, ostream="stderr")
+            _echo(future.get(), bold=bool(verbose))
+def _check_analysis(analysis, expected):
+    _echo(analysis)
+    assert isinstance(analysis, t.Mapping)
+    assert analysis["verdict"] in expected, f"Received '{analysis['verdict']}' instead of {' or '.join(expected)}"
+def _make_bounty(artifact, artifact_type, **kwargs):
+    forge = functools.partial(forge_local_bounty, artifact_type=artifact_type, **kwargs)
+    if artifact_type == BOUNTY_ARTIFACT:
+        return json.load(click.open_file(artifact, "rb"))
+    elif isinstance(artifact, bytes):
+        return forge(data=artifact)
+    elif artifact == "-" or is_fifo(artifact):
+        return forge(stream=click.open_file(artifact, "rb"))
+    elif artifact_type == URL_ARTIFACT:
+        return forge(data=artifact)
+    elif artifact_type == FILE_ARTIFACT:
+        return forge(path=artifact)
+    else:
+        raise ValueError(f"Invalid artifact: {artifact}")
+@engine_cli.command("create-bounty", help="Make a fresh bounty from a file or URL artifact")
+@click.option(
+    "--artifact-type",
+    "-t",
+    type=click.Choice(list(ARTIFACT_TYPES), case_sensitive=False),
+    default=FILE_ARTIFACT,
+    help="Artifact type to use when constructing bounties. "
+)
+@click.option("--expiration", type=int, default=60 * 60 * 24 * 365, help="Number of seconds until bounty expiration")
+@click.option("--response-url", help="The URL to send results to")
+@click.argument("artifact")
+def create_bounty(artifact, artifact_type, expiration, response_url):
+    bounty = _make_bounty(artifact, artifact_type, expiration=dt.timedelta(seconds=expiration))
+    if response_url:
+        bounty["response_url"] = response_url
+    _echo(bounty)
+@engine_cli.command(
+    "worker",
+    help="Start celery worker",
+    context_settings=dict(ignore_unknown_options=True),
+)
+@click.argument("celery_args", nargs=-1)
+@click.pass_obj
+def worker(engine, celery_args, **kwargs):
+    from logging.config import dictConfig
+    from polyswarm_engine import log_config
+    dictConfig(log_config.get_logging())
+    with engine.create_backend() as backend:
+        backend.app.worker_main(argv=["worker", *celery_args])
+class EngineCommandsGroup(click.MultiCommand):
+    def list_commands(self, ctx):
+        return ctx.obj.cmd
+    def get_command(self, ctx, name):
+        engine = ctx.obj
+        cmd = engine.cmd[name]
+        func = cmd["func"]
+        argspec = inspect.getfullargspec(func)
+        docstr = cmd["doc"]
+        def callback(**params):
+            args = []
+            for arg in set(argspec.args) & set(params.keys()):
+                args.append(params.pop(arg))
+            if argspec.varargs in params:
+                args.extend(params.pop(argspec.varargs))
+            with engine.create_backend():
+                result = func(*args, **params)
+                _echo(result, fg="black", bold=True)
+        return click.Command(
+            name=name,
+            callback=callback,
+            help=docstr or name,
+            short_help=docstr.split("\n")[0] or None,
+            params=list(self._argspec_to_params(argspec)),
+        )
+    @staticmethod
+    def _argspec_to_params(spec: "inspect.FullArgSpec") -> "t.Iterator[click.Parameter]":
+        """Convert the function signature of a command to `click.Parameter` objects"""
+        def get_type(param_name):
+            if not spec.annotations:
+                return None
+            typ = spec.annotations.get(param_name)
+            if isinstance(typ, str):
+                if hasattr(builtins, typ):
+                    return getattr(builtins, typ)
+                elif typ == "Path" or typ == "pathlib.Path":
+                    return pathlib.Path
+                else:
+                    return None
+            return typ
+        # `kwonlyargs` is a list of keyword-only parameter names in declaration order
+        if spec.kwonlyargs:
+            for name in spec.kwonlyargs:
+                # `kwonlydefaults` holds dictionary mapping parameter names from `kwonlyargs`
+                # to the default values used if no argument is supplied
+                if spec.kwonlydefaults and name in spec.kwonlydefaults:
+                    yield click.Option([f"--{name}"], default=spec.kwonlydefaults[name], type=get_type(name))
+                else:
+                    yield click.Option([f"--{name}"], required=True, type=get_type(name))
+        # `args` is a list of the positional parameter names
+        if spec.args:
+            # `defaults` is an n-tuple of default argument values for the last n positional parameters
+            if spec.defaults:
+                index = len(spec.args) - len(spec.defaults)
+                # yield each of the positional args w/o any associated defaults
+                for name in spec.args[:index]:
+                    yield click.Argument([name], required=True, type=get_type(name))
+                # ... and then the rest of the positional args w/ default values
+                for name, default in zip(spec.args[index:], spec.defaults):
+                    yield click.Argument([name], default=default, type=get_type(name))
+            else:
+                for name in spec.args:
+                    yield click.Argument([name], required=True, type=get_type(name))
+        # `varargs` is the name of the * parameter or `None` if not accepted.
+        if spec.varargs:
+            yield click.Argument([spec.varargs], nargs=-1)
+engine_cli.add_command(EngineCommandsGroup(name="commands", help="Engine commands"))
+def _echo(msg, **echo_options):
+    if msg is None:
+        return
+    elif isinstance(msg, dict):
+        msg = json.dumps(msg, indent=2)
+    if "ostream" in echo_options:
+        echo_options["file"] = click.get_text_stream(echo_options.pop("ostream"))
+    click.secho(msg, **echo_options)

polyswarm_engine/command.py ADDED Viewed

@@ -0,0 +1,34 @@
+import inspect
+from .utils import get_func_name, get_func_qual
+class CommandRegistry:
+    def __init__(self):
+        self._metadata = dict()
+    def __iter__(self):
+        return iter(self._metadata)
+    def __getitem__(self, name):
+        return {"func": self.__dict__[name], **self._metadata[name]}
+    def __getattr__(self, name):
+        # XXX: This is here to make sure that `mypy` doesn't raise an error for items inside `self.__dict__`
+        ...
+    def _add(self, func, name=None):
+        module, fnname = get_func_qual(func)
+        name = name or fnname
+        if name.startswith('_'):
+            raise ValueError("Cannot use a command name starting with '_'")
+        self._metadata[name] = {
+            "name": name,
+            "qualname": get_func_name(func),
+            "module": module,
+            "doc": inspect.getdoc(func) or "",
+        }
+        self.__dict__[name] = func

polyswarm_engine/constants.py ADDED Viewed

@@ -0,0 +1,37 @@
+import typing as t
+import uuid
+import base64
+from .typing import AnalysisResult, ArtifactType
+# MIME type string associated with URL artifacts
+URL_MIMETYPE = "text/uri-list"
+# The two artifact type values
+FILE_ARTIFACT: "ArtifactType" = "FILE"
+URL_ARTIFACT: "ArtifactType" = "URL"
+# Every value accepted by the `ArtifactType` annotation (its type arguments)
+ARTIFACT_TYPES: "t.Set[ArtifactType]" = set(t.get_args(ArtifactType))
+# Analysis Conclusions
+BENIGN: "AnalysisResult" = "benign"
+MALICIOUS: "AnalysisResult" = "malicious"
+SUSPICIOUS: "AnalysisResult" = "suspicious"
+UNKNOWN: "AnalysisResult" = "unknown"
+# Every value accepted by the `AnalysisResult` annotation (its type arguments)
+AnalysisConclusions: "t.Set[AnalysisResult]" = set(t.get_args(AnalysisResult))
+# These defined UUIDv5 namespaces are necessary to support the goal of semantic equivalence of some bounty objects.
+# See: ``polyswarm_engine.bounty._forge_bounty_uuid``
+BOUNTY_UUID = uuid.UUID("fafee1eb-ee7d-4b31-bee5-1547bd26c731")
+# Per-artifact-type namespaces derived from BOUNTY_UUID
+FILE_BOUNTY_UUID = uuid.uuid5(BOUNTY_UUID, FILE_ARTIFACT)
+URL_BOUNTY_UUID = uuid.uuid5(BOUNTY_UUID, URL_ARTIFACT)
+# Comment strings for skipped artifacts; each specific reason extends the "SKIPPED" prefix
+SKIPPED_COMMENT = "SKIPPED"
+SKIPPED_ENCRYPTED_COMMENT = f"{SKIPPED_COMMENT}:ENCRYPTED"
+SKIPPED_HIGHCOMPRESSION_COMMENT = f"{SKIPPED_COMMENT}:DECOMPRESSION-UNSAFE"
+SKIPPED_UNSUPPORTED_COMMENT = f"{SKIPPED_COMMENT}:TYPE-UNSUPPORTED"
+SKIPPED_CANNOT_FETCH_COMMENT = f'{SKIPPED_COMMENT}:CANNOT-FETCH'
+# Bytes of the EICAR anti-virus test file, kept base64-encoded in the source
+EICAR_CONTENT = base64.b64decode(
+    b'WDVPIVAlQEFQWzRcUFpYNTQoUF4pN0NDKTd9JEVJQ0FSLVNUQU5EQVJELUFOVElWSVJVUy1URVNULUZJTEUhJEgrSCo='
+)
+# For easing the bid maths
+NCT_TO_WEI_CONVERSION = 10**18

polyswarm_engine/engine.py ADDED Viewed

@@ -0,0 +1,123 @@
+from __future__ import annotations
+import contextlib
+import logging
+from polyswarm_engine.backend import CeleryBackend
+from polyswarm_engine.cli import engine_cli
+from polyswarm_engine.command import CommandRegistry
+logger = logging.getLogger(__name__)
+class EngineManager:
+    def __init__(self, name, vendor=None, config=None, backend_kwargs: dict = None, **kwargs):
+        self.name: str = name
+        self.vendor: str = vendor
+        self.config = config or dict()
+        self.ctx = dict()
+        self.backend: CeleryBackend|None = None
+        self.backend_kwargs = dict()
+        self.cmd = CommandRegistry()
+        # in case a lifecycle is not defined, use a nop context manager
+        self._lifecycle = lambda: contextlib.nullcontext()
+        self._head = None
+        self._analyze = None
+    def cli(self):
+        engine_cli(prog_name=self.name, obj=self)
+    @contextlib.contextmanager
+    def create_backend(self):
+        """
+        Start with backend
+        Example
+        -------
+            >>> with Engine.create_backend() as backend:
+            >>>    ...
+        """
+        self.backend = CeleryBackend(
+            self.name,
+            self._analyze,
+            self._head,
+            self._lifecycle,
+            **self.backend_kwargs,
+        )
+        with self.backend.run() as backend:
+            yield backend
+        self.backend = None
+    def expose_command(self, func: "EngineCommandCallable"):
+        """Decorate to expose an internal engine function"""
+        self.cmd._add(func)
+        return func
+    def register_analyzer(self, func: "EngineAnalyzeCallable"):
+        """Decorator used to register this engine's analyzer function
+        Example::
+            @engine.register_analyzer
+            def analyze(bounty: polyswarm_engine.Bounty) -> polyswarm_engine.Analysis:
+                result = engine.cmd.scan_stream(get_artifact_stream(bounty))
+                analysis = {"verdict": polyswarm_engine.UNKNOWN}
+                if result["is_malicious"]:
+                    analysis["verdict"] = polyswarm_engine.MALICIOUS
+                if "result_name" in result:
+                    analysis["metadata"] = {"malware_family": result["result_name"]}
+                return analysis
+        """
+        self._analyze = func
+        return func
+    def register_head(self, func: "EngineHeadCallable"):
+        """Decorator used to gather engine metadata at startup
+        Notes::
+        This should decorate a function that gathers any data you'd
+        like to include with your analyses, but which isn't produced
+        as part of the scanning process such as:
+            - Engine version
+            - Signature version
+            - Current environment
+        Example::
+            @engine.register_head
+            def head() -> polyswarm_engine.AnalysisMetadata:
+                info = engine.cmd.info()
+                return {
+                    "product": info["productName"],
+                    "scanner": {
+                        "vendor_version": info["productVersion"],
+                        "signatures_version": info["vbaseVersion"],
+                    }
+                }
+        """
+        self._head = func
+        return func
+    def register_lifecycle_manager(self, func: "EngineLifecycleCallable"):
+        """Wraps a function acting as a engine lifecycle ContextManager
+        Example::
+            @engine.register_lifecycle_manager
+            def lifecycle(Engine):
+                pid = run([Engine.config["DAEMON"], "--start"]) # Setup worker
+                yield
+                terminate(pid) # Worker has terminated, run cleanup code...
+        """
+        self._lifecycle = contextlib.contextmanager(func)
+        return func
+__all__ = ["EngineManager"]

polyswarm_engine/exceptions.py ADDED Viewed

@@ -0,0 +1,41 @@
+class EngineException(Exception):
+    pass
+class EngineTimeoutError(EngineException):
+    pass
+class EngineFileNotFoundError(EngineException):
+    pass
+class EngineWorkerInterrupt(EngineException):
+    """ An exception that is not KeyboardInterrupt to allow subprocesses
+        to be interrupted.
+    """
+    pass
+class EnginePollingException(EngineException):
+    """Base exception that stores all return values of attempted polls"""
+    def __init__(self, last=None):
+        self.last = last
+class EngineExpiredException(EnginePollingException):
+    """Exception raised if polling function times out"""
+class EngineMaxCallException(EnginePollingException):
+    """Exception raised if maximum number of iterations is exceeded"""
+class BountyException(Exception):
+    """Bounty had problems"""
+class BountyFetchException(Exception):
+    """Bounty artifact could not be fetched"""

polyswarm_engine/log_config.py ADDED Viewed

@@ -0,0 +1,104 @@
+from datetime import datetime, timezone as tz
+import logging
+from polyswarm_engine.settings import LOG_LEVEL, LOG_FORMAT
+try:
+    from pythonjsonlogger import jsonlogger
+except ImportError:
+    jsonlogger = None
+else:
+    class JSONFormatter(jsonlogger.JsonFormatter):
+        """
+        Class to add custom JSON fields to our logger.
+        Presently just adds a timestamp if one isn't present and the log level.
+        INFO: https://github.com/madzak/python-json-logger#customizing-fields
+        """
+        def add_fields(self, log_record, record, message_dict):
+            super(JSONFormatter, self).add_fields(log_record, record, message_dict)
+            if not log_record.get('timestamp'):
+                # this doesn't use record.created, so it is slightly off
+                now = datetime.now(tz.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ')
+                log_record['timestamp'] = now
+            if log_record.get('level'):
+                log_record['level'] = log_record['level'].upper()
+            else:
+                log_record['level'] = record.levelname
+try:
+    import click
+    import click_log
+except ImportError:
+    click_log = None
+else:
+    # adding color to INFO log messages as well
+    click_log.core.ColorFormatter.colors['info'] = dict(fg='green')
+    class NamedColorFormatter(logging.Formatter):
+        colors = {
+            'error': dict(fg='red'),
+            'exception': dict(fg='red'),
+            'critical': dict(fg='red'),
+            'debug': dict(fg='blue'),
+            'warning': dict(fg='yellow'),
+            'info': dict(fg='green'),
+        }
+        def format(self, record):
+            if not record.exc_info:
+                level = record.levelname.lower()
+                msg = logging.Formatter.format(self, record)
+                if level in self.colors:
+                    sopts = self.colors[level]
+                    lines = msg.splitlines()
+                    msg = '\n'.join(click.style(x, **sopts) for x in lines)  # type: ignore
+                return msg
+            return logging.Formatter.format(self, record)
+def get_logging(log_level=None, handler='console'):
+    log_level = log_level or LOG_LEVEL
+    return {
+        'version': 1,
+        'disable_existing_loggers': False,
+        'formatters': {
+            'text': {
+                'format': '%(asctime)s - %(levelname)-2s [%(filename)s:%(lineno)d][%(funcName)1s] %(message)s',
+            },
+            'json': {
+                'format': '%(asctime)s %(levelname) %(message) %(filename) %(lineno) %(funcName)',
+                'class': 'polyswarm_engine.log_config.JSONFormatter',
+            },
+            'click': {
+                'format': '%(asctime)s - %(levelname)-2s [%(filename)s:%(lineno)d][%(funcName)1s] %(message)s',
+                'class': 'polyswarm_engine.log_config.NamedColorFormatter',
+            },
+        },
+        'handlers': {
+            'console': {
+                'level': log_level,
+                'class': 'logging.StreamHandler',
+                'formatter': LOG_FORMAT,
+            },
+            'click': {
+                'level': log_level,
+                'class': 'click_log.core.ClickHandler',
+                'formatter': 'click',
+            },
+        },
+        'loggers': {
+            'polyswarm_engine': {
+                'level': log_level,
+            },
+            'celery': {
+                'level': log_level,
+            },
+        },
+        'root': {
+            'handlers': [handler],
+            'level': log_level,
+        }
+    }

polyswarm_engine/py.typed ADDED Viewed

File without changes