classifyre-cli 0.4.2__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- classifyre_cli-0.4.3/.turbo/turbo-build.log +3 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/PKG-INFO +1 -1
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/package.json +1 -1
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/pyproject.toml +1 -1
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/detector.py +6 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/secrets/detector.py +3 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/threat/code_security_detector.py +3 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/threat/yara_detector.py +8 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/main.py +81 -75
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/models/generated_input.py +3 -9
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/pipeline/detector_pipeline.py +333 -187
- classifyre_cli-0.4.3/src/pipeline/worker_pool.py +287 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/azure_blob_storage/source.py +3 -6
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/confluence/source.py +0 -7
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/google_cloud_storage/source.py +2 -8
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/jira/source.py +0 -7
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/object_storage/base.py +3 -16
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/s3_compatible_storage/source.py +3 -10
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/servicedesk/source.py +0 -7
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/pipeline/test_detector_pipeline.py +8 -8
- classifyre_cli-0.4.3/tests/pipeline/test_worker_pool.py +479 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_azure_blob_storage_source.py +0 -1
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_google_cloud_storage_source.py +0 -1
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_s3_compatible_storage_source.py +2 -5
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/uv.lock +6 -2
- classifyre_cli-0.4.2/.turbo/turbo-build.log +0 -3
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/.gitignore +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/.python-version +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/README.md +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/main.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/scripts/generate_models.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/base.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/broken_links/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/broken_links/detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/config.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/content/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_base.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_factory.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_feature_extraction.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_gliner2.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_image_classification.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_llm.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_object_detection.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_regex.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/runners/_text_classification.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/custom/trainer.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/dependencies.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/pii/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/pii/detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/secrets/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/threat/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/models/generated_detectors.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/models/generated_single_asset_scan_results.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/outputs/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/outputs/base.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/outputs/console.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/outputs/factory.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/outputs/file.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/outputs/rest.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/pipeline/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/pipeline/content_provider.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/pipeline/parsed_content_provider.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sandbox/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sandbox/runner.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/atlassian_common.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/azure_blob_storage/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/base.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/confluence/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/databricks/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/databricks/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/dependencies.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/google_cloud_storage/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/hive/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/hive/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/jira/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/mongodb/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/mongodb/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/mssql/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/mssql/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/mysql/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/mysql/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/neo4j/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/neo4j/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/oracle/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/oracle/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/postgresql/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/postgresql/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/powerbi/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/powerbi/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/recipe_normalizer.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/s3_compatible_storage/README.md +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/s3_compatible_storage/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/servicedesk/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/slack/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/slack/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/snowflake/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/snowflake/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/tableau/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/tableau/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/tabular_utils.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/wordpress/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/sources/wordpress/source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/telemetry.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/utils/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/utils/content_extraction.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/utils/file_parser.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/utils/hashing.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/utils/uv_sync.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/utils/validation.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/conftest.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/broken_links/test_broken_links_detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/conftest.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/content/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/custom/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/custom/conftest.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/custom/test_invoice_extraction.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/custom/test_pipeline_integration.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/custom/test_regex_runner.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/custom/test_transformer_runners.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/pii/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/pii/conftest.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/pii/sample_invoice.pdf +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/pii/test_pii_detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/pii/test_pii_detector_extended.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/secrets/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/secrets/test_secrets_detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/secrets/test_secrets_detector_extended.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_base_detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_custom_detector_examples_runtime.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_detector_catalog_commercial.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_detector_pipeline_types.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_detector_schema_examples.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_detector_types.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_phase2_detectors.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/test_registry.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/threat/__init__.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/threat/test_code_security_detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/detectors/threat/test_yara_detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/integration/test_wordpress_broken_links_detector.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/integration/test_wordpress_links_assets.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_base_source_attachment.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_base_source_sampling.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_confluence_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_databricks_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_hashing.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_hive_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_jira_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_mongodb_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_mssql_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_mysql_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_neo4j_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_oracle_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_outputs.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_postgresql_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_powerbi_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_recipe_normalizer.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_servicedesk_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_slack_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_snowflake_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_source_dependency_groups.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_tableau_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_tabular_utils.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/test_wordpress_source.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/utils/test_content_extraction.py +0 -0
- {classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/tests/utils/test_file_parser.py +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import asyncio
|
|
5
6
|
import logging
|
|
6
7
|
|
|
7
8
|
from ...models.generated_detectors import (
|
|
@@ -34,6 +35,11 @@ class CustomDetector(BaseDetector):
|
|
|
34
35
|
|
|
35
36
|
async def detect(
|
|
36
37
|
self, content: str | bytes, content_type: str = "text/plain"
|
|
38
|
+
) -> list[DetectionResult]:
|
|
39
|
+
return await asyncio.to_thread(self._detect_sync, content, content_type)
|
|
40
|
+
|
|
41
|
+
def _detect_sync(
|
|
42
|
+
self, content: str | bytes, content_type: str = "text/plain"
|
|
37
43
|
) -> list[DetectionResult]:
|
|
38
44
|
findings = self._runner.detect(content, content_type)
|
|
39
45
|
max_findings = self.custom_config.max_findings
|
|
@@ -5,6 +5,7 @@ plugin's ``analyze_line`` directly. No temp files, no global Settings state,
|
|
|
5
5
|
and no ``SecretsCollection`` needed.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
import asyncio
|
|
8
9
|
import importlib
|
|
9
10
|
import logging
|
|
10
11
|
import pkgutil
|
|
@@ -304,7 +305,9 @@ class SecretsDetector(BaseDetector):
|
|
|
304
305
|
len(content),
|
|
305
306
|
)
|
|
306
307
|
return []
|
|
308
|
+
return await asyncio.to_thread(self._detect_sync, content)
|
|
307
309
|
|
|
310
|
+
def _detect_sync(self, content: str) -> list[DetectionResult]:
|
|
308
311
|
plugins = self._build_plugins()
|
|
309
312
|
if not plugins:
|
|
310
313
|
return []
|
{classifyre_cli-0.4.2 → classifyre_cli-0.4.3}/src/detectors/threat/code_security_detector.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Code security detector using Bandit static analysis."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import subprocess
|
|
@@ -137,7 +138,9 @@ class CodeSecurityDetector(BaseDetector):
|
|
|
137
138
|
return []
|
|
138
139
|
if not content.strip():
|
|
139
140
|
return []
|
|
141
|
+
return await asyncio.to_thread(self._detect_sync, content)
|
|
140
142
|
|
|
143
|
+
def _detect_sync(self, content: str) -> list[DetectionResult]:
|
|
141
144
|
threshold = self._cfg.confidence_threshold or 0.7
|
|
142
145
|
max_findings = self._cfg.max_findings or 25
|
|
143
146
|
findings: list[DetectionResult] = []
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""YARA-based threat detector — compiles structured rule objects into a live ruleset."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import logging
|
|
4
5
|
import re
|
|
5
6
|
|
|
@@ -89,6 +90,13 @@ class YaraDetector(BaseDetector):
|
|
|
89
90
|
|
|
90
91
|
async def detect(
|
|
91
92
|
self, content: str | bytes, content_type: str = "text/plain"
|
|
93
|
+
) -> list[DetectionResult]:
|
|
94
|
+
if self._rules is None:
|
|
95
|
+
return []
|
|
96
|
+
return await asyncio.to_thread(self._detect_sync, content, content_type)
|
|
97
|
+
|
|
98
|
+
def _detect_sync(
|
|
99
|
+
self, content: str | bytes, content_type: str = "text/plain"
|
|
92
100
|
) -> list[DetectionResult]:
|
|
93
101
|
if self._rules is None:
|
|
94
102
|
return []
|
|
@@ -179,15 +179,32 @@ async def run_command_async(args: argparse.Namespace, recipe: dict[str, Any]) ->
|
|
|
179
179
|
sink_started = True
|
|
180
180
|
|
|
181
181
|
from .pipeline.detector_pipeline import DetectorPipeline
|
|
182
|
+
from .pipeline.worker_pool import (
|
|
183
|
+
DetectorWorkerPool,
|
|
184
|
+
compute_pool_workers,
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
pool_workers = compute_pool_workers(
|
|
188
|
+
override=args.max_pool_workers,
|
|
189
|
+
)
|
|
190
|
+
worker_pool: DetectorWorkerPool | None = None
|
|
182
191
|
|
|
183
192
|
pipeline = DetectorPipeline.from_recipe(
|
|
184
193
|
recipe,
|
|
185
194
|
source,
|
|
186
195
|
runner_id,
|
|
187
|
-
max_concurrent_assets=args.detector_max_concurrent,
|
|
188
196
|
)
|
|
189
197
|
has_detectors = bool(pipeline.detectors)
|
|
190
198
|
|
|
199
|
+
if has_detectors:
|
|
200
|
+
worker_pool = DetectorWorkerPool(max_workers=pool_workers)
|
|
201
|
+
pipeline = DetectorPipeline.from_recipe(
|
|
202
|
+
recipe,
|
|
203
|
+
source,
|
|
204
|
+
runner_id,
|
|
205
|
+
worker_pool=worker_pool,
|
|
206
|
+
)
|
|
207
|
+
|
|
191
208
|
# --- Phase 1: Discovery ---
|
|
192
209
|
source.set_discovery_only(True)
|
|
193
210
|
all_stubs: list[Any] = []
|
|
@@ -225,74 +242,75 @@ async def run_command_async(args: argparse.Namespace, recipe: dict[str, Any]) ->
|
|
|
225
242
|
if has_detectors and all_stubs:
|
|
226
243
|
import asyncio as _asyncio
|
|
227
244
|
|
|
228
|
-
workers = args.processing_workers
|
|
229
|
-
semaphore = _asyncio.Semaphore(workers)
|
|
230
245
|
processed_count = 0
|
|
246
|
+
logger.info(
|
|
247
|
+
"Phase 2 starting: %d assets, pool_workers=%s",
|
|
248
|
+
len(all_stubs),
|
|
249
|
+
worker_pool.max_workers if worker_pool else "none",
|
|
250
|
+
)
|
|
231
251
|
error_count = 0
|
|
232
252
|
|
|
233
253
|
async def _process_one(asset: Any) -> None:
|
|
234
254
|
nonlocal processed_count, error_count
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
]
|
|
250
|
-
await sink.emit_batch([stub_payload], skip_findings=False)
|
|
251
|
-
if hasattr(sink, "update_asset_status"):
|
|
252
|
-
f_total, f_by_sev, f_by_det = _compute_findings_counts(
|
|
253
|
-
partial
|
|
254
|
-
)
|
|
255
|
-
await sink.update_asset_status(
|
|
256
|
-
asset_hash,
|
|
257
|
-
"PROCESSING",
|
|
258
|
-
findings_total=f_total,
|
|
259
|
-
findings_by_severity=f_by_sev,
|
|
260
|
-
findings_by_detector=f_by_det,
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
result = await pipeline.process_single_asset(
|
|
264
|
-
asset,
|
|
265
|
-
on_findings_flushed=_on_findings_flushed,
|
|
266
|
-
findings_flush_size=args.detector_flush_batch_size,
|
|
267
|
-
)
|
|
268
|
-
payload = _asset_to_payload(result)
|
|
269
|
-
await sink.emit_batch([payload], skip_findings=False)
|
|
270
|
-
|
|
255
|
+
asset_hash = getattr(asset, "hash", None) or ""
|
|
256
|
+
try:
|
|
257
|
+
if hasattr(sink, "update_asset_status"):
|
|
258
|
+
await sink.update_asset_status(asset_hash, "PROCESSING")
|
|
259
|
+
|
|
260
|
+
async def _on_findings_flushed(partial: list[Any]) -> None:
|
|
261
|
+
stub_payload = _asset_to_payload(asset)
|
|
262
|
+
stub_payload["findings"] = [
|
|
263
|
+
f.model_dump(mode="json", exclude_none=True)
|
|
264
|
+
if hasattr(f, "model_dump")
|
|
265
|
+
else f
|
|
266
|
+
for f in partial
|
|
267
|
+
]
|
|
268
|
+
await sink.emit_batch([stub_payload], skip_findings=False)
|
|
271
269
|
if hasattr(sink, "update_asset_status"):
|
|
272
270
|
f_total, f_by_sev, f_by_det = _compute_findings_counts(
|
|
273
|
-
|
|
271
|
+
partial
|
|
274
272
|
)
|
|
275
273
|
await sink.update_asset_status(
|
|
276
274
|
asset_hash,
|
|
277
|
-
"
|
|
275
|
+
"PROCESSING",
|
|
278
276
|
findings_total=f_total,
|
|
279
277
|
findings_by_severity=f_by_sev,
|
|
280
278
|
findings_by_detector=f_by_det,
|
|
281
279
|
)
|
|
282
280
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
281
|
+
result = await pipeline.process_single_asset(
|
|
282
|
+
asset,
|
|
283
|
+
on_findings_flushed=_on_findings_flushed,
|
|
284
|
+
findings_flush_size=args.detector_flush_batch_size,
|
|
285
|
+
)
|
|
286
|
+
payload = _asset_to_payload(result)
|
|
287
|
+
await sink.emit_batch([payload], skip_findings=False)
|
|
288
|
+
|
|
289
|
+
if hasattr(sink, "update_asset_status"):
|
|
290
|
+
f_total, f_by_sev, f_by_det = _compute_findings_counts(
|
|
291
|
+
result.findings or []
|
|
292
|
+
)
|
|
293
|
+
await sink.update_asset_status(
|
|
294
|
+
asset_hash,
|
|
295
|
+
"PROCESSED",
|
|
296
|
+
findings_total=f_total,
|
|
297
|
+
findings_by_severity=f_by_sev,
|
|
298
|
+
findings_by_detector=f_by_det,
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
source.evict_asset_cache(asset_hash)
|
|
302
|
+
processed_count += 1
|
|
303
|
+
except Exception as exc:
|
|
304
|
+
error_count += 1
|
|
305
|
+
logger.error("Asset %s failed: %s", asset_hash, exc)
|
|
306
|
+
if hasattr(sink, "update_asset_status"):
|
|
307
|
+
try:
|
|
308
|
+
error_msg = str(exc) or type(exc).__name__
|
|
309
|
+
await sink.update_asset_status(
|
|
310
|
+
asset_hash, "ERROR", error_msg
|
|
311
|
+
)
|
|
312
|
+
except Exception:
|
|
313
|
+
pass
|
|
296
314
|
|
|
297
315
|
tasks = [_asyncio.create_task(_process_one(a)) for a in all_stubs]
|
|
298
316
|
await _asyncio.gather(*tasks, return_exceptions=True)
|
|
@@ -340,6 +358,9 @@ async def run_command_async(args: argparse.Namespace, recipe: dict[str, Any]) ->
|
|
|
340
358
|
"Failed to mark sink failure: %s", sink_error, exc_info=True
|
|
341
359
|
)
|
|
342
360
|
raise
|
|
361
|
+
finally:
|
|
362
|
+
if worker_pool is not None:
|
|
363
|
+
worker_pool.shutdown(wait=True)
|
|
343
364
|
|
|
344
365
|
except Exception as e:
|
|
345
366
|
logger.debug("Traceback for %s failure:", args.command, exc_info=True)
|
|
@@ -527,16 +548,10 @@ def main() -> None:
|
|
|
527
548
|
help="How many detector-processed assets to accumulate before pushing findings to the API (default: 5, env: CLASSIFYRE_DETECTOR_FLUSH_BATCH_SIZE)",
|
|
528
549
|
)
|
|
529
550
|
parser.add_argument(
|
|
530
|
-
"--
|
|
531
|
-
type=int,
|
|
532
|
-
default=None,
|
|
533
|
-
help="Max assets processed in parallel by the detector pipeline (default: 10, env: CLASSIFYRE_DETECTOR_MAX_CONCURRENT)",
|
|
534
|
-
)
|
|
535
|
-
parser.add_argument(
|
|
536
|
-
"--processing-workers",
|
|
551
|
+
"--max-pool-workers",
|
|
537
552
|
type=int,
|
|
538
553
|
default=None,
|
|
539
|
-
help="
|
|
554
|
+
help="Max OS processes in the detector pool. Auto-sized from CPU/memory when omitted (env: CLASSIFYRE_MAX_POOL_WORKERS)",
|
|
540
555
|
)
|
|
541
556
|
|
|
542
557
|
args = parser.parse_args()
|
|
@@ -549,21 +564,12 @@ def main() -> None:
|
|
|
549
564
|
args.detector_flush_batch_size = 5
|
|
550
565
|
args.detector_flush_batch_size = max(args.detector_flush_batch_size, 1)
|
|
551
566
|
|
|
552
|
-
if args.
|
|
553
|
-
env_val = os.environ.get("
|
|
554
|
-
try:
|
|
555
|
-
args.detector_max_concurrent = int(env_val) if env_val else 10
|
|
556
|
-
except ValueError:
|
|
557
|
-
args.detector_max_concurrent = 10
|
|
558
|
-
args.detector_max_concurrent = max(args.detector_max_concurrent, 1)
|
|
559
|
-
|
|
560
|
-
if args.processing_workers is None:
|
|
561
|
-
env_val = os.environ.get("CLASSIFYRE_PROCESSING_WORKERS")
|
|
567
|
+
if args.max_pool_workers is None:
|
|
568
|
+
env_val = os.environ.get("CLASSIFYRE_MAX_POOL_WORKERS")
|
|
562
569
|
try:
|
|
563
|
-
args.
|
|
570
|
+
args.max_pool_workers = int(env_val) if env_val else None
|
|
564
571
|
except ValueError:
|
|
565
|
-
args.
|
|
566
|
-
args.processing_workers = max(args.processing_workers, 1)
|
|
572
|
+
args.max_pool_workers = None
|
|
567
573
|
|
|
568
574
|
if args.debug:
|
|
569
575
|
logging.getLogger().setLevel(logging.DEBUG)
|
|
@@ -163,17 +163,11 @@ class ResourceOverrides(BaseModel):
|
|
|
163
163
|
ge=60,
|
|
164
164
|
le=86400,
|
|
165
165
|
)
|
|
166
|
-
|
|
166
|
+
max_pool_workers: int | None = Field(
|
|
167
167
|
None,
|
|
168
|
-
description='
|
|
168
|
+
description='Max OS processes in the detector pool. Auto-sized from CPU/memory limits when omitted.',
|
|
169
169
|
ge=1,
|
|
170
|
-
le=
|
|
171
|
-
)
|
|
172
|
-
detector_max_concurrent: int | None = Field(
|
|
173
|
-
None,
|
|
174
|
-
description='Max concurrent detector invocations across all pages (default: 5)',
|
|
175
|
-
ge=1,
|
|
176
|
-
le=50,
|
|
170
|
+
le=16,
|
|
177
171
|
)
|
|
178
172
|
|
|
179
173
|
|