classifyre-cli 0.4.14__tar.gz → 0.4.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/.turbo/turbo-build.log +1 -1
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/PKG-INFO +1 -1
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/package.json +1 -1
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/pyproject.toml +17 -1
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_object_detection.py +1 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/main.py +31 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/models/generated_input.py +103 -3
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/rest.py +49 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/base.py +13 -0
- classifyre_cli-0.4.16/src/sources/email/__init__.py +3 -0
- classifyre_cli-0.4.16/src/sources/email/source.py +555 -0
- classifyre_cli-0.4.16/src/utils/dependency_groups.py +68 -0
- classifyre_cli-0.4.16/src/utils/uv_sync.py +259 -0
- classifyre_cli-0.4.16/tests/test_dependency_groups.py +67 -0
- classifyre_cli-0.4.16/tests/test_email_source.py +260 -0
- classifyre_cli-0.4.16/tests/test_uv_sync.py +132 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/uv.lock +577 -590
- classifyre_cli-0.4.14/src/utils/uv_sync.py +0 -79
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/.gitignore +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/.python-version +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/README.md +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/main.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/scripts/generate_models.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/base.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/broken_links/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/broken_links/detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/config.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/content/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/extractor.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_base.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_factory.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_feature_extraction.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_gliner2.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_image_classification.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_llm.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_regex.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_text_classification.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/trainer.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/dependencies.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/pii/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/pii/detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/secrets/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/secrets/detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/threat/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/threat/code_security_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/threat/yara_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/models/generated_detectors.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/models/generated_single_asset_scan_results.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/base.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/console.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/factory.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/file.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/content_provider.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/detector_pipeline.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/parsed_content_provider.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/worker_pool.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sandbox/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sandbox/runner.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/asset_metadata.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/atlassian_common.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/azure_blob_storage/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/azure_blob_storage/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/confluence/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/confluence/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/databricks/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/databricks/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/dependencies.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/google_cloud_storage/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/google_cloud_storage/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/hive/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/hive/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/jira/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/jira/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mongodb/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mongodb/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mssql/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mssql/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mysql/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mysql/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/neo4j/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/neo4j/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/notion/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/notion/client.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/notion/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/object_storage/base.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/oracle/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/oracle/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/postgresql/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/postgresql/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/powerbi/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/powerbi/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/recipe_normalizer.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/s3_compatible_storage/README.md +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/s3_compatible_storage/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/s3_compatible_storage/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/servicedesk/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/servicedesk/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/slack/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/slack/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/snowflake/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/snowflake/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/sqlite/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/sqlite/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tableau/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tableau/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tabular_base.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tabular_utils.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/wordpress/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/wordpress/source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/telemetry.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/content_extraction.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/embedded_images.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/file_metadata.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/file_parser.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/file_to_images.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/hashing.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/validation.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/conftest.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/broken_links/test_broken_links_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/conftest.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/content/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/conftest.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_invoice_extraction.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_llm_runner.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_pipeline_integration.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_regex_runner.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_transformer_runners.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/conftest.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/sample_invoice.pdf +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/test_pii_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/test_pii_detector_extended.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/secrets/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/secrets/test_secrets_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/secrets/test_secrets_detector_extended.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_base_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_custom_detector_examples_runtime.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_catalog_commercial.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_pipeline_types.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_schema_examples.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_types.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_phase2_detectors.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_registry.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/threat/__init__.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/threat/test_code_security_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/threat/test_yara_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/integration/test_wordpress_broken_links_detector.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/integration/test_wordpress_links_assets.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/pipeline/test_detector_pipeline.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/pipeline/test_worker_pool.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_assets_metadata_catalog.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_azure_blob_storage_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_base_source_attachment.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_base_source_sampling.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_confluence_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_custom_extractor.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_databricks_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_google_cloud_storage_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_hashing.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_hive_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_jira_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_mongodb_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_mssql_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_mysql_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_neo4j_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_notion_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_oracle_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_outputs.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_postgresql_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_powerbi_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_recipe_normalizer.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_s3_compatible_storage_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_sandbox_runner.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_servicedesk_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_slack_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_snowflake_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_source_dependency_groups.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_sqlite_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_tableau_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_tabular_utils.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_wordpress_source.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_content_extraction.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_embedded_images.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_file_metadata.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_file_parser.py +0 -0
- {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_file_to_images.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "classifyre-cli"
|
|
3
|
-
version = "0.4.14"
|
|
3
|
+
version = "0.4.16"
|
|
4
4
|
description = "Classifyre CLI — scan and classify unstructured data sources"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -21,6 +21,18 @@ dependencies = [
|
|
|
21
21
|
# Local editable install for development — ignored when published to PyPI
|
|
22
22
|
classifyre-schemas = { path = "../../packages/schemas", editable = true }
|
|
23
23
|
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
|
|
24
|
+
# CPU-only torch on Linux. The default PyPI Linux wheels bundle the full NVIDIA
|
|
25
|
+
# CUDA stack (~5-7 GB) which is dead weight on our CPU-only CLI jobs (no GPU
|
|
26
|
+
# requested). Pull torch/torchvision from the PyTorch CPU index on Linux so the
|
|
27
|
+
# nvidia-* packages drop out entirely. macOS/other platforms already use CPU
|
|
28
|
+
# wheels from PyPI. (A GPU image can override this in the future.)
|
|
29
|
+
torch = [{ index = "pytorch-cpu", marker = "sys_platform == 'linux'" }]
|
|
30
|
+
torchvision = [{ index = "pytorch-cpu", marker = "sys_platform == 'linux'" }]
|
|
31
|
+
|
|
32
|
+
[[tool.uv.index]]
|
|
33
|
+
name = "pytorch-cpu"
|
|
34
|
+
url = "https://download.pytorch.org/whl/cpu"
|
|
35
|
+
explicit = true
|
|
24
36
|
|
|
25
37
|
[project.scripts]
|
|
26
38
|
classifyre = "src.main:main"
|
|
@@ -82,6 +94,7 @@ custom = [
|
|
|
82
94
|
"transformers>=4.50.0,<6.0.0",
|
|
83
95
|
"torch>=2.12.0",
|
|
84
96
|
"pillow>=12.2.0",
|
|
97
|
+
"timm>=1.0.27",
|
|
85
98
|
"setfit>=1.1.3",
|
|
86
99
|
"datasets>=4.8.5",
|
|
87
100
|
"scikit-learn>=1.7.2",
|
|
@@ -167,6 +180,9 @@ azure-blob-storage = [
|
|
|
167
180
|
google-cloud-storage = [
|
|
168
181
|
"google-cloud-storage>=3.10.1,<4.0.0",
|
|
169
182
|
]
|
|
183
|
+
email = [
|
|
184
|
+
"imap-tools>=1.10.0,<2.0.0",
|
|
185
|
+
]
|
|
170
186
|
otel = [
|
|
171
187
|
"opentelemetry-sdk>=1.42.0",
|
|
172
188
|
"opentelemetry-exporter-otlp-proto-http>=1.27.0",
|
|
@@ -197,6 +197,27 @@ async def run_command_async(args: argparse.Namespace, recipe: dict[str, Any]) ->
|
|
|
197
197
|
has_detectors = bool(pipeline.detectors)
|
|
198
198
|
|
|
199
199
|
if has_detectors:
|
|
200
|
+
# Warm this run's optional dependency groups once, here in
|
|
201
|
+
# the parent process, before the worker pool spawns — so the
|
|
202
|
+
# pool's worker processes don't each race on their own
|
|
203
|
+
# `uv sync` against the shared venv. Best-effort: the
|
|
204
|
+
# lock-protected require_module path remains the safety net.
|
|
205
|
+
from .utils.dependency_groups import recipe_uv_groups
|
|
206
|
+
from .utils.uv_sync import warm_groups
|
|
207
|
+
|
|
208
|
+
warm = recipe_uv_groups(recipe)
|
|
209
|
+
if warm:
|
|
210
|
+
logger.info(
|
|
211
|
+
"Warming optional dependency groups: %s",
|
|
212
|
+
", ".join(sorted(warm)),
|
|
213
|
+
)
|
|
214
|
+
warm_ok, warm_detail = warm_groups(warm)
|
|
215
|
+
if not warm_ok:
|
|
216
|
+
logger.warning(
|
|
217
|
+
"Dependency warm-up incomplete (workers will retry): %s",
|
|
218
|
+
warm_detail,
|
|
219
|
+
)
|
|
220
|
+
|
|
200
221
|
worker_pool = DetectorWorkerPool(max_workers=pool_workers)
|
|
201
222
|
pipeline = DetectorPipeline.from_recipe(
|
|
202
223
|
recipe,
|
|
@@ -351,6 +372,16 @@ async def run_command_async(args: argparse.Namespace, recipe: dict[str, Any]) ->
|
|
|
351
372
|
total_assets,
|
|
352
373
|
output_batch_count,
|
|
353
374
|
)
|
|
375
|
+
|
|
376
|
+
# Phase 1: emit source-derived relationship edges (best-effort).
|
|
377
|
+
if hasattr(source, "collect_relationships") and hasattr(sink, "emit_edges"):
|
|
378
|
+
try:
|
|
379
|
+
edges = await source.collect_relationships()
|
|
380
|
+
if edges:
|
|
381
|
+
await sink.emit_edges(edges)
|
|
382
|
+
logger.info("Emitted %d source-derived relationship edges", len(edges))
|
|
383
|
+
except Exception as rel_error:
|
|
384
|
+
logger.warning("Relationship emission failed (non-fatal): %s", rel_error)
|
|
354
385
|
except Exception as extraction_error:
|
|
355
386
|
if _is_timeout_error(extraction_error):
|
|
356
387
|
logger.warning(
|
|
@@ -43,6 +43,7 @@ class AssetType(StrEnum):
|
|
|
43
43
|
SERVICEDESK = 'SERVICEDESK'
|
|
44
44
|
SQLITE = 'SQLITE'
|
|
45
45
|
NOTION = 'NOTION'
|
|
46
|
+
EMAIL = 'EMAIL'
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
class SourceCategory(StrEnum):
|
|
@@ -238,6 +239,85 @@ class WordPressOptional(BaseModel):
|
|
|
238
239
|
content: WordPressOptionalContent | None = None
|
|
239
240
|
|
|
240
241
|
|
|
242
|
+
class EmailRequired(BaseModel):
|
|
243
|
+
model_config = ConfigDict(
|
|
244
|
+
extra='forbid',
|
|
245
|
+
)
|
|
246
|
+
host: str = Field(
|
|
247
|
+
..., description='IMAP server host (e.g. imap.gmail.com, outlook.office365.com)'
|
|
248
|
+
)
|
|
249
|
+
port: int | None = Field(
|
|
250
|
+
993, description='IMAP server port (993 for IMAPS)', ge=1, le=65535
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class EmailMasked(BaseModel):
|
|
255
|
+
model_config = ConfigDict(
|
|
256
|
+
extra='forbid',
|
|
257
|
+
)
|
|
258
|
+
username: str = Field(
|
|
259
|
+
..., description='Mailbox login (usually the full email address)'
|
|
260
|
+
)
|
|
261
|
+
password: str = Field(
|
|
262
|
+
...,
|
|
263
|
+
description='Account or app-specific password (use an app password for Gmail/Outlook)',
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class EmailOptionalConnection(BaseModel):
|
|
268
|
+
"""
|
|
269
|
+
Transport-level connection controls.
|
|
270
|
+
"""
|
|
271
|
+
|
|
272
|
+
model_config = ConfigDict(
|
|
273
|
+
extra='forbid',
|
|
274
|
+
)
|
|
275
|
+
use_ssl: bool | None = Field(
|
|
276
|
+
True,
|
|
277
|
+
description='Connect over implicit TLS (IMAPS). Disable only for STARTTLS/plain servers.',
|
|
278
|
+
)
|
|
279
|
+
timeout_seconds: int | None = Field(
|
|
280
|
+
30, description='Socket timeout for IMAP operations', ge=1
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class EmailOptionalScope(BaseModel):
|
|
285
|
+
"""
|
|
286
|
+
Which messages and attachments to ingest.
|
|
287
|
+
"""
|
|
288
|
+
|
|
289
|
+
model_config = ConfigDict(
|
|
290
|
+
extra='forbid',
|
|
291
|
+
)
|
|
292
|
+
folders: list[str] | None = Field(['INBOX'], description='Mailbox folders to scan')
|
|
293
|
+
since_date: str | None = Field(
|
|
294
|
+
None,
|
|
295
|
+
description='Only fetch messages on/after this date (ISO 8601 date, e.g. 2026-01-01)',
|
|
296
|
+
)
|
|
297
|
+
before_date: str | None = Field(
|
|
298
|
+
None, description='Only fetch messages before this date (ISO 8601 date)'
|
|
299
|
+
)
|
|
300
|
+
unseen_only: bool | None = Field(
|
|
301
|
+
False, description='Only fetch unread (UNSEEN) messages'
|
|
302
|
+
)
|
|
303
|
+
include_attachments: bool | None = Field(
|
|
304
|
+
True, description='Emit attachments as separate assets linked to the email'
|
|
305
|
+
)
|
|
306
|
+
max_attachment_size_bytes: int | None = Field(
|
|
307
|
+
None,
|
|
308
|
+
description='Skip downloading attachment bytes above this size (still emits a metadata-only asset). Unset means no limit.',
|
|
309
|
+
ge=0,
|
|
310
|
+
)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class EmailOptional(BaseModel):
|
|
314
|
+
model_config = ConfigDict(
|
|
315
|
+
extra='forbid',
|
|
316
|
+
)
|
|
317
|
+
connection: EmailOptionalConnection | None = None
|
|
318
|
+
scope: EmailOptionalScope | None = None
|
|
319
|
+
|
|
320
|
+
|
|
241
321
|
class SlackRequired(BaseModel):
|
|
242
322
|
model_config = ConfigDict(
|
|
243
323
|
extra='forbid',
|
|
@@ -1872,6 +1952,7 @@ class Type(StrEnum):
|
|
|
1872
1952
|
SERVICEDESK = 'SERVICEDESK'
|
|
1873
1953
|
SQLITE = 'SQLITE'
|
|
1874
1954
|
NOTION = 'NOTION'
|
|
1955
|
+
EMAIL = 'EMAIL'
|
|
1875
1956
|
|
|
1876
1957
|
|
|
1877
1958
|
class SlackInput(CoreInput):
|
|
@@ -1892,6 +1973,22 @@ class SlackInput(CoreInput):
|
|
|
1892
1973
|
resources: ResourceOverrides | None = None
|
|
1893
1974
|
|
|
1894
1975
|
|
|
1976
|
+
class EmailInput(CoreInput):
|
|
1977
|
+
type: Literal['EMAIL'] = Field('EMAIL', description='Type of the asset or source')
|
|
1978
|
+
required: EmailRequired
|
|
1979
|
+
masked: EmailMasked
|
|
1980
|
+
optional: EmailOptional | None = None
|
|
1981
|
+
detectors: list[Detector] | None = Field(
|
|
1982
|
+
None, description='Detectors to run on ingested content'
|
|
1983
|
+
)
|
|
1984
|
+
custom_detectors: list[CustomDetectorSelection] | None = Field(
|
|
1985
|
+
None,
|
|
1986
|
+
description='Reusable custom detector IDs selected from the custom detector catalog.',
|
|
1987
|
+
)
|
|
1988
|
+
sampling: SamplingConfig
|
|
1989
|
+
resources: ResourceOverrides | None = None
|
|
1990
|
+
|
|
1991
|
+
|
|
1895
1992
|
class S3CompatibleStorageInput(CoreInput):
|
|
1896
1993
|
type: Literal['S3_COMPATIBLE_STORAGE'] = Field(
|
|
1897
1994
|
'S3_COMPATIBLE_STORAGE', description='Type of the asset or source'
|
|
@@ -2331,7 +2428,7 @@ class ConfluenceOptionalConnection(BaseModel):
|
|
|
2331
2428
|
)
|
|
2332
2429
|
|
|
2333
2430
|
|
|
2334
|
-
class Type16(StrEnum):
|
|
2431
|
+
class Type17(StrEnum):
|
|
2335
2432
|
"""
|
|
2336
2433
|
Filter spaces by space type
|
|
2337
2434
|
"""
|
|
@@ -2368,7 +2465,7 @@ class ConfluenceOptionalScopeSpaces(BaseModel):
|
|
|
2368
2465
|
keys: list[str] | None = Field(
|
|
2369
2466
|
None, description='Filter spaces by keys (up to 250)', max_length=250
|
|
2370
2467
|
)
|
|
2371
|
-
type: Type16 | None = Field(None, description='Filter spaces by space type')
|
|
2468
|
+
type: Type17 | None = Field(None, description='Filter spaces by space type')
|
|
2372
2469
|
status: Status | None = Field(None, description='Filter spaces by status')
|
|
2373
2470
|
labels: list[str] | None = Field(
|
|
2374
2471
|
None,
|
|
@@ -2634,7 +2731,7 @@ class ServiceDeskOptional(BaseModel):
|
|
|
2634
2731
|
content: ServiceDeskOptionalContent | None = None
|
|
2635
2732
|
|
|
2636
2733
|
|
|
2637
|
-
class Type17(StrEnum):
|
|
2734
|
+
class Type18(StrEnum):
|
|
2638
2735
|
"""
|
|
2639
2736
|
Type of the asset or source
|
|
2640
2737
|
"""
|
|
@@ -2660,6 +2757,7 @@ class Type17(StrEnum):
|
|
|
2660
2757
|
SERVICEDESK = 'SERVICEDESK'
|
|
2661
2758
|
SQLITE = 'SQLITE'
|
|
2662
2759
|
NOTION = 'NOTION'
|
|
2760
|
+
EMAIL = 'EMAIL'
|
|
2663
2761
|
|
|
2664
2762
|
|
|
2665
2763
|
class ConfluenceInput(CoreInput):
|
|
@@ -2920,6 +3018,7 @@ class SourceInput(
|
|
|
2920
3018
|
| ServiceDeskInput
|
|
2921
3019
|
| SQLiteInput
|
|
2922
3020
|
| NotionInput
|
|
3021
|
+
| EmailInput
|
|
2923
3022
|
]
|
|
2924
3023
|
):
|
|
2925
3024
|
root: (
|
|
@@ -2944,6 +3043,7 @@ class SourceInput(
|
|
|
2944
3043
|
| ServiceDeskInput
|
|
2945
3044
|
| SQLiteInput
|
|
2946
3045
|
| NotionInput
|
|
3046
|
+
| EmailInput
|
|
2947
3047
|
) = Field(
|
|
2948
3048
|
...,
|
|
2949
3049
|
description='Merged configuration schema with all source types and common definitions',
|
|
@@ -97,6 +97,31 @@ class BulkIngestAssetsRequest(BaseModel):
|
|
|
97
97
|
skip_findings: bool = Field(False, serialization_alias="skipFindings")
|
|
98
98
|
|
|
99
99
|
|
|
100
|
+
class IngestEdge(BaseModel):
|
|
101
|
+
"""A source-derived relationship edge for the investigation graph.
|
|
102
|
+
|
|
103
|
+
Identify endpoints by UUID (from_id / to_id) or by asset hash
|
|
104
|
+
(from_hash / to_hash — the API resolves hashes to UUIDs).
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
model_config = ConfigDict(populate_by_name=True)
|
|
108
|
+
|
|
109
|
+
from_type: str = Field(serialization_alias="fromType")
|
|
110
|
+
from_id: str | None = Field(None, serialization_alias="fromId")
|
|
111
|
+
from_hash: str | None = Field(None, serialization_alias="fromHash")
|
|
112
|
+
to_type: str = Field(serialization_alias="toType")
|
|
113
|
+
to_id: str | None = Field(None, serialization_alias="toId")
|
|
114
|
+
to_hash: str | None = Field(None, serialization_alias="toHash")
|
|
115
|
+
relation_type: str = Field(serialization_alias="relationType")
|
|
116
|
+
confidence: float = 1.0
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class BulkIngestEdgesRequest(BaseModel):
|
|
120
|
+
model_config = ConfigDict(populate_by_name=True)
|
|
121
|
+
|
|
122
|
+
edges: list[IngestEdge]
|
|
123
|
+
|
|
124
|
+
|
|
100
125
|
class FinalizeIngestRunRequest(BaseModel):
|
|
101
126
|
model_config = ConfigDict(populate_by_name=True)
|
|
102
127
|
|
|
@@ -252,6 +277,30 @@ class RestOutputSink:
|
|
|
252
277
|
update_error,
|
|
253
278
|
)
|
|
254
279
|
|
|
280
|
+
async def emit_edges(self, edges: list[IngestEdge]) -> None:
|
|
281
|
+
"""Bulk-upsert source-derived relationship edges to the investigation graph.
|
|
282
|
+
|
|
283
|
+
Idempotent — safe to call multiple times with overlapping data.
|
|
284
|
+
Silently skips if the list is empty.
|
|
285
|
+
"""
|
|
286
|
+
if not edges:
|
|
287
|
+
return
|
|
288
|
+
|
|
289
|
+
_edge_batch = 500
|
|
290
|
+
for i in range(0, len(edges), _edge_batch):
|
|
291
|
+
chunk = edges[i : i + _edge_batch]
|
|
292
|
+
payload = BulkIngestEdgesRequest(edges=chunk)
|
|
293
|
+
try:
|
|
294
|
+
self._request_json(
|
|
295
|
+
"POST",
|
|
296
|
+
"/graph/edges",
|
|
297
|
+
payload.model_dump(mode="json", by_alias=True),
|
|
298
|
+
)
|
|
299
|
+
logger.debug("Emitted %d source-derived edges to graph", len(chunk))
|
|
300
|
+
except Exception as exc:
|
|
301
|
+
# Edge emission is best-effort: log and continue.
|
|
302
|
+
logger.warning("Failed to emit edges to graph: %s", exc)
|
|
303
|
+
|
|
255
304
|
async def register_discovered_assets(self, hashes: list[str]) -> None:
|
|
256
305
|
runner_id = self._require_runner_id()
|
|
257
306
|
for i in range(0, len(hashes), 500):
|
|
@@ -4,6 +4,7 @@ from collections.abc import AsyncGenerator, Generator
|
|
|
4
4
|
from typing import TYPE_CHECKING, Any
|
|
5
5
|
|
|
6
6
|
from ..models.generated_single_asset_scan_results import DetectionResult, SingleAssetScanResults
|
|
7
|
+
from ..outputs.rest import IngestEdge
|
|
7
8
|
|
|
8
9
|
if TYPE_CHECKING:
|
|
9
10
|
from ..utils.file_parser import ParsedBytes
|
|
@@ -315,3 +316,15 @@ class BaseSource(ABC):
|
|
|
315
316
|
this and map those identifiers back to their original URLs.
|
|
316
317
|
"""
|
|
317
318
|
return normalize_http_url(link)
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
async def collect_relationships(self) -> list[IngestEdge]:
|
|
322
|
+
"""Return source-derived relationship edges for the investigation graph.
|
|
323
|
+
|
|
324
|
+
Connectors override this to emit typed edges (READS, ATTACHED_TO,
|
|
325
|
+
SENT_TO, OWNS, ACCESSED, etc.) discovered during extraction. The caller
|
|
326
|
+
(main.py) will forward these to ``RestOutputSink.emit_edges()``.
|
|
327
|
+
|
|
328
|
+
Default: no relationships (empty list).
|
|
329
|
+
"""
|
|
330
|
+
return []
|