classifyre-cli 0.4.22__tar.gz → 0.4.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/.turbo/turbo-build.log +1 -1
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/PKG-INFO +1 -1
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/package.json +1 -1
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/pyproject.toml +1 -1
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/pipeline/detector_pipeline.py +45 -3
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/pipeline/parsed_content_provider.py +22 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/pipeline/worker_pool.py +3 -57
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/object_storage/base.py +72 -14
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/file_parser.py +12 -0
- classifyre_cli-0.4.24/src/utils/resources.py +65 -0
- classifyre_cli-0.4.24/src/utils/transcription.py +383 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/uv.lock +113 -122
- classifyre_cli-0.4.22/src/utils/transcription.py +0 -177
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/.gitignore +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/.python-version +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/README.md +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/main.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/scripts/generate_models.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/config.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/base.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/broken_links/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/broken_links/detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/config.py +0 -0
- {classifyre_cli-0.4.22/tests/detectors/threat → classifyre_cli-0.4.24/src/detectors/content}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/extractor.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_base.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_factory.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_feature_extraction.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_gliner2.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_image_classification.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_llm.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_object_detection.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_regex.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/runners/_text_classification.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/custom/trainer.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/dependencies.py +0 -0
- {classifyre_cli-0.4.22/tests/detectors/secrets → classifyre_cli-0.4.24/src/detectors/pii}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/pii/detector.py +0 -0
- {classifyre_cli-0.4.22/tests/detectors/pii → classifyre_cli-0.4.24/src/detectors/secrets}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/secrets/detector.py +0 -0
- {classifyre_cli-0.4.22/tests/detectors/custom → classifyre_cli-0.4.24/src/detectors/threat}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/threat/code_security_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/detectors/threat/yara_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/main.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/models/generated_detectors.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/models/generated_input.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/models/generated_single_asset_scan_results.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/outputs/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/outputs/base.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/outputs/console.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/outputs/factory.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/outputs/file.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/outputs/rest.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/pipeline/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/pipeline/content_provider.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sandbox/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sandbox/runner.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/asset_metadata.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/atlassian_common.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/azure_blob_storage/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/azure_blob_storage/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/base.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/confluence/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/confluence/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/databricks/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/databricks/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/dependencies.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/email/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/email/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/google_cloud_storage/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/google_cloud_storage/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/hive/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/hive/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/jira/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/jira/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/mongodb/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/mongodb/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/mssql/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/mssql/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/mysql/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/mysql/source.py +0 -0
- {classifyre_cli-0.4.22/tests/detectors/content → classifyre_cli-0.4.24/src/sources/neo4j}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/neo4j/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/notion/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/notion/client.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/notion/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/oracle/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/oracle/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/postgresql/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/postgresql/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/powerbi/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/powerbi/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/recipe_normalizer.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/s3_compatible_storage/README.md +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/s3_compatible_storage/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/s3_compatible_storage/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/servicedesk/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/servicedesk/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/slack/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/slack/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/snowflake/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/snowflake/source.py +0 -0
- {classifyre_cli-0.4.22/tests/detectors → classifyre_cli-0.4.24/src/sources/sqlite}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/sqlite/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/tableau/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/tableau/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/tabular_base.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/tabular_utils.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/wordpress/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/wordpress/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/youtube/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/sources/youtube/source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/telemetry.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/content_extraction.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/dependency_groups.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/embedded_images.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/file_metadata.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/file_to_images.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/hashing.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/uv_sync.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/src/utils/validation.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/conftest.py +0 -0
- {classifyre_cli-0.4.22/src/sources/sqlite → classifyre_cli-0.4.24/tests/detectors}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/broken_links/test_broken_links_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/conftest.py +0 -0
- {classifyre_cli-0.4.22/src/sources/neo4j → classifyre_cli-0.4.24/tests/detectors/content}/__init__.py +0 -0
- {classifyre_cli-0.4.22/src/detectors/threat → classifyre_cli-0.4.24/tests/detectors/custom}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/custom/conftest.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/custom/test_invoice_extraction.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/custom/test_llm_runner.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/custom/test_pipeline_integration.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/custom/test_regex_runner.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/custom/test_transformer_runners.py +0 -0
- {classifyre_cli-0.4.22/src/detectors/secrets → classifyre_cli-0.4.24/tests/detectors/pii}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/pii/conftest.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/pii/sample_invoice.pdf +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/pii/test_pii_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/pii/test_pii_detector_extended.py +0 -0
- {classifyre_cli-0.4.22/src/detectors/pii → classifyre_cli-0.4.24/tests/detectors/secrets}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/secrets/test_secrets_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/secrets/test_secrets_detector_extended.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_base_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_custom_detector_examples_runtime.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_detector_catalog_commercial.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_detector_pipeline_types.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_detector_schema_examples.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_detector_types.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_phase2_detectors.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/test_registry.py +0 -0
- {classifyre_cli-0.4.22/src/detectors/content → classifyre_cli-0.4.24/tests/detectors/threat}/__init__.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/threat/test_code_security_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/detectors/threat/test_yara_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/integration/test_wordpress_broken_links_detector.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/integration/test_wordpress_links_assets.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/pipeline/test_detector_pipeline.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/pipeline/test_worker_pool.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_assets_metadata_catalog.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_azure_blob_storage_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_base_source_attachment.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_base_source_sampling.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_config.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_confluence_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_custom_extractor.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_databricks_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_dependency_groups.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_email_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_google_cloud_storage_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_hashing.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_hive_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_jira_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_mongodb_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_mssql_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_mysql_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_neo4j_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_notion_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_oracle_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_outputs.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_postgresql_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_powerbi_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_recipe_normalizer.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_s3_compatible_storage_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_sandbox_runner.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_servicedesk_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_slack_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_snowflake_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_source_dependency_groups.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_sqlite_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_tableau_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_tabular_utils.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_uv_sync.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_wordpress_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_youtube_source.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/test_youtube_source_integration.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/utils/test_content_extraction.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/utils/test_embedded_images.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/utils/test_file_metadata.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/utils/test_file_parser.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/utils/test_file_to_images.py +0 -0
- {classifyre_cli-0.4.22 → classifyre_cli-0.4.24}/tests/utils/test_transcription.py +0 -0
|
@@ -109,7 +109,10 @@ class DetectorPipeline:
|
|
|
109
109
|
|
|
110
110
|
scan_started = datetime.now(UTC)
|
|
111
111
|
ocr_enabled = self.source.ocr_enabled()
|
|
112
|
-
|
|
112
|
+
transcription_enabled = self.source.transcription_enabled()
|
|
113
|
+
text_content_type = self._text_content_type_for_asset(
|
|
114
|
+
asset.asset_type, ocr_enabled, transcription_enabled
|
|
115
|
+
)
|
|
113
116
|
link_content = self._build_links_payload(asset.links)
|
|
114
117
|
|
|
115
118
|
text_detectors = []
|
|
@@ -294,13 +297,23 @@ class DetectorPipeline:
|
|
|
294
297
|
page_num=page_num,
|
|
295
298
|
)
|
|
296
299
|
elapsed = int((time.monotonic() - t0) * 1000)
|
|
300
|
+
snippet = page_content[:120].replace("\n", "\\n") if page_content else ""
|
|
297
301
|
logger.info(
|
|
298
|
-
" %s page %d
|
|
302
|
+
" %s page %d: %d findings in %dms — snippet: %s",
|
|
299
303
|
asset.name,
|
|
300
304
|
page_num,
|
|
301
305
|
len(page_findings),
|
|
302
306
|
elapsed,
|
|
307
|
+
snippet,
|
|
303
308
|
)
|
|
309
|
+
if page_findings:
|
|
310
|
+
for f in page_findings[:5]:
|
|
311
|
+
logger.info(
|
|
312
|
+
" finding: type=%s detector=%s matched=%.100s",
|
|
313
|
+
f.finding_type,
|
|
314
|
+
f.detector_type,
|
|
315
|
+
f.matched_content[:100].replace("\n", " "),
|
|
316
|
+
)
|
|
304
317
|
return page_findings, page_types, page_errors, page_content, page_num
|
|
305
318
|
|
|
306
319
|
def _collect_done() -> None:
|
|
@@ -401,13 +414,23 @@ class DetectorPipeline:
|
|
|
401
414
|
page_num=page_num,
|
|
402
415
|
)
|
|
403
416
|
elapsed = int((time.monotonic() - t0) * 1000)
|
|
417
|
+
snippet = page_content[:120].replace("\n", "\\n") if page_content else ""
|
|
404
418
|
logger.info(
|
|
405
|
-
" %s page %d
|
|
419
|
+
" %s page %d: %d findings in %dms — snippet: %s",
|
|
406
420
|
asset.name,
|
|
407
421
|
page_num,
|
|
408
422
|
len(page_findings),
|
|
409
423
|
elapsed,
|
|
424
|
+
snippet,
|
|
410
425
|
)
|
|
426
|
+
if page_findings:
|
|
427
|
+
for f in page_findings[:5]:
|
|
428
|
+
logger.info(
|
|
429
|
+
" finding: type=%s detector=%s matched=%.100s",
|
|
430
|
+
f.finding_type,
|
|
431
|
+
f.detector_type,
|
|
432
|
+
f.matched_content[:100].replace("\n", " "),
|
|
433
|
+
)
|
|
411
434
|
return page_findings, page_types, page_errors, page_content, page_num
|
|
412
435
|
|
|
413
436
|
async def _collect_done_and_flush(min_findings: int = 1) -> None:
|
|
@@ -488,6 +511,12 @@ class DetectorPipeline:
|
|
|
488
511
|
continue
|
|
489
512
|
candidate_ids.append(value)
|
|
490
513
|
|
|
514
|
+
logger.info(
|
|
515
|
+
"_iter_text_content_pages(%s): trying candidates %s",
|
|
516
|
+
asset.name,
|
|
517
|
+
candidate_ids,
|
|
518
|
+
)
|
|
519
|
+
|
|
491
520
|
for candidate_id in candidate_ids:
|
|
492
521
|
saw_candidate_content = False
|
|
493
522
|
async for text_content in self.content_provider.fetch_text_pages(candidate_id):
|
|
@@ -499,6 +528,16 @@ class DetectorPipeline:
|
|
|
499
528
|
if saw_candidate_content:
|
|
500
529
|
return
|
|
501
530
|
|
|
531
|
+
# If fetch_content_pages ran the full bytes-path extraction (even
|
|
532
|
+
# yielding 0 text, e.g. silent audio), the source already did the
|
|
533
|
+
# expensive work. Don't re-process with another candidate ID for
|
|
534
|
+
# the same asset.
|
|
535
|
+
source = getattr(self.content_provider, "_source", None)
|
|
536
|
+
if source is not None:
|
|
537
|
+
processed: set[str] = getattr(source, "_content_pages_processed", set())
|
|
538
|
+
if candidate_id in processed:
|
|
539
|
+
return
|
|
540
|
+
|
|
502
541
|
async def _run_binary_detectors_for_asset(
|
|
503
542
|
self,
|
|
504
543
|
*,
|
|
@@ -727,6 +766,7 @@ class DetectorPipeline:
|
|
|
727
766
|
self,
|
|
728
767
|
asset_type: OutputAssetType,
|
|
729
768
|
ocr_enabled: bool,
|
|
769
|
+
transcription_enabled: bool = False,
|
|
730
770
|
) -> str | None:
|
|
731
771
|
mapping = {
|
|
732
772
|
OutputAssetType.TXT: "text/plain",
|
|
@@ -737,6 +777,8 @@ class DetectorPipeline:
|
|
|
737
777
|
return mapping[asset_type]
|
|
738
778
|
if ocr_enabled and asset_type in {OutputAssetType.IMAGE, OutputAssetType.BINARY}:
|
|
739
779
|
return "text/plain"
|
|
780
|
+
if transcription_enabled and asset_type in {OutputAssetType.AUDIO, OutputAssetType.VIDEO}:
|
|
781
|
+
return "text/plain"
|
|
740
782
|
return None
|
|
741
783
|
|
|
742
784
|
@staticmethod
|
|
@@ -3,11 +3,14 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
import logging
|
|
6
7
|
from collections.abc import AsyncGenerator
|
|
7
8
|
|
|
8
9
|
from ..models.generated_single_asset_scan_results import DetectionResult, SingleAssetScanResults
|
|
9
10
|
from ..sources.base import BaseSource
|
|
10
11
|
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
11
14
|
|
|
12
15
|
class ParsedContentProvider:
|
|
13
16
|
"""
|
|
@@ -32,11 +35,30 @@ class ParsedContentProvider:
|
|
|
32
35
|
if saw_text:
|
|
33
36
|
return
|
|
34
37
|
|
|
38
|
+
# If fetch_content_pages already ran the full extraction pipeline for
|
|
39
|
+
# this asset (tracked via _content_pages_processed), skip the fallback
|
|
40
|
+
# iter_asset_pages call. Without this, an all-silence audio file would
|
|
41
|
+
# trigger a redundant second transcription pass.
|
|
42
|
+
pages_processed: set[str] | None = getattr(self._source, "_content_pages_processed", None)
|
|
43
|
+
if isinstance(pages_processed, set) and asset_id in pages_processed:
|
|
44
|
+
logger.info(
|
|
45
|
+
"fetch_text_pages(%s): source already processed, skipping fallback",
|
|
46
|
+
asset_id,
|
|
47
|
+
)
|
|
48
|
+
return
|
|
49
|
+
|
|
35
50
|
result = await self._source.fetch_content_bytes(asset_id)
|
|
36
51
|
if result is None:
|
|
52
|
+
logger.info("fetch_text_pages(%s): fetch_content_bytes returned None", asset_id)
|
|
37
53
|
return
|
|
38
54
|
|
|
39
55
|
raw_bytes, mime = result
|
|
56
|
+
logger.info(
|
|
57
|
+
"fetch_text_pages(%s): fallback iter_asset_pages path (%s, %d bytes)",
|
|
58
|
+
asset_id,
|
|
59
|
+
mime,
|
|
60
|
+
len(raw_bytes),
|
|
61
|
+
)
|
|
40
62
|
pages: list[str] = await asyncio.to_thread(
|
|
41
63
|
list,
|
|
42
64
|
self._source.iter_asset_pages(raw_bytes, mime),
|
|
@@ -21,6 +21,9 @@ from concurrent.futures import ProcessPoolExecutor
|
|
|
21
21
|
from typing import Any
|
|
22
22
|
|
|
23
23
|
from ..models.generated_single_asset_scan_results import DetectionResult
|
|
24
|
+
from ..utils.resources import get_effective_cpu_count, get_effective_memory_mb
|
|
25
|
+
|
|
26
|
+
__all__ = ["get_effective_cpu_count", "get_effective_memory_mb"]
|
|
24
27
|
|
|
25
28
|
logger = logging.getLogger(__name__)
|
|
26
29
|
|
|
@@ -130,63 +133,6 @@ def is_io_bound_detector(detector_name: str) -> bool:
|
|
|
130
133
|
return detector_name in _IO_BOUND_DETECTORS
|
|
131
134
|
|
|
132
135
|
|
|
133
|
-
def get_effective_cpu_count() -> int:
|
|
134
|
-
"""Return the number of usable CPUs, respecting cgroup limits (K8s/Docker).
|
|
135
|
-
|
|
136
|
-
``os.cpu_count()`` returns the *host* CPU count, which can be much larger
|
|
137
|
-
than what the container is allowed to use. This function reads the cgroup
|
|
138
|
-
v2 ``cpu.max`` (or v1 ``cpu.cfs_quota_us``/``cpu.cfs_period_us``) to
|
|
139
|
-
determine the actual allocation.
|
|
140
|
-
"""
|
|
141
|
-
try:
|
|
142
|
-
data = open("/sys/fs/cgroup/cpu.max").read().strip()
|
|
143
|
-
quota_str, period_str = data.split()
|
|
144
|
-
if quota_str != "max":
|
|
145
|
-
cpus = int(quota_str) / int(period_str)
|
|
146
|
-
if cpus >= 0.5:
|
|
147
|
-
return max(1, int(cpus))
|
|
148
|
-
except (FileNotFoundError, OSError, ValueError):
|
|
149
|
-
pass
|
|
150
|
-
|
|
151
|
-
try:
|
|
152
|
-
quota = int(open("/sys/fs/cgroup/cpu/cpu.cfs_quota_us").read().strip())
|
|
153
|
-
period = int(open("/sys/fs/cgroup/cpu/cpu.cfs_period_us").read().strip())
|
|
154
|
-
if quota > 0 and period > 0:
|
|
155
|
-
cpus = quota / period
|
|
156
|
-
if cpus >= 0.5:
|
|
157
|
-
return max(1, int(cpus))
|
|
158
|
-
except (FileNotFoundError, OSError, ValueError):
|
|
159
|
-
pass
|
|
160
|
-
|
|
161
|
-
return os.cpu_count() or 4
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def get_effective_memory_mb() -> int:
|
|
165
|
-
"""Return usable memory in MB, respecting cgroup limits."""
|
|
166
|
-
try:
|
|
167
|
-
mem_bytes = int(open("/sys/fs/cgroup/memory.max").read().strip())
|
|
168
|
-
if mem_bytes < 2**50:
|
|
169
|
-
return max(256, mem_bytes // (1024 * 1024))
|
|
170
|
-
except (FileNotFoundError, OSError, ValueError):
|
|
171
|
-
pass
|
|
172
|
-
|
|
173
|
-
try:
|
|
174
|
-
mem_bytes = int(open("/sys/fs/cgroup/memory/memory.limit_in_bytes").read().strip())
|
|
175
|
-
if mem_bytes < 2**50:
|
|
176
|
-
return max(256, mem_bytes // (1024 * 1024))
|
|
177
|
-
except (FileNotFoundError, OSError, ValueError):
|
|
178
|
-
pass
|
|
179
|
-
|
|
180
|
-
try:
|
|
181
|
-
for line in open("/proc/meminfo"):
|
|
182
|
-
if line.startswith("MemTotal:"):
|
|
183
|
-
return max(256, int(line.split()[1]) // 1024)
|
|
184
|
-
except (FileNotFoundError, OSError, ValueError):
|
|
185
|
-
pass
|
|
186
|
-
|
|
187
|
-
return 4096
|
|
188
|
-
|
|
189
|
-
|
|
190
136
|
def compute_pool_workers(override: int | None = None) -> int:
|
|
191
137
|
"""Compute optimal pool size from actual resource limits.
|
|
192
138
|
|
|
@@ -135,6 +135,11 @@ class ObjectStorageSourceBase(BaseSource, ABC):
|
|
|
135
135
|
# Keyed by both asset_hash and external_url for O(1) lookup from either.
|
|
136
136
|
self._bytes_cache: dict[str, bytes] = {}
|
|
137
137
|
self._mime_cache: dict[str, str] = {}
|
|
138
|
+
# asset_ids for which fetch_content_pages ran the full bytes path
|
|
139
|
+
# (even if it produced no text, e.g. all-silence audio). Checked by
|
|
140
|
+
# ParsedContentProvider to skip its fallback iter_asset_pages path,
|
|
141
|
+
# which would otherwise re-run an expensive transcription a second time.
|
|
142
|
+
self._content_pages_processed: set[str] = set()
|
|
138
143
|
# Child IMAGE assets queued while transforming the current object.
|
|
139
144
|
self._pending_child_assets: list[SingleAssetScanResults] = []
|
|
140
145
|
|
|
@@ -302,6 +307,15 @@ class ObjectStorageSourceBase(BaseSource, ABC):
|
|
|
302
307
|
|
|
303
308
|
return OutputAssetType.OTHER
|
|
304
309
|
|
|
310
|
+
@staticmethod
|
|
311
|
+
def _asset_kind_for_asset_type(asset_type: OutputAssetType) -> str:
|
|
312
|
+
mapping: dict[OutputAssetType, str] = {
|
|
313
|
+
OutputAssetType.IMAGE: "image",
|
|
314
|
+
OutputAssetType.AUDIO: "audio",
|
|
315
|
+
OutputAssetType.VIDEO: "video",
|
|
316
|
+
}
|
|
317
|
+
return mapping.get(asset_type, "file")
|
|
318
|
+
|
|
305
319
|
def _ensure_file_processing_dependencies(self) -> None:
|
|
306
320
|
if self._file_processing_deps_checked:
|
|
307
321
|
return
|
|
@@ -446,7 +460,7 @@ class ObjectStorageSourceBase(BaseSource, ABC):
|
|
|
446
460
|
created_at=ref.last_modified,
|
|
447
461
|
updated_at=ref.last_modified,
|
|
448
462
|
runner_id=self.runner_id,
|
|
449
|
-
**self.metadata_fields(
|
|
463
|
+
**self.metadata_fields(self._asset_kind_for_asset_type(asset_type), asset_metadata),
|
|
450
464
|
)
|
|
451
465
|
self._hash_to_uri[asset_hash] = external_url
|
|
452
466
|
self._object_ref_by_hash[asset_hash] = ref
|
|
@@ -549,6 +563,7 @@ class ObjectStorageSourceBase(BaseSource, ABC):
|
|
|
549
563
|
self._object_ref_by_hash = {}
|
|
550
564
|
self._bytes_cache = {}
|
|
551
565
|
self._mime_cache = {}
|
|
566
|
+
self._content_pages_processed = set()
|
|
552
567
|
self._pending_child_assets = []
|
|
553
568
|
|
|
554
569
|
refs = self._list_objects()
|
|
@@ -628,26 +643,69 @@ class ObjectStorageSourceBase(BaseSource, ABC):
|
|
|
628
643
|
raw_bytes = self._bytes_cache.get(asset_id)
|
|
629
644
|
mime = self._mime_cache.get(asset_id, "")
|
|
630
645
|
|
|
646
|
+
logger.info(
|
|
647
|
+
"fetch_content_pages(%s): raw_bytes=%s mime=%s processed=%s",
|
|
648
|
+
asset_id,
|
|
649
|
+
f"{len(raw_bytes)} bytes" if raw_bytes is not None else "MISS",
|
|
650
|
+
mime or "MISS",
|
|
651
|
+
asset_id in self._content_pages_processed,
|
|
652
|
+
)
|
|
653
|
+
|
|
631
654
|
if raw_bytes is not None:
|
|
632
655
|
sampling = self.config.sampling
|
|
633
656
|
batch_size = int(sampling.rows_per_page or 100)
|
|
634
657
|
include_col_names = bool(
|
|
635
658
|
sampling.include_column_names if sampling.include_column_names is not None else True
|
|
636
659
|
)
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
pages
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
660
|
+
file_name = self._file_name_for_asset_id(asset_id)
|
|
661
|
+
|
|
662
|
+
# Stream pages from a thread instead of materializing via list().
|
|
663
|
+
# For transcription this lets detectors start working on the first
|
|
664
|
+
# chunk while later chunks are still being transcribed.
|
|
665
|
+
loop = asyncio.get_running_loop()
|
|
666
|
+
queue: asyncio.Queue[str | None] = asyncio.Queue()
|
|
667
|
+
|
|
668
|
+
exc_info: list[BaseException | None] = [None]
|
|
669
|
+
|
|
670
|
+
page_count: int = 0
|
|
671
|
+
|
|
672
|
+
def _produce() -> None:
|
|
673
|
+
nonlocal page_count
|
|
674
|
+
try:
|
|
675
|
+
for page in self.iter_asset_pages(
|
|
676
|
+
raw_bytes,
|
|
677
|
+
mime,
|
|
678
|
+
batch_size,
|
|
679
|
+
include_col_names,
|
|
680
|
+
file_name=file_name,
|
|
681
|
+
):
|
|
682
|
+
loop.call_soon_threadsafe(queue.put_nowait, page)
|
|
683
|
+
page_count += 1
|
|
684
|
+
except BaseException as exc:
|
|
685
|
+
exc_info[0] = exc
|
|
686
|
+
finally:
|
|
687
|
+
loop.call_soon_threadsafe(queue.put_nowait, None)
|
|
688
|
+
|
|
689
|
+
task = loop.run_in_executor(None, _produce)
|
|
690
|
+
|
|
691
|
+
while True:
|
|
692
|
+
page = await queue.get()
|
|
693
|
+
if page is None:
|
|
694
|
+
break
|
|
695
|
+
yield "", page
|
|
696
|
+
|
|
697
|
+
await task
|
|
698
|
+
if exc_info[0] is not None:
|
|
699
|
+
raise exc_info[0] # type: ignore[misc]
|
|
700
|
+
|
|
701
|
+
logger.info(
|
|
702
|
+
"fetch_content_pages(%s): streamed %d page(s) from %s",
|
|
703
|
+
asset_id,
|
|
704
|
+
page_count,
|
|
705
|
+
file_name,
|
|
648
706
|
)
|
|
649
|
-
|
|
650
|
-
|
|
707
|
+
|
|
708
|
+
self._content_pages_processed.add(asset_id)
|
|
651
709
|
return
|
|
652
710
|
|
|
653
711
|
result = await self.fetch_content(asset_id)
|
|
@@ -690,6 +690,18 @@ def iter_file_pages(
|
|
|
690
690
|
yield from _iter_parquet_pages(file_bytes, batch_size, include_column_names)
|
|
691
691
|
elif normalized in ("text/csv", "text/tab-separated-values"):
|
|
692
692
|
yield from _iter_csv_pages(file_bytes, include_column_names)
|
|
693
|
+
elif normalized.startswith(("audio/", "video/")) and enable_transcription:
|
|
694
|
+
# Stream transcript pages directly from the chunked transcription pipeline
|
|
695
|
+
# so the detector receives text as each ~10-min audio chunk completes
|
|
696
|
+
# instead of waiting for the full file and buffering the entire transcript.
|
|
697
|
+
from .transcription import iter_transcription_pages
|
|
698
|
+
|
|
699
|
+
yield from iter_transcription_pages(
|
|
700
|
+
file_bytes,
|
|
701
|
+
mime_type=normalized,
|
|
702
|
+
file_name=file_name,
|
|
703
|
+
segments_per_page=batch_size,
|
|
704
|
+
)
|
|
693
705
|
else:
|
|
694
706
|
text, error = extract_text(
|
|
695
707
|
file_bytes,
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Cgroup-aware CPU and memory introspection.
|
|
2
|
+
|
|
3
|
+
Shared by the detector worker pool (to size the process pool) and the
|
|
4
|
+
transcription pipeline (to select the right Whisper model at runtime).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_effective_cpu_count(*, cgroup_root: str = "/sys/fs/cgroup") -> int:
    """Return usable CPUs, respecting cgroup limits (K8s / Docker).

    ``os.cpu_count()`` returns the *host* count, which is usually much larger
    than the container's CPU quota.  This reads the cgroup v2 ``cpu.max`` file
    first, then the cgroup v1 ``cpu/cpu.cfs_quota_us`` and
    ``cpu/cpu.cfs_period_us`` pair, and only then falls back to
    ``os.cpu_count()``.

    Args:
        cgroup_root: Directory the cgroup filesystem is mounted at.  The
            default is the path that used to be hard-coded; override it for
            tests or non-standard mounts.

    Returns:
        The whole number of CPUs granted by the first usable quota (never
        less than 1), otherwise ``os.cpu_count()``, otherwise 4 when even
        that is unknown.
    """

    def _read(*parts: str) -> str:
        # ``with`` closes the handle deterministically; a bare
        # ``open(...).read()`` leaves it open until garbage collection.
        with open(os.path.join(cgroup_root, *parts), encoding="utf-8") as fh:
            return fh.read().strip()

    def _whole_cpus(quota: int, period: int) -> int:
        # Returns 0 for "no usable limit".  Rejecting a non-positive period
        # here keeps the v2 path from dividing by zero, matching the guard
        # the v1 path always had.
        if quota <= 0 or period <= 0:
            return 0
        cpus = quota / period
        # NOTE(review): quotas below half a CPU are treated as unusable and
        # fall through to the next source (ultimately the host count).  This
        # threshold is kept as-is; confirm it is intentional.
        return max(1, int(cpus)) if cpus >= 0.5 else 0

    # cgroup v2: a single file holding "<quota> <period>" or "max <period>".
    try:
        quota_str, period_str = _read("cpu.max").split()
        if quota_str != "max":
            cpus = _whole_cpus(int(quota_str), int(period_str))
            if cpus:
                return cpus
    except (OSError, ValueError):
        # Missing or unreadable file, or unexpected contents: try v1 next.
        pass

    # cgroup v1: quota and period live in separate files.
    try:
        cpus = _whole_cpus(
            int(_read("cpu", "cpu.cfs_quota_us")),
            int(_read("cpu", "cpu.cfs_period_us")),
        )
        if cpus:
            return cpus
    except (OSError, ValueError):
        pass

    return os.cpu_count() or 4
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_effective_memory_mb(
    *,
    cgroup_root: str = "/sys/fs/cgroup",
    meminfo_path: str = "/proc/meminfo",
) -> int:
    """Return usable memory in MB, respecting cgroup limits (K8s / Docker).

    Sources are tried in order: the cgroup v2 ``memory.max`` file, the
    cgroup v1 ``memory/memory.limit_in_bytes`` file, then the ``MemTotal:``
    line of ``/proc/meminfo``.  A source that is missing, unreadable,
    unparsable, or reports a value of 2**50 bytes or more (treated as
    "no limit") is skipped.

    Args:
        cgroup_root: Directory the cgroup filesystem is mounted at.  The
            default is the path that used to be hard-coded.
        meminfo_path: Location of the meminfo file used as the last
            file-based source.  The default is the previous hard-coded path.

    Returns:
        Memory in megabytes from the first usable source, clamped to at
        least 256; 4096 when no source yields a value.
    """
    no_limit_threshold = 2**50

    # The v2 and v1 limit files share a format (a single integer byte
    # count), so one loop handles both.
    for parts in (("memory.max",), ("memory", "memory.limit_in_bytes")):
        try:
            # ``with`` closes the handle deterministically; a bare
            # ``open(...).read()`` leaves it open until garbage collection.
            with open(os.path.join(cgroup_root, *parts), encoding="utf-8") as fh:
                mem_bytes = int(fh.read().strip())
        except (OSError, ValueError):
            # Missing file, or non-numeric contents such as "max".
            continue
        if mem_bytes < no_limit_threshold:
            return max(256, mem_bytes // (1024 * 1024))

    try:
        with open(meminfo_path, encoding="utf-8") as fh:
            for line in fh:
                if line.startswith("MemTotal:"):
                    # The value is divided by 1024 to get MB, i.e. it is
                    # taken to be in kB.
                    return max(256, int(line.split()[1]) // 1024)
    except (OSError, ValueError, IndexError):
        # IndexError covers a "MemTotal:" line with no value after it.
        pass

    return 4096
|