classifyre-cli 0.4.14__tar.gz → 0.4.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/.turbo/turbo-build.log +1 -1
  2. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/PKG-INFO +1 -1
  3. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/package.json +1 -1
  4. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/pyproject.toml +17 -1
  5. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_object_detection.py +1 -0
  6. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/main.py +31 -0
  7. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/models/generated_input.py +103 -3
  8. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/rest.py +49 -0
  9. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/base.py +13 -0
  10. classifyre_cli-0.4.16/src/sources/email/__init__.py +3 -0
  11. classifyre_cli-0.4.16/src/sources/email/source.py +555 -0
  12. classifyre_cli-0.4.16/src/utils/dependency_groups.py +68 -0
  13. classifyre_cli-0.4.16/src/utils/uv_sync.py +259 -0
  14. classifyre_cli-0.4.16/tests/test_dependency_groups.py +67 -0
  15. classifyre_cli-0.4.16/tests/test_email_source.py +260 -0
  16. classifyre_cli-0.4.16/tests/test_uv_sync.py +132 -0
  17. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/uv.lock +577 -590
  18. classifyre_cli-0.4.14/src/utils/uv_sync.py +0 -79
  19. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/.gitignore +0 -0
  20. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/.python-version +0 -0
  21. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/README.md +0 -0
  22. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/main.py +0 -0
  23. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/scripts/generate_models.py +0 -0
  24. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/__init__.py +0 -0
  25. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/__init__.py +0 -0
  26. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/base.py +0 -0
  27. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/broken_links/__init__.py +0 -0
  28. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/broken_links/detector.py +0 -0
  29. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/config.py +0 -0
  30. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/content/__init__.py +0 -0
  31. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/__init__.py +0 -0
  32. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/detector.py +0 -0
  33. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/extractor.py +0 -0
  34. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/__init__.py +0 -0
  35. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_base.py +0 -0
  36. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_factory.py +0 -0
  37. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_feature_extraction.py +0 -0
  38. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_gliner2.py +0 -0
  39. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_image_classification.py +0 -0
  40. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_llm.py +0 -0
  41. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_regex.py +0 -0
  42. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/runners/_text_classification.py +0 -0
  43. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/custom/trainer.py +0 -0
  44. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/dependencies.py +0 -0
  45. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/pii/__init__.py +0 -0
  46. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/pii/detector.py +0 -0
  47. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/secrets/__init__.py +0 -0
  48. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/secrets/detector.py +0 -0
  49. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/threat/__init__.py +0 -0
  50. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/threat/code_security_detector.py +0 -0
  51. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/detectors/threat/yara_detector.py +0 -0
  52. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/models/generated_detectors.py +0 -0
  53. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/models/generated_single_asset_scan_results.py +0 -0
  54. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/__init__.py +0 -0
  55. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/base.py +0 -0
  56. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/console.py +0 -0
  57. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/factory.py +0 -0
  58. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/outputs/file.py +0 -0
  59. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/__init__.py +0 -0
  60. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/content_provider.py +0 -0
  61. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/detector_pipeline.py +0 -0
  62. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/parsed_content_provider.py +0 -0
  63. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/pipeline/worker_pool.py +0 -0
  64. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sandbox/__init__.py +0 -0
  65. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sandbox/runner.py +0 -0
  66. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/__init__.py +0 -0
  67. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/asset_metadata.py +0 -0
  68. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/atlassian_common.py +0 -0
  69. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/azure_blob_storage/__init__.py +0 -0
  70. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/azure_blob_storage/source.py +0 -0
  71. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/confluence/__init__.py +0 -0
  72. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/confluence/source.py +0 -0
  73. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/databricks/__init__.py +0 -0
  74. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/databricks/source.py +0 -0
  75. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/dependencies.py +0 -0
  76. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/google_cloud_storage/__init__.py +0 -0
  77. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/google_cloud_storage/source.py +0 -0
  78. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/hive/__init__.py +0 -0
  79. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/hive/source.py +0 -0
  80. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/jira/__init__.py +0 -0
  81. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/jira/source.py +0 -0
  82. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mongodb/__init__.py +0 -0
  83. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mongodb/source.py +0 -0
  84. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mssql/__init__.py +0 -0
  85. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mssql/source.py +0 -0
  86. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mysql/__init__.py +0 -0
  87. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/mysql/source.py +0 -0
  88. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/neo4j/__init__.py +0 -0
  89. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/neo4j/source.py +0 -0
  90. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/notion/__init__.py +0 -0
  91. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/notion/client.py +0 -0
  92. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/notion/source.py +0 -0
  93. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/object_storage/base.py +0 -0
  94. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/oracle/__init__.py +0 -0
  95. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/oracle/source.py +0 -0
  96. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/postgresql/__init__.py +0 -0
  97. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/postgresql/source.py +0 -0
  98. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/powerbi/__init__.py +0 -0
  99. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/powerbi/source.py +0 -0
  100. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/recipe_normalizer.py +0 -0
  101. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/s3_compatible_storage/README.md +0 -0
  102. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/s3_compatible_storage/__init__.py +0 -0
  103. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/s3_compatible_storage/source.py +0 -0
  104. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/servicedesk/__init__.py +0 -0
  105. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/servicedesk/source.py +0 -0
  106. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/slack/__init__.py +0 -0
  107. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/slack/source.py +0 -0
  108. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/snowflake/__init__.py +0 -0
  109. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/snowflake/source.py +0 -0
  110. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/sqlite/__init__.py +0 -0
  111. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/sqlite/source.py +0 -0
  112. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tableau/__init__.py +0 -0
  113. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tableau/source.py +0 -0
  114. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tabular_base.py +0 -0
  115. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/tabular_utils.py +0 -0
  116. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/wordpress/__init__.py +0 -0
  117. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/sources/wordpress/source.py +0 -0
  118. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/telemetry.py +0 -0
  119. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/__init__.py +0 -0
  120. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/content_extraction.py +0 -0
  121. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/embedded_images.py +0 -0
  122. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/file_metadata.py +0 -0
  123. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/file_parser.py +0 -0
  124. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/file_to_images.py +0 -0
  125. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/hashing.py +0 -0
  126. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/src/utils/validation.py +0 -0
  127. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/__init__.py +0 -0
  128. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/conftest.py +0 -0
  129. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/__init__.py +0 -0
  130. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/broken_links/test_broken_links_detector.py +0 -0
  131. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/conftest.py +0 -0
  132. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/content/__init__.py +0 -0
  133. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/__init__.py +0 -0
  134. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/conftest.py +0 -0
  135. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_invoice_extraction.py +0 -0
  136. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_llm_runner.py +0 -0
  137. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_pipeline_integration.py +0 -0
  138. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_regex_runner.py +0 -0
  139. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/custom/test_transformer_runners.py +0 -0
  140. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/__init__.py +0 -0
  141. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/conftest.py +0 -0
  142. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/sample_invoice.pdf +0 -0
  143. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/test_pii_detector.py +0 -0
  144. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/pii/test_pii_detector_extended.py +0 -0
  145. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/secrets/__init__.py +0 -0
  146. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/secrets/test_secrets_detector.py +0 -0
  147. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/secrets/test_secrets_detector_extended.py +0 -0
  148. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_base_detector.py +0 -0
  149. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_custom_detector_examples_runtime.py +0 -0
  150. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_catalog_commercial.py +0 -0
  151. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_pipeline_types.py +0 -0
  152. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_schema_examples.py +0 -0
  153. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_detector_types.py +0 -0
  154. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_phase2_detectors.py +0 -0
  155. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/test_registry.py +0 -0
  156. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/threat/__init__.py +0 -0
  157. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/threat/test_code_security_detector.py +0 -0
  158. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/detectors/threat/test_yara_detector.py +0 -0
  159. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/integration/test_wordpress_broken_links_detector.py +0 -0
  160. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/integration/test_wordpress_links_assets.py +0 -0
  161. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/pipeline/test_detector_pipeline.py +0 -0
  162. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/pipeline/test_worker_pool.py +0 -0
  163. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_assets_metadata_catalog.py +0 -0
  164. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_azure_blob_storage_source.py +0 -0
  165. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_base_source_attachment.py +0 -0
  166. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_base_source_sampling.py +0 -0
  167. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_confluence_source.py +0 -0
  168. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_custom_extractor.py +0 -0
  169. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_databricks_source.py +0 -0
  170. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_google_cloud_storage_source.py +0 -0
  171. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_hashing.py +0 -0
  172. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_hive_source.py +0 -0
  173. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_jira_source.py +0 -0
  174. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_mongodb_source.py +0 -0
  175. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_mssql_source.py +0 -0
  176. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_mysql_source.py +0 -0
  177. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_neo4j_source.py +0 -0
  178. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_notion_source.py +0 -0
  179. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_oracle_source.py +0 -0
  180. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_outputs.py +0 -0
  181. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_postgresql_source.py +0 -0
  182. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_powerbi_source.py +0 -0
  183. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_recipe_normalizer.py +0 -0
  184. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_s3_compatible_storage_source.py +0 -0
  185. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_sandbox_runner.py +0 -0
  186. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_servicedesk_source.py +0 -0
  187. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_slack_source.py +0 -0
  188. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_snowflake_source.py +0 -0
  189. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_source_dependency_groups.py +0 -0
  190. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_sqlite_source.py +0 -0
  191. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_tableau_source.py +0 -0
  192. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_tabular_utils.py +0 -0
  193. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/test_wordpress_source.py +0 -0
  194. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_content_extraction.py +0 -0
  195. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_embedded_images.py +0 -0
  196. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_file_metadata.py +0 -0
  197. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_file_parser.py +0 -0
  198. {classifyre_cli-0.4.14 → classifyre_cli-0.4.16}/tests/utils/test_file_to_images.py +0 -0
@@ -1,3 +1,3 @@
1
1
  $ uv sync
2
- Resolved 268 packages in 224ms
2
+ Resolved 255 packages in 216ms
3
3
  Checked 50 packages in 1ms
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: classifyre-cli
3
- Version: 0.4.14
3
+ Version: 0.4.16
4
4
  Summary: Classifyre CLI — scan and classify unstructured data sources
5
5
  License: MIT
6
6
  Keywords: data,ingestion,metadata,pii,secrets,unstructured
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@classifyre/cli",
3
- "version": "0.4.14",
3
+ "version": "0.4.16",
4
4
  "private": true,
5
5
  "scripts": {
6
6
  "build": "uv sync",
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "classifyre-cli"
3
- version = "0.4.14"
3
+ version = "0.4.16"
4
4
  description = "Classifyre CLI — scan and classify unstructured data sources"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -21,6 +21,18 @@ dependencies = [
21
21
  # Local editable install for development — ignored when published to PyPI
22
22
  classifyre-schemas = { path = "../../packages/schemas", editable = true }
23
23
  en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
24
+ # CPU-only torch on Linux. The default PyPI Linux wheels bundle the full NVIDIA
25
+ # CUDA stack (~5-7 GB) which is dead weight on our CPU-only CLI jobs (no GPU
26
+ # requested). Pull torch/torchvision from the PyTorch CPU index on Linux so the
27
+ # nvidia-* packages drop out entirely. macOS/other platforms already use CPU
28
+ # wheels from PyPI. (A GPU image can override this in the future.)
29
+ torch = [{ index = "pytorch-cpu", marker = "sys_platform == 'linux'" }]
30
+ torchvision = [{ index = "pytorch-cpu", marker = "sys_platform == 'linux'" }]
31
+
32
+ [[tool.uv.index]]
33
+ name = "pytorch-cpu"
34
+ url = "https://download.pytorch.org/whl/cpu"
35
+ explicit = true
24
36
 
25
37
  [project.scripts]
26
38
  classifyre = "src.main:main"
@@ -82,6 +94,7 @@ custom = [
82
94
  "transformers>=4.50.0,<6.0.0",
83
95
  "torch>=2.12.0",
84
96
  "pillow>=12.2.0",
97
+ "timm>=1.0.27",
85
98
  "setfit>=1.1.3",
86
99
  "datasets>=4.8.5",
87
100
  "scikit-learn>=1.7.2",
@@ -167,6 +180,9 @@ azure-blob-storage = [
167
180
  google-cloud-storage = [
168
181
  "google-cloud-storage>=3.10.1,<4.0.0",
169
182
  ]
183
+ email = [
184
+ "imap-tools>=1.10.0,<2.0.0",
185
+ ]
170
186
  otel = [
171
187
  "opentelemetry-sdk>=1.42.0",
172
188
  "opentelemetry-exporter-otlp-proto-http>=1.27.0",
@@ -48,6 +48,7 @@ class ObjectDetectionRunner(BaseRunner):
48
48
  raise MissingDependencyError(
49
49
  "object_detection",
50
50
  ["custom", "detectors"],
51
+ ["custom", "detectors"],
51
52
  f"ObjectDetectionRunner requires additional dependencies: {exc}",
52
53
  ) from exc
53
54
 
@@ -197,6 +197,27 @@ async def run_command_async(args: argparse.Namespace, recipe: dict[str, Any]) ->
197
197
  has_detectors = bool(pipeline.detectors)
198
198
 
199
199
  if has_detectors:
200
+ # Warm this run's optional dependency groups once, here in
201
+ # the parent process, before the worker pool spawns — so the
202
+ # pool's worker processes don't each race on their own
203
+ # `uv sync` against the shared venv. Best-effort: the
204
+ # lock-protected require_module path remains the safety net.
205
+ from .utils.dependency_groups import recipe_uv_groups
206
+ from .utils.uv_sync import warm_groups
207
+
208
+ warm = recipe_uv_groups(recipe)
209
+ if warm:
210
+ logger.info(
211
+ "Warming optional dependency groups: %s",
212
+ ", ".join(sorted(warm)),
213
+ )
214
+ warm_ok, warm_detail = warm_groups(warm)
215
+ if not warm_ok:
216
+ logger.warning(
217
+ "Dependency warm-up incomplete (workers will retry): %s",
218
+ warm_detail,
219
+ )
220
+
200
221
  worker_pool = DetectorWorkerPool(max_workers=pool_workers)
201
222
  pipeline = DetectorPipeline.from_recipe(
202
223
  recipe,
@@ -351,6 +372,16 @@ async def run_command_async(args: argparse.Namespace, recipe: dict[str, Any]) ->
351
372
  total_assets,
352
373
  output_batch_count,
353
374
  )
375
+
376
+ # Phase 1: emit source-derived relationship edges (best-effort).
377
+ if hasattr(source, "collect_relationships") and hasattr(sink, "emit_edges"):
378
+ try:
379
+ edges = await source.collect_relationships()
380
+ if edges:
381
+ await sink.emit_edges(edges)
382
+ logger.info("Emitted %d source-derived relationship edges", len(edges))
383
+ except Exception as rel_error:
384
+ logger.warning("Relationship emission failed (non-fatal): %s", rel_error)
354
385
  except Exception as extraction_error:
355
386
  if _is_timeout_error(extraction_error):
356
387
  logger.warning(
@@ -43,6 +43,7 @@ class AssetType(StrEnum):
43
43
  SERVICEDESK = 'SERVICEDESK'
44
44
  SQLITE = 'SQLITE'
45
45
  NOTION = 'NOTION'
46
+ EMAIL = 'EMAIL'
46
47
 
47
48
 
48
49
  class SourceCategory(StrEnum):
@@ -238,6 +239,85 @@ class WordPressOptional(BaseModel):
238
239
  content: WordPressOptionalContent | None = None
239
240
 
240
241
 
242
+ class EmailRequired(BaseModel):
243
+ model_config = ConfigDict(
244
+ extra='forbid',
245
+ )
246
+ host: str = Field(
247
+ ..., description='IMAP server host (e.g. imap.gmail.com, outlook.office365.com)'
248
+ )
249
+ port: int | None = Field(
250
+ 993, description='IMAP server port (993 for IMAPS)', ge=1, le=65535
251
+ )
252
+
253
+
254
+ class EmailMasked(BaseModel):
255
+ model_config = ConfigDict(
256
+ extra='forbid',
257
+ )
258
+ username: str = Field(
259
+ ..., description='Mailbox login (usually the full email address)'
260
+ )
261
+ password: str = Field(
262
+ ...,
263
+ description='Account or app-specific password (use an app password for Gmail/Outlook)',
264
+ )
265
+
266
+
267
+ class EmailOptionalConnection(BaseModel):
268
+ """
269
+ Transport-level connection controls.
270
+ """
271
+
272
+ model_config = ConfigDict(
273
+ extra='forbid',
274
+ )
275
+ use_ssl: bool | None = Field(
276
+ True,
277
+ description='Connect over implicit TLS (IMAPS). Disable only for STARTTLS/plain servers.',
278
+ )
279
+ timeout_seconds: int | None = Field(
280
+ 30, description='Socket timeout for IMAP operations', ge=1
281
+ )
282
+
283
+
284
+ class EmailOptionalScope(BaseModel):
285
+ """
286
+ Which messages and attachments to ingest.
287
+ """
288
+
289
+ model_config = ConfigDict(
290
+ extra='forbid',
291
+ )
292
+ folders: list[str] | None = Field(['INBOX'], description='Mailbox folders to scan')
293
+ since_date: str | None = Field(
294
+ None,
295
+ description='Only fetch messages on/after this date (ISO 8601 date, e.g. 2026-01-01)',
296
+ )
297
+ before_date: str | None = Field(
298
+ None, description='Only fetch messages before this date (ISO 8601 date)'
299
+ )
300
+ unseen_only: bool | None = Field(
301
+ False, description='Only fetch unread (UNSEEN) messages'
302
+ )
303
+ include_attachments: bool | None = Field(
304
+ True, description='Emit attachments as separate assets linked to the email'
305
+ )
306
+ max_attachment_size_bytes: int | None = Field(
307
+ None,
308
+ description='Skip downloading attachment bytes above this size (still emits a metadata-only asset). Unset means no limit.',
309
+ ge=0,
310
+ )
311
+
312
+
313
+ class EmailOptional(BaseModel):
314
+ model_config = ConfigDict(
315
+ extra='forbid',
316
+ )
317
+ connection: EmailOptionalConnection | None = None
318
+ scope: EmailOptionalScope | None = None
319
+
320
+
241
321
  class SlackRequired(BaseModel):
242
322
  model_config = ConfigDict(
243
323
  extra='forbid',
@@ -1872,6 +1952,7 @@ class Type(StrEnum):
1872
1952
  SERVICEDESK = 'SERVICEDESK'
1873
1953
  SQLITE = 'SQLITE'
1874
1954
  NOTION = 'NOTION'
1955
+ EMAIL = 'EMAIL'
1875
1956
 
1876
1957
 
1877
1958
  class SlackInput(CoreInput):
@@ -1892,6 +1973,22 @@ class SlackInput(CoreInput):
1892
1973
  resources: ResourceOverrides | None = None
1893
1974
 
1894
1975
 
1976
+ class EmailInput(CoreInput):
1977
+ type: Literal['EMAIL'] = Field('EMAIL', description='Type of the asset or source')
1978
+ required: EmailRequired
1979
+ masked: EmailMasked
1980
+ optional: EmailOptional | None = None
1981
+ detectors: list[Detector] | None = Field(
1982
+ None, description='Detectors to run on ingested content'
1983
+ )
1984
+ custom_detectors: list[CustomDetectorSelection] | None = Field(
1985
+ None,
1986
+ description='Reusable custom detector IDs selected from the custom detector catalog.',
1987
+ )
1988
+ sampling: SamplingConfig
1989
+ resources: ResourceOverrides | None = None
1990
+
1991
+
1895
1992
  class S3CompatibleStorageInput(CoreInput):
1896
1993
  type: Literal['S3_COMPATIBLE_STORAGE'] = Field(
1897
1994
  'S3_COMPATIBLE_STORAGE', description='Type of the asset or source'
@@ -2331,7 +2428,7 @@ class ConfluenceOptionalConnection(BaseModel):
2331
2428
  )
2332
2429
 
2333
2430
 
2334
- class Type16(StrEnum):
2431
+ class Type17(StrEnum):
2335
2432
  """
2336
2433
  Filter spaces by space type
2337
2434
  """
@@ -2368,7 +2465,7 @@ class ConfluenceOptionalScopeSpaces(BaseModel):
2368
2465
  keys: list[str] | None = Field(
2369
2466
  None, description='Filter spaces by keys (up to 250)', max_length=250
2370
2467
  )
2371
- type: Type16 | None = Field(None, description='Filter spaces by space type')
2468
+ type: Type17 | None = Field(None, description='Filter spaces by space type')
2372
2469
  status: Status | None = Field(None, description='Filter spaces by status')
2373
2470
  labels: list[str] | None = Field(
2374
2471
  None,
@@ -2634,7 +2731,7 @@ class ServiceDeskOptional(BaseModel):
2634
2731
  content: ServiceDeskOptionalContent | None = None
2635
2732
 
2636
2733
 
2637
- class Type17(StrEnum):
2734
+ class Type18(StrEnum):
2638
2735
  """
2639
2736
  Type of the asset or source
2640
2737
  """
@@ -2660,6 +2757,7 @@ class Type17(StrEnum):
2660
2757
  SERVICEDESK = 'SERVICEDESK'
2661
2758
  SQLITE = 'SQLITE'
2662
2759
  NOTION = 'NOTION'
2760
+ EMAIL = 'EMAIL'
2663
2761
 
2664
2762
 
2665
2763
  class ConfluenceInput(CoreInput):
@@ -2920,6 +3018,7 @@ class SourceInput(
2920
3018
  | ServiceDeskInput
2921
3019
  | SQLiteInput
2922
3020
  | NotionInput
3021
+ | EmailInput
2923
3022
  ]
2924
3023
  ):
2925
3024
  root: (
@@ -2944,6 +3043,7 @@ class SourceInput(
2944
3043
  | ServiceDeskInput
2945
3044
  | SQLiteInput
2946
3045
  | NotionInput
3046
+ | EmailInput
2947
3047
  ) = Field(
2948
3048
  ...,
2949
3049
  description='Merged configuration schema with all source types and common definitions',
@@ -97,6 +97,31 @@ class BulkIngestAssetsRequest(BaseModel):
97
97
  skip_findings: bool = Field(False, serialization_alias="skipFindings")
98
98
 
99
99
 
100
+ class IngestEdge(BaseModel):
101
+ """A source-derived relationship edge for the investigation graph.
102
+
103
+ Identify endpoints by UUID (from_id / to_id) or by asset hash
104
+ (from_hash / to_hash — the API resolves hashes to UUIDs).
105
+ """
106
+
107
+ model_config = ConfigDict(populate_by_name=True)
108
+
109
+ from_type: str = Field(serialization_alias="fromType")
110
+ from_id: str | None = Field(None, serialization_alias="fromId")
111
+ from_hash: str | None = Field(None, serialization_alias="fromHash")
112
+ to_type: str = Field(serialization_alias="toType")
113
+ to_id: str | None = Field(None, serialization_alias="toId")
114
+ to_hash: str | None = Field(None, serialization_alias="toHash")
115
+ relation_type: str = Field(serialization_alias="relationType")
116
+ confidence: float = 1.0
117
+
118
+
119
+ class BulkIngestEdgesRequest(BaseModel):
120
+ model_config = ConfigDict(populate_by_name=True)
121
+
122
+ edges: list[IngestEdge]
123
+
124
+
100
125
  class FinalizeIngestRunRequest(BaseModel):
101
126
  model_config = ConfigDict(populate_by_name=True)
102
127
 
@@ -252,6 +277,30 @@ class RestOutputSink:
252
277
  update_error,
253
278
  )
254
279
 
280
+ async def emit_edges(self, edges: list[IngestEdge]) -> None:
281
+ """Bulk-upsert source-derived relationship edges to the investigation graph.
282
+
283
+ Idempotent — safe to call multiple times with overlapping data.
284
+ Silently skips if the list is empty.
285
+ """
286
+ if not edges:
287
+ return
288
+
289
+ _edge_batch = 500
290
+ for i in range(0, len(edges), _edge_batch):
291
+ chunk = edges[i : i + _edge_batch]
292
+ payload = BulkIngestEdgesRequest(edges=chunk)
293
+ try:
294
+ self._request_json(
295
+ "POST",
296
+ "/graph/edges",
297
+ payload.model_dump(mode="json", by_alias=True),
298
+ )
299
+ logger.debug("Emitted %d source-derived edges to graph", len(chunk))
300
+ except Exception as exc:
301
+ # Edge emission is best-effort: log and continue.
302
+ logger.warning("Failed to emit edges to graph: %s", exc)
303
+
255
304
  async def register_discovered_assets(self, hashes: list[str]) -> None:
256
305
  runner_id = self._require_runner_id()
257
306
  for i in range(0, len(hashes), 500):
@@ -4,6 +4,7 @@ from collections.abc import AsyncGenerator, Generator
4
4
  from typing import TYPE_CHECKING, Any
5
5
 
6
6
  from ..models.generated_single_asset_scan_results import DetectionResult, SingleAssetScanResults
7
+ from ..outputs.rest import IngestEdge
7
8
 
8
9
  if TYPE_CHECKING:
9
10
  from ..utils.file_parser import ParsedBytes
@@ -315,3 +316,15 @@ class BaseSource(ABC):
315
316
  this and map those identifiers back to their original URLs.
316
317
  """
317
318
  return normalize_http_url(link)
319
+
320
+
321
+ async def collect_relationships(self) -> list[IngestEdge]:
322
+ """Return source-derived relationship edges for the investigation graph.
323
+
324
+ Connectors override this to emit typed edges (READS, ATTACHED_TO,
325
+ SENT_TO, OWNS, ACCESSED, etc.) discovered during extraction. The caller
326
+ (main.py) will forward these to ``RestOutputSink.emit_edges()``.
327
+
328
+ Default: no relationships (empty list).
329
+ """
330
+ return []
@@ -0,0 +1,3 @@
1
+ from .source import EmailSource
2
+
3
+ __all__ = ["EmailSource"]