classifyre-cli 0.4.33__tar.gz → 0.4.35__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (237)
  1. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/.gitignore +3 -0
  2. classifyre_cli-0.4.35/.turbo/turbo-build.log +3 -0
  3. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/PKG-INFO +1 -1
  4. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/package.json +1 -1
  5. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/pyproject.toml +33 -1
  6. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/models/generated_input.py +804 -0
  7. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/outputs/rest.py +1 -3
  8. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/base.py +1 -3
  9. classifyre_cli-0.4.35/src/sources/delta_lake/__init__.py +3 -0
  10. classifyre_cli-0.4.35/src/sources/delta_lake/source.py +139 -0
  11. classifyre_cli-0.4.35/src/sources/elasticsearch/__init__.py +3 -0
  12. classifyre_cli-0.4.35/src/sources/elasticsearch/source.py +31 -0
  13. classifyre_cli-0.4.35/src/sources/hudi/__init__.py +3 -0
  14. classifyre_cli-0.4.35/src/sources/hudi/source.py +98 -0
  15. classifyre_cli-0.4.35/src/sources/iceberg/__init__.py +3 -0
  16. classifyre_cli-0.4.35/src/sources/iceberg/source.py +148 -0
  17. classifyre_cli-0.4.35/src/sources/kafka/__init__.py +3 -0
  18. classifyre_cli-0.4.35/src/sources/kafka/source.py +362 -0
  19. classifyre_cli-0.4.35/src/sources/meilisearch/__init__.py +3 -0
  20. classifyre_cli-0.4.35/src/sources/meilisearch/source.py +353 -0
  21. classifyre_cli-0.4.35/src/sources/opensearch/__init__.py +3 -0
  22. classifyre_cli-0.4.35/src/sources/opensearch/source.py +32 -0
  23. classifyre_cli-0.4.35/src/sources/search_engine_base.py +345 -0
  24. classifyre_cli-0.4.35/src/sources/spark_base.py +413 -0
  25. classifyre_cli-0.4.35/src/sources/spark_catalog/__init__.py +3 -0
  26. classifyre_cli-0.4.35/src/sources/spark_catalog/source.py +93 -0
  27. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/tabular_base.py +4 -1
  28. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/dependency_groups.py +5 -0
  29. classifyre_cli-0.4.35/src/utils/spark_runtime.py +56 -0
  30. classifyre_cli-0.4.35/tests/_spark_fakes.py +125 -0
  31. classifyre_cli-0.4.35/tests/test_delta_lake_source.py +96 -0
  32. classifyre_cli-0.4.35/tests/test_elasticsearch_source.py +238 -0
  33. classifyre_cli-0.4.35/tests/test_hudi_source.py +72 -0
  34. classifyre_cli-0.4.35/tests/test_iceberg_source.py +95 -0
  35. classifyre_cli-0.4.35/tests/test_kafka_source.py +192 -0
  36. classifyre_cli-0.4.35/tests/test_meilisearch_source.py +255 -0
  37. classifyre_cli-0.4.35/tests/test_opensearch_source.py +228 -0
  38. classifyre_cli-0.4.35/tests/test_spark_catalog_source.py +71 -0
  39. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/uv.lock +851 -689
  40. classifyre_cli-0.4.33/.turbo/turbo-build.log +0 -3
  41. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/.python-version +0 -0
  42. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/README.md +0 -0
  43. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/main.py +0 -0
  44. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/scripts/generate_models.py +0 -0
  45. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/__init__.py +0 -0
  46. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/config.py +0 -0
  47. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/__init__.py +0 -0
  48. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/base.py +0 -0
  49. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/broken_links/__init__.py +0 -0
  50. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/broken_links/detector.py +0 -0
  51. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/config.py +0 -0
  52. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/content/__init__.py +0 -0
  53. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/__init__.py +0 -0
  54. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/detector.py +0 -0
  55. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/extractor.py +0 -0
  56. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/__init__.py +0 -0
  57. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_base.py +0 -0
  58. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_factory.py +0 -0
  59. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_feature_extraction.py +0 -0
  60. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_gliner2.py +0 -0
  61. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_image_classification.py +0 -0
  62. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_llm.py +0 -0
  63. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_object_detection.py +0 -0
  64. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_regex.py +0 -0
  65. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_text_classification.py +0 -0
  66. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/custom/trainer.py +0 -0
  67. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/dependencies.py +0 -0
  68. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/pii/__init__.py +0 -0
  69. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/pii/detector.py +0 -0
  70. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/secrets/__init__.py +0 -0
  71. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/secrets/detector.py +0 -0
  72. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/threat/__init__.py +0 -0
  73. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/threat/code_security_detector.py +0 -0
  74. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/detectors/threat/yara_detector.py +0 -0
  75. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/main.py +0 -0
  76. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/models/generated_detectors.py +0 -0
  77. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/models/generated_single_asset_scan_results.py +0 -0
  78. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/outputs/__init__.py +0 -0
  79. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/outputs/base.py +0 -0
  80. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/outputs/console.py +0 -0
  81. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/outputs/factory.py +0 -0
  82. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/outputs/file.py +0 -0
  83. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/pipeline/__init__.py +0 -0
  84. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/pipeline/content_provider.py +0 -0
  85. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/pipeline/detector_pipeline.py +0 -0
  86. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/pipeline/parsed_content_provider.py +0 -0
  87. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/pipeline/worker_pool.py +0 -0
  88. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sandbox/__init__.py +0 -0
  89. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sandbox/runner.py +0 -0
  90. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/__init__.py +0 -0
  91. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/asset_metadata.py +0 -0
  92. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/atlassian_common.py +0 -0
  93. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/azure_blob_storage/__init__.py +0 -0
  94. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/azure_blob_storage/source.py +0 -0
  95. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/confluence/__init__.py +0 -0
  96. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/confluence/source.py +0 -0
  97. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/databricks/__init__.py +0 -0
  98. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/databricks/source.py +0 -0
  99. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/dependencies.py +0 -0
  100. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/email/__init__.py +0 -0
  101. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/email/source.py +0 -0
  102. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/google_cloud_storage/__init__.py +0 -0
  103. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/google_cloud_storage/source.py +0 -0
  104. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/hive/__init__.py +0 -0
  105. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/hive/source.py +0 -0
  106. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/jira/__init__.py +0 -0
  107. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/jira/source.py +0 -0
  108. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/mongodb/__init__.py +0 -0
  109. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/mongodb/source.py +0 -0
  110. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/mssql/__init__.py +0 -0
  111. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/mssql/source.py +0 -0
  112. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/mysql/__init__.py +0 -0
  113. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/mysql/source.py +0 -0
  114. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/neo4j/__init__.py +0 -0
  115. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/neo4j/source.py +0 -0
  116. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/notion/__init__.py +0 -0
  117. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/notion/client.py +0 -0
  118. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/notion/source.py +0 -0
  119. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/object_storage/base.py +0 -0
  120. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/oracle/__init__.py +0 -0
  121. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/oracle/source.py +0 -0
  122. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/postgresql/__init__.py +0 -0
  123. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/postgresql/source.py +0 -0
  124. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/powerbi/__init__.py +0 -0
  125. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/powerbi/source.py +0 -0
  126. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/recipe_normalizer.py +0 -0
  127. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/s3_compatible_storage/README.md +0 -0
  128. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/s3_compatible_storage/__init__.py +0 -0
  129. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/s3_compatible_storage/source.py +0 -0
  130. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/servicedesk/__init__.py +0 -0
  131. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/servicedesk/source.py +0 -0
  132. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/slack/__init__.py +0 -0
  133. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/slack/source.py +0 -0
  134. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/snowflake/__init__.py +0 -0
  135. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/snowflake/source.py +0 -0
  136. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/sqlite/__init__.py +0 -0
  137. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/sqlite/source.py +0 -0
  138. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/tableau/__init__.py +0 -0
  139. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/tableau/source.py +0 -0
  140. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/tabular_utils.py +0 -0
  141. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/wordpress/__init__.py +0 -0
  142. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/wordpress/source.py +0 -0
  143. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/youtube/__init__.py +0 -0
  144. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/sources/youtube/source.py +0 -0
  145. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/telemetry.py +0 -0
  146. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/__init__.py +0 -0
  147. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/content_extraction.py +0 -0
  148. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/embedded_images.py +0 -0
  149. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/file_metadata.py +0 -0
  150. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/file_parser.py +0 -0
  151. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/file_to_images.py +0 -0
  152. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/hashing.py +0 -0
  153. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/resources.py +0 -0
  154. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/transcription.py +0 -0
  155. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/uv_sync.py +0 -0
  156. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/src/utils/validation.py +0 -0
  157. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/__init__.py +0 -0
  158. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/conftest.py +0 -0
  159. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/__init__.py +0 -0
  160. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/broken_links/test_broken_links_detector.py +0 -0
  161. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/conftest.py +0 -0
  162. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/content/__init__.py +0 -0
  163. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/custom/__init__.py +0 -0
  164. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/custom/conftest.py +0 -0
  165. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/custom/test_invoice_extraction.py +0 -0
  166. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/custom/test_llm_runner.py +0 -0
  167. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/custom/test_pipeline_integration.py +0 -0
  168. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/custom/test_regex_runner.py +0 -0
  169. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/custom/test_transformer_runners.py +0 -0
  170. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/pii/__init__.py +0 -0
  171. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/pii/conftest.py +0 -0
  172. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/pii/sample_invoice.pdf +0 -0
  173. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/pii/test_pii_detector.py +0 -0
  174. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/pii/test_pii_detector_extended.py +0 -0
  175. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/secrets/__init__.py +0 -0
  176. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/secrets/test_secrets_detector.py +0 -0
  177. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/secrets/test_secrets_detector_extended.py +0 -0
  178. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_base_detector.py +0 -0
  179. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_custom_detector_examples_runtime.py +0 -0
  180. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_detector_catalog_commercial.py +0 -0
  181. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_detector_pipeline_types.py +0 -0
  182. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_detector_schema_examples.py +0 -0
  183. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_detector_types.py +0 -0
  184. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_phase2_detectors.py +0 -0
  185. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/test_registry.py +0 -0
  186. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/threat/__init__.py +0 -0
  187. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/threat/test_code_security_detector.py +0 -0
  188. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/detectors/threat/test_yara_detector.py +0 -0
  189. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/integration/test_wordpress_broken_links_detector.py +0 -0
  190. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/integration/test_wordpress_links_assets.py +0 -0
  191. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/pipeline/test_detector_pipeline.py +0 -0
  192. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/pipeline/test_worker_pool.py +0 -0
  193. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_assets_metadata_catalog.py +0 -0
  194. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_azure_blob_storage_source.py +0 -0
  195. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_base_source_attachment.py +0 -0
  196. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_base_source_sampling.py +0 -0
  197. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_config.py +0 -0
  198. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_confluence_source.py +0 -0
  199. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_custom_extractor.py +0 -0
  200. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_databricks_source.py +0 -0
  201. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_dependency_groups.py +0 -0
  202. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_email_source.py +0 -0
  203. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_google_cloud_storage_source.py +0 -0
  204. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_hashing.py +0 -0
  205. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_hive_source.py +0 -0
  206. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_jira_source.py +0 -0
  207. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_mongodb_source.py +0 -0
  208. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_mssql_source.py +0 -0
  209. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_mysql_source.py +0 -0
  210. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_neo4j_source.py +0 -0
  211. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_notion_source.py +0 -0
  212. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_oracle_source.py +0 -0
  213. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_outputs.py +0 -0
  214. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_postgresql_source.py +0 -0
  215. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_powerbi_source.py +0 -0
  216. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_recipe_normalizer.py +0 -0
  217. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_s3_compatible_storage_source.py +0 -0
  218. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_sampling_automatic.py +0 -0
  219. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_sandbox_runner.py +0 -0
  220. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_servicedesk_source.py +0 -0
  221. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_slack_source.py +0 -0
  222. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_snowflake_source.py +0 -0
  223. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_source_dependency_groups.py +0 -0
  224. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_sqlite_source.py +0 -0
  225. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_tableau_source.py +0 -0
  226. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_tabular_automatic_sampling.py +0 -0
  227. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_tabular_utils.py +0 -0
  228. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_uv_sync.py +0 -0
  229. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_wordpress_source.py +0 -0
  230. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_youtube_source.py +0 -0
  231. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/test_youtube_source_integration.py +0 -0
  232. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/utils/test_content_extraction.py +0 -0
  233. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/utils/test_embedded_images.py +0 -0
  234. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/utils/test_file_metadata.py +0 -0
  235. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/utils/test_file_parser.py +0 -0
  236. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/utils/test_file_to_images.py +0 -0
  237. {classifyre_cli-0.4.33 → classifyre_cli-0.4.35}/tests/utils/test_transcription.py +0 -0
@@ -63,3 +63,6 @@ dmypy.json
63
63
 
64
64
  # Local training artifacts
65
65
  checkpoints/
66
+
67
+ # Spark local warehouse (created during dev/testing of lakehouse sources)
68
+ spark-warehouse/
@@ -0,0 +1,3 @@
1
+ $ uv sync
2
+ Resolved 271 packages in 177ms
3
+ Checked 50 packages in 1ms
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: classifyre-cli
3
- Version: 0.4.33
3
+ Version: 0.4.35
4
4
  Summary: Classifyre CLI — scan and classify unstructured data sources
5
5
  License: MIT
6
6
  Keywords: data,ingestion,metadata,pii,secrets,unstructured
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@classifyre/cli",
3
- "version": "0.4.33",
3
+ "version": "0.4.35",
4
4
  "private": true,
5
5
  "scripts": {
6
6
  "build": "uv sync",
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "classifyre-cli"
3
- version = "0.4.33"
3
+ version = "0.4.35"
4
4
  description = "Classifyre CLI — scan and classify unstructured data sources"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -202,6 +202,38 @@ youtube = [
202
202
  "yt-dlp>=2025.1.0",
203
203
  "youtube-transcript-api>=1.0.0",
204
204
  ]
205
+ spark = [
206
+ # Shared PySpark runtime for the lakehouse sources. Requires a JDK (Java 21
207
+ # LTS) on the host; format JARs (Delta/Iceberg/Hudi) are resolved at runtime
208
+ # via spark.jars.packages (see SPARK_* env vars). Pinned to the Spark 4.1
209
+ # minor so the format-JAR coordinates below stay version-matched.
210
+ #
211
+ # The `connect` extra pulls in pandas>=2.2, pyarrow, and grpcio, which the
212
+ # Spark Connect client (Spark Catalog via sc:// URLs) hard-requires at session
213
+ # build time. Classic/local Spark (Delta/Hudi/Iceberg) does not need them, but
214
+ # they share this group, so we ship the client deps once for all of them.
215
+ "pyspark[connect]>=4.1,<4.2",
216
+ ]
217
+ delta-lake = [
218
+ { include-group = "spark" },
219
+ "delta-spark>=4.0",
220
+ ]
221
+ hudi = [
222
+ # Hudi Spark integration ships as Maven JARs configured via
223
+ # SPARK_JARS_PACKAGES; only the shared PySpark runtime is needed here.
224
+ { include-group = "spark" },
225
+ ]
226
+ spark-catalog = [
227
+ { include-group = "spark" },
228
+ ]
229
+ iceberg = [
230
+ # Inspected through Spark (iceberg-spark-runtime JARs via SPARK_JARS_PACKAGES),
231
+ # sharing the JVM the other lakehouse sources already require.
232
+ { include-group = "spark" },
233
+ ]
234
+ kafka = [
235
+ "kafka-python>=2.0",
236
+ ]
205
237
  otel = [
206
238
  "opentelemetry-sdk>=1.42.0",
207
239
  "opentelemetry-exporter-otlp-proto-http>=1.27.0",