classifyre-cli 0.4.34__tar.gz → 0.4.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236):
  1. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/.turbo/turbo-build.log +1 -1
  2. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/PKG-INFO +1 -1
  3. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/package.json +1 -1
  4. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/pyproject.toml +7 -2
  5. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/models/generated_input.py +319 -8
  6. classifyre_cli-0.4.35/src/sources/elasticsearch/__init__.py +3 -0
  7. classifyre_cli-0.4.35/src/sources/elasticsearch/source.py +31 -0
  8. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/kafka/source.py +27 -8
  9. classifyre_cli-0.4.35/src/sources/meilisearch/__init__.py +3 -0
  10. classifyre_cli-0.4.35/src/sources/meilisearch/source.py +353 -0
  11. classifyre_cli-0.4.35/src/sources/opensearch/__init__.py +3 -0
  12. classifyre_cli-0.4.35/src/sources/opensearch/source.py +32 -0
  13. classifyre_cli-0.4.35/src/sources/search_engine_base.py +345 -0
  14. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/spark_catalog/source.py +13 -5
  15. classifyre_cli-0.4.35/tests/test_elasticsearch_source.py +238 -0
  16. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_kafka_source.py +52 -1
  17. classifyre_cli-0.4.35/tests/test_meilisearch_source.py +255 -0
  18. classifyre_cli-0.4.35/tests/test_opensearch_source.py +228 -0
  19. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/uv.lock +282 -157
  20. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/.gitignore +0 -0
  21. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/.python-version +0 -0
  22. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/README.md +0 -0
  23. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/main.py +0 -0
  24. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/scripts/generate_models.py +0 -0
  25. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/__init__.py +0 -0
  26. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/config.py +0 -0
  27. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/__init__.py +0 -0
  28. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/base.py +0 -0
  29. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/broken_links/__init__.py +0 -0
  30. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/broken_links/detector.py +0 -0
  31. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/config.py +0 -0
  32. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/content/__init__.py +0 -0
  33. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/__init__.py +0 -0
  34. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/detector.py +0 -0
  35. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/extractor.py +0 -0
  36. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/__init__.py +0 -0
  37. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_base.py +0 -0
  38. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_factory.py +0 -0
  39. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_feature_extraction.py +0 -0
  40. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_gliner2.py +0 -0
  41. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_image_classification.py +0 -0
  42. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_llm.py +0 -0
  43. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_object_detection.py +0 -0
  44. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_regex.py +0 -0
  45. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/runners/_text_classification.py +0 -0
  46. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/custom/trainer.py +0 -0
  47. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/dependencies.py +0 -0
  48. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/pii/__init__.py +0 -0
  49. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/pii/detector.py +0 -0
  50. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/secrets/__init__.py +0 -0
  51. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/secrets/detector.py +0 -0
  52. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/threat/__init__.py +0 -0
  53. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/threat/code_security_detector.py +0 -0
  54. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/detectors/threat/yara_detector.py +0 -0
  55. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/main.py +0 -0
  56. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/models/generated_detectors.py +0 -0
  57. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/models/generated_single_asset_scan_results.py +0 -0
  58. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/outputs/__init__.py +0 -0
  59. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/outputs/base.py +0 -0
  60. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/outputs/console.py +0 -0
  61. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/outputs/factory.py +0 -0
  62. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/outputs/file.py +0 -0
  63. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/outputs/rest.py +0 -0
  64. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/pipeline/__init__.py +0 -0
  65. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/pipeline/content_provider.py +0 -0
  66. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/pipeline/detector_pipeline.py +0 -0
  67. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/pipeline/parsed_content_provider.py +0 -0
  68. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/pipeline/worker_pool.py +0 -0
  69. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sandbox/__init__.py +0 -0
  70. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sandbox/runner.py +0 -0
  71. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/__init__.py +0 -0
  72. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/asset_metadata.py +0 -0
  73. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/atlassian_common.py +0 -0
  74. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/azure_blob_storage/__init__.py +0 -0
  75. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/azure_blob_storage/source.py +0 -0
  76. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/base.py +0 -0
  77. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/confluence/__init__.py +0 -0
  78. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/confluence/source.py +0 -0
  79. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/databricks/__init__.py +0 -0
  80. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/databricks/source.py +0 -0
  81. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/delta_lake/__init__.py +0 -0
  82. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/delta_lake/source.py +0 -0
  83. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/dependencies.py +0 -0
  84. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/email/__init__.py +0 -0
  85. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/email/source.py +0 -0
  86. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/google_cloud_storage/__init__.py +0 -0
  87. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/google_cloud_storage/source.py +0 -0
  88. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/hive/__init__.py +0 -0
  89. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/hive/source.py +0 -0
  90. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/hudi/__init__.py +0 -0
  91. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/hudi/source.py +0 -0
  92. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/iceberg/__init__.py +0 -0
  93. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/iceberg/source.py +0 -0
  94. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/jira/__init__.py +0 -0
  95. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/jira/source.py +0 -0
  96. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/kafka/__init__.py +0 -0
  97. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/mongodb/__init__.py +0 -0
  98. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/mongodb/source.py +0 -0
  99. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/mssql/__init__.py +0 -0
  100. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/mssql/source.py +0 -0
  101. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/mysql/__init__.py +0 -0
  102. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/mysql/source.py +0 -0
  103. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/neo4j/__init__.py +0 -0
  104. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/neo4j/source.py +0 -0
  105. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/notion/__init__.py +0 -0
  106. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/notion/client.py +0 -0
  107. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/notion/source.py +0 -0
  108. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/object_storage/base.py +0 -0
  109. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/oracle/__init__.py +0 -0
  110. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/oracle/source.py +0 -0
  111. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/postgresql/__init__.py +0 -0
  112. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/postgresql/source.py +0 -0
  113. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/powerbi/__init__.py +0 -0
  114. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/powerbi/source.py +0 -0
  115. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/recipe_normalizer.py +0 -0
  116. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/s3_compatible_storage/README.md +0 -0
  117. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/s3_compatible_storage/__init__.py +0 -0
  118. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/s3_compatible_storage/source.py +0 -0
  119. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/servicedesk/__init__.py +0 -0
  120. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/servicedesk/source.py +0 -0
  121. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/slack/__init__.py +0 -0
  122. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/slack/source.py +0 -0
  123. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/snowflake/__init__.py +0 -0
  124. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/snowflake/source.py +0 -0
  125. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/spark_base.py +0 -0
  126. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/spark_catalog/__init__.py +0 -0
  127. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/sqlite/__init__.py +0 -0
  128. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/sqlite/source.py +0 -0
  129. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/tableau/__init__.py +0 -0
  130. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/tableau/source.py +0 -0
  131. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/tabular_base.py +0 -0
  132. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/tabular_utils.py +0 -0
  133. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/wordpress/__init__.py +0 -0
  134. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/wordpress/source.py +0 -0
  135. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/youtube/__init__.py +0 -0
  136. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/sources/youtube/source.py +0 -0
  137. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/telemetry.py +0 -0
  138. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/__init__.py +0 -0
  139. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/content_extraction.py +0 -0
  140. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/dependency_groups.py +0 -0
  141. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/embedded_images.py +0 -0
  142. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/file_metadata.py +0 -0
  143. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/file_parser.py +0 -0
  144. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/file_to_images.py +0 -0
  145. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/hashing.py +0 -0
  146. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/resources.py +0 -0
  147. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/spark_runtime.py +0 -0
  148. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/transcription.py +0 -0
  149. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/uv_sync.py +0 -0
  150. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/src/utils/validation.py +0 -0
  151. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/__init__.py +0 -0
  152. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/_spark_fakes.py +0 -0
  153. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/conftest.py +0 -0
  154. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/__init__.py +0 -0
  155. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/broken_links/test_broken_links_detector.py +0 -0
  156. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/conftest.py +0 -0
  157. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/content/__init__.py +0 -0
  158. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/custom/__init__.py +0 -0
  159. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/custom/conftest.py +0 -0
  160. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/custom/test_invoice_extraction.py +0 -0
  161. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/custom/test_llm_runner.py +0 -0
  162. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/custom/test_pipeline_integration.py +0 -0
  163. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/custom/test_regex_runner.py +0 -0
  164. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/custom/test_transformer_runners.py +0 -0
  165. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/pii/__init__.py +0 -0
  166. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/pii/conftest.py +0 -0
  167. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/pii/sample_invoice.pdf +0 -0
  168. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/pii/test_pii_detector.py +0 -0
  169. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/pii/test_pii_detector_extended.py +0 -0
  170. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/secrets/__init__.py +0 -0
  171. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/secrets/test_secrets_detector.py +0 -0
  172. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/secrets/test_secrets_detector_extended.py +0 -0
  173. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_base_detector.py +0 -0
  174. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_custom_detector_examples_runtime.py +0 -0
  175. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_detector_catalog_commercial.py +0 -0
  176. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_detector_pipeline_types.py +0 -0
  177. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_detector_schema_examples.py +0 -0
  178. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_detector_types.py +0 -0
  179. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_phase2_detectors.py +0 -0
  180. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/test_registry.py +0 -0
  181. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/threat/__init__.py +0 -0
  182. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/threat/test_code_security_detector.py +0 -0
  183. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/detectors/threat/test_yara_detector.py +0 -0
  184. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/integration/test_wordpress_broken_links_detector.py +0 -0
  185. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/integration/test_wordpress_links_assets.py +0 -0
  186. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/pipeline/test_detector_pipeline.py +0 -0
  187. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/pipeline/test_worker_pool.py +0 -0
  188. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_assets_metadata_catalog.py +0 -0
  189. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_azure_blob_storage_source.py +0 -0
  190. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_base_source_attachment.py +0 -0
  191. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_base_source_sampling.py +0 -0
  192. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_config.py +0 -0
  193. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_confluence_source.py +0 -0
  194. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_custom_extractor.py +0 -0
  195. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_databricks_source.py +0 -0
  196. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_delta_lake_source.py +0 -0
  197. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_dependency_groups.py +0 -0
  198. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_email_source.py +0 -0
  199. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_google_cloud_storage_source.py +0 -0
  200. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_hashing.py +0 -0
  201. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_hive_source.py +0 -0
  202. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_hudi_source.py +0 -0
  203. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_iceberg_source.py +0 -0
  204. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_jira_source.py +0 -0
  205. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_mongodb_source.py +0 -0
  206. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_mssql_source.py +0 -0
  207. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_mysql_source.py +0 -0
  208. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_neo4j_source.py +0 -0
  209. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_notion_source.py +0 -0
  210. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_oracle_source.py +0 -0
  211. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_outputs.py +0 -0
  212. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_postgresql_source.py +0 -0
  213. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_powerbi_source.py +0 -0
  214. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_recipe_normalizer.py +0 -0
  215. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_s3_compatible_storage_source.py +0 -0
  216. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_sampling_automatic.py +0 -0
  217. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_sandbox_runner.py +0 -0
  218. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_servicedesk_source.py +0 -0
  219. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_slack_source.py +0 -0
  220. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_snowflake_source.py +0 -0
  221. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_source_dependency_groups.py +0 -0
  222. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_spark_catalog_source.py +0 -0
  223. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_sqlite_source.py +0 -0
  224. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_tableau_source.py +0 -0
  225. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_tabular_automatic_sampling.py +0 -0
  226. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_tabular_utils.py +0 -0
  227. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_uv_sync.py +0 -0
  228. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_wordpress_source.py +0 -0
  229. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_youtube_source.py +0 -0
  230. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/test_youtube_source_integration.py +0 -0
  231. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/utils/test_content_extraction.py +0 -0
  232. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/utils/test_embedded_images.py +0 -0
  233. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/utils/test_file_metadata.py +0 -0
  234. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/utils/test_file_parser.py +0 -0
  235. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/utils/test_file_to_images.py +0 -0
  236. {classifyre_cli-0.4.34 → classifyre_cli-0.4.35}/tests/utils/test_transcription.py +0 -0
@@ -1,3 +1,3 @@
1
1
  $ uv sync
2
- Resolved 268 packages in 168ms
2
+ Resolved 271 packages in 177ms
3
3
  Checked 50 packages in 1ms
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: classifyre-cli
3
- Version: 0.4.34
3
+ Version: 0.4.35
4
4
  Summary: Classifyre CLI — scan and classify unstructured data sources
5
5
  License: MIT
6
6
  Keywords: data,ingestion,metadata,pii,secrets,unstructured
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@classifyre/cli",
3
- "version": "0.4.34",
3
+ "version": "0.4.35",
4
4
  "private": true,
5
5
  "scripts": {
6
6
  "build": "uv sync",
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "classifyre-cli"
3
- version = "0.4.34"
3
+ version = "0.4.35"
4
4
  description = "Classifyre CLI — scan and classify unstructured data sources"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -207,7 +207,12 @@ spark = [
207
207
  # LTS) on the host; format JARs (Delta/Iceberg/Hudi) are resolved at runtime
208
208
  # via spark.jars.packages (see SPARK_* env vars). Pinned to the Spark 4.1
209
209
  # minor so the format-JAR coordinates below stay version-matched.
210
- "pyspark>=4.1,<4.2",
210
+ #
211
+ # The `connect` extra pulls in pandas>=2.2, pyarrow, and grpcio, which the
212
+ # Spark Connect client (Spark Catalog via sc:// URLs) hard-requires at session
213
+ # build time. Classic/local Spark (Delta/Hudi/Iceberg) does not need them, but
214
+ # they share this group, so we ship the client deps once for all of them.
215
+ "pyspark[connect]>=4.1,<4.2",
211
216
  ]
212
217
  delta-lake = [
213
218
  { include-group = "spark" },
@@ -50,6 +50,9 @@ class AssetType(StrEnum):
50
50
  HUDI = 'HUDI'
51
51
  SPARK_CATALOG = 'SPARK_CATALOG'
52
52
  KAFKA = 'KAFKA'
53
+ ELASTICSEARCH = 'ELASTICSEARCH'
54
+ OPENSEARCH = 'OPENSEARCH'
55
+ MEILISEARCH = 'MEILISEARCH'
53
56
 
54
57
 
55
58
  class DetectorType(StrEnum):
@@ -353,6 +356,9 @@ class Type(StrEnum):
353
356
  HUDI = 'HUDI'
354
357
  SPARK_CATALOG = 'SPARK_CATALOG'
355
358
  KAFKA = 'KAFKA'
359
+ ELASTICSEARCH = 'ELASTICSEARCH'
360
+ OPENSEARCH = 'OPENSEARCH'
361
+ MEILISEARCH = 'MEILISEARCH'
356
362
 
357
363
 
358
364
  class YouTubeRequired(BaseModel):
@@ -2865,6 +2871,9 @@ class Type19(StrEnum):
2865
2871
  HUDI = 'HUDI'
2866
2872
  SPARK_CATALOG = 'SPARK_CATALOG'
2867
2873
  KAFKA = 'KAFKA'
2874
+ ELASTICSEARCH = 'ELASTICSEARCH'
2875
+ OPENSEARCH = 'OPENSEARCH'
2876
+ MEILISEARCH = 'MEILISEARCH'
2868
2877
 
2869
2878
 
2870
2879
  class ConfluenceInput(CoreInput):
@@ -3491,25 +3500,68 @@ class IcebergInput(CoreInput):
3491
3500
  resources: ResourceOverrides | None = None
3492
3501
 
3493
3502
 
3494
- class KafkaRequired(BaseModel):
3503
+ class NoAuthentication(BaseModel):
3495
3504
  model_config = ConfigDict(
3496
3505
  extra='forbid',
3497
3506
  )
3507
+ auth_mode: Literal['NONE']
3498
3508
  bootstrap_servers: str = Field(
3499
3509
  ..., description='Comma-separated Kafka bootstrap servers (host:port)'
3500
3510
  )
3501
3511
 
3502
3512
 
3503
- class KafkaMasked(BaseModel):
3513
+ class SASL(BaseModel):
3514
+ model_config = ConfigDict(
3515
+ extra='forbid',
3516
+ )
3517
+ auth_mode: Literal['SASL']
3518
+ bootstrap_servers: str = Field(
3519
+ ..., description='Comma-separated Kafka bootstrap servers (host:port)'
3520
+ )
3521
+
3522
+
3523
+ class ClientCertificateMTLS(BaseModel):
3524
+ model_config = ConfigDict(
3525
+ extra='forbid',
3526
+ )
3527
+ auth_mode: Literal['CLIENT_CERT']
3528
+ bootstrap_servers: str = Field(
3529
+ ..., description='Comma-separated Kafka bootstrap servers (host:port)'
3530
+ )
3531
+
3532
+
3533
+ class NoAuthentication1(BaseModel):
3534
+ model_config = ConfigDict(
3535
+ extra='forbid',
3536
+ )
3537
+
3538
+
3539
+ class SASL1(BaseModel):
3504
3540
  """
3505
- Optional SASL credentials.
3541
+ SASL username/password credentials.
3506
3542
  """
3507
3543
 
3508
3544
  model_config = ConfigDict(
3509
3545
  extra='forbid',
3510
3546
  )
3511
- sasl_username: str | None = Field(None, description='SASL username')
3512
- sasl_password: str | None = Field(None, description='SASL password')
3547
+ sasl_username: str = Field(..., description='SASL username')
3548
+ sasl_password: str = Field(..., description='SASL password')
3549
+
3550
+
3551
+ class ClientCertificateMTLS1(BaseModel):
3552
+ """
3553
+ mTLS client certificate credentials.
3554
+ """
3555
+
3556
+ model_config = ConfigDict(
3557
+ extra='forbid',
3558
+ )
3559
+ ssl_certfile: str = Field(
3560
+ ..., description='PEM-encoded client certificate (access cert)'
3561
+ )
3562
+ ssl_keyfile: str = Field(
3563
+ ..., description='PEM-encoded client private key (access key)'
3564
+ )
3513
3565
 
3514
3566
 
3515
3567
  class KafkaOptionalConnection(BaseModel):
@@ -3523,7 +3575,8 @@ class KafkaOptionalConnection(BaseModel):
3523
3575
  security_protocol: KafkaSecurityProtocol | None = 'PLAINTEXT'
3524
3576
  sasl_mechanism: KafkaSaslMechanism | None = 'PLAIN'
3525
3577
  ssl_ca: str | None = Field(
3526
- None, description='PEM-encoded CA certificate for TLS verification'
3578
+ None,
3579
+ description="PEM-encoded CA certificate for TLS verification (optional for client-certificate auth; validates the broker's certificate)",
3527
3580
  )
3528
3581
  request_timeout_ms: int | None = Field(
3529
3582
  30000, description='Client request timeout in milliseconds', ge=1000
@@ -3562,8 +3615,12 @@ class KafkaInput(CoreInput):
3562
3615
  type: Literal['KAFKA'] | None = Field(
3563
3616
  None, description='Type of the asset or source'
3564
3617
  )
3565
- required: KafkaRequired
3566
- masked: KafkaMasked | None = None
3618
+ required: NoAuthentication | SASL | ClientCertificateMTLS = Field(
3619
+ ..., title='KafkaRequired'
3620
+ )
3621
+ masked: NoAuthentication1 | SASL1 | ClientCertificateMTLS1 | None = Field(
3622
+ None, title='KafkaMasked'
3623
+ )
3567
3624
  optional: KafkaOptional | None = None
3568
3625
  detectors: list[Detector] | None = Field(
3569
3626
  None, description='Detectors to run on ingested content'
@@ -3576,6 +3633,254 @@ class KafkaInput(CoreInput):
3576
3633
  resources: ResourceOverrides | None = None
3577
3634
 
3578
3635
 
3636
+ class NoAuthentication2(BaseModel):
3637
+ model_config = ConfigDict(
3638
+ extra='forbid',
3639
+ )
3640
+ auth_mode: Literal['NONE']
3641
+ url: AnyUrl = Field(
3642
+ ..., description='Base URL of the cluster (e.g. https://localhost:9200)'
3643
+ )
3644
+
3645
+
3646
+ class BasicUsernamePassword(BaseModel):
3647
+ model_config = ConfigDict(
3648
+ extra='forbid',
3649
+ )
3650
+ auth_mode: Literal['BASIC']
3651
+ url: AnyUrl = Field(
3652
+ ..., description='Base URL of the cluster (e.g. https://localhost:9200)'
3653
+ )
3654
+
3655
+
3656
+ class APIKeyBearerToken(BaseModel):
3657
+ model_config = ConfigDict(
3658
+ extra='forbid',
3659
+ )
3660
+ auth_mode: Literal['API_KEY']
3661
+ url: AnyUrl = Field(
3662
+ ..., description='Base URL of the cluster (e.g. https://localhost:9200)'
3663
+ )
3664
+
3665
+
3666
+ class NoAuthentication3(BaseModel):
3667
+ model_config = ConfigDict(
3668
+ extra='forbid',
3669
+ )
3670
+
3671
+
3672
+ class BasicUsernamePassword1(BaseModel):
3673
+ """
3674
+ Basic auth credentials.
3675
+ """
3676
+
3677
+ model_config = ConfigDict(
3678
+ extra='forbid',
3679
+ )
3680
+ username: str = Field(..., description='Basic auth username')
3681
+ password: str = Field(..., description='Basic auth password')
3682
+
3683
+
3684
+ class APIKeyBearerToken1(BaseModel):
3685
+ """
3686
+ API key / bearer token credential.
3687
+ """
3688
+
3689
+ model_config = ConfigDict(
3690
+ extra='forbid',
3691
+ )
3692
+ api_key: str = Field(
3693
+ ..., description='API key or bearer token, sent as an Authorization header'
3694
+ )
3695
+
3696
+
3697
+ class SearchEngineOptionalConnection(BaseModel):
3698
+ """
3699
+ Cluster connection controls.
3700
+ """
3701
+
3702
+ model_config = ConfigDict(
3703
+ extra='forbid',
3704
+ )
3705
+ verify_ssl: bool | None = Field(
3706
+ True, description='TLS certificate verification toggle'
3707
+ )
3708
+ request_timeout_seconds: float | None = Field(
3709
+ 30,
3710
+ description='Network timeout in seconds for cluster API calls',
3711
+ ge=1.0,
3712
+ le=300.0,
3713
+ )
3714
+
3715
+
3716
+ class SearchEngineOptionalScope(BaseModel):
3717
+ """
3718
+ Index selection scope.
3719
+ """
3720
+
3721
+ model_config = ConfigDict(
3722
+ extra='forbid',
3723
+ )
3724
+ include_indices: list[str] | None = Field(
3725
+ None, description='Optional index allowlist'
3726
+ )
3727
+ exclude_indices: list[str] | None = Field(None, description='Index denylist')
3728
+ include_system_indices: bool | None = Field(
3729
+ False, description='Include system indices (names starting with .)'
3730
+ )
3731
+ index_limit: int | None = Field(
3732
+ None, description='Optional cap on number of index assets', ge=1
3733
+ )
3734
+
3735
+
3736
+ class ElasticsearchOptional(BaseModel):
3737
+ model_config = ConfigDict(
3738
+ extra='forbid',
3739
+ )
3740
+ connection: SearchEngineOptionalConnection | None = None
3741
+ scope: SearchEngineOptionalScope | None = None
3742
+
3743
+
3744
+ class ElasticsearchInput(CoreInput):
3745
+ type: Literal['ELASTICSEARCH'] | None = Field(
3746
+ None, description='Type of the asset or source'
3747
+ )
3748
+ required: NoAuthentication2 | BasicUsernamePassword | APIKeyBearerToken = Field(
3749
+ ..., title='ElasticsearchRequired'
3750
+ )
3751
+ masked: NoAuthentication3 | BasicUsernamePassword1 | APIKeyBearerToken1 | None = (
3752
+ Field(None, title='ElasticsearchMasked')
3753
+ )
3754
+ optional: ElasticsearchOptional | None = None
3755
+ detectors: list[Detector] | None = Field(
3756
+ None, description='Detectors to run on ingested content'
3757
+ )
3758
+ custom_detectors: list[CustomDetectorSelection] | None = Field(
3759
+ None,
3760
+ description='Reusable custom detector IDs selected from the custom detector catalog.',
3761
+ )
3762
+ sampling: SamplingConfig
3763
+ resources: ResourceOverrides | None = None
3764
+
3765
+
3766
+ class OpenSearchOptional(BaseModel):
3767
+ model_config = ConfigDict(
3768
+ extra='forbid',
3769
+ )
3770
+ connection: SearchEngineOptionalConnection | None = None
3771
+ scope: SearchEngineOptionalScope | None = None
3772
+
3773
+
3774
+ class OpenSearchInput(CoreInput):
3775
+ type: Literal['OPENSEARCH'] | None = Field(
3776
+ None, description='Type of the asset or source'
3777
+ )
3778
+ required: NoAuthentication2 | BasicUsernamePassword | APIKeyBearerToken = Field(
3779
+ ..., title='OpenSearchRequired'
3780
+ )
3781
+ masked: NoAuthentication3 | BasicUsernamePassword1 | APIKeyBearerToken1 | None = (
3782
+ Field(None, title='OpenSearchMasked')
3783
+ )
3784
+ optional: OpenSearchOptional | None = None
3785
+ detectors: list[Detector] | None = Field(
3786
+ None, description='Detectors to run on ingested content'
3787
+ )
3788
+ custom_detectors: list[CustomDetectorSelection] | None = Field(
3789
+ None,
3790
+ description='Reusable custom detector IDs selected from the custom detector catalog.',
3791
+ )
3792
+ sampling: SamplingConfig
3793
+ resources: ResourceOverrides | None = None
3794
+
3795
+
3796
+ class NoAuthentication4(BaseModel):
3797
+ model_config = ConfigDict(
3798
+ extra='forbid',
3799
+ )
3800
+ auth_mode: Literal['NONE']
3801
+ url: AnyUrl = Field(
3802
+ ...,
3803
+ description='Base URL of the Meilisearch instance (e.g. http://localhost:7700)',
3804
+ )
3805
+
3806
+
3807
+ class APIKeyBearerToken2(BaseModel):
3808
+ model_config = ConfigDict(
3809
+ extra='forbid',
3810
+ )
3811
+ auth_mode: Literal['API_KEY']
3812
+ url: AnyUrl = Field(
3813
+ ...,
3814
+ description='Base URL of the Meilisearch instance (e.g. http://localhost:7700)',
3815
+ )
3816
+
3817
+
3818
+ class NoAuthentication5(BaseModel):
3819
+ model_config = ConfigDict(
3820
+ extra='forbid',
3821
+ )
3822
+
3823
+
3824
+ class APIKeyBearerToken3(BaseModel):
3825
+ """
3826
+ Meilisearch API key or master key, sent as an Authorization: Bearer header. Meilisearch has no separate username/password authentication mode.
3827
+ """
3828
+
3829
+ model_config = ConfigDict(
3830
+ extra='forbid',
3831
+ )
3832
+ api_key: str = Field(..., description='API key or master key')
3833
+
3834
+
3835
+ class MeilisearchOptionalScope(BaseModel):
3836
+ """
3837
+ Index selection scope.
3838
+ """
3839
+
3840
+ model_config = ConfigDict(
3841
+ extra='forbid',
3842
+ )
3843
+ include_indices: list[str] | None = Field(
3844
+ None, description='Optional index allowlist (matches index uid)'
3845
+ )
3846
+ exclude_indices: list[str] | None = Field(
3847
+ None, description='Index denylist (matches index uid)'
3848
+ )
3849
+ index_limit: int | None = Field(
3850
+ None, description='Optional cap on number of index assets', ge=1
3851
+ )
3852
+
3853
+
3854
+ class MeilisearchOptional(BaseModel):
3855
+ model_config = ConfigDict(
3856
+ extra='forbid',
3857
+ )
3858
+ connection: SearchEngineOptionalConnection | None = None
3859
+ scope: MeilisearchOptionalScope | None = None
3860
+
3861
+
3862
+ class MeilisearchInput(CoreInput):
3863
+ type: Literal['MEILISEARCH'] | None = Field(
3864
+ None, description='Type of the asset or source'
3865
+ )
3866
+ required: NoAuthentication4 | APIKeyBearerToken2 = Field(
3867
+ ..., title='MeilisearchRequired'
3868
+ )
3869
+ masked: NoAuthentication5 | APIKeyBearerToken3 | None = Field(
3870
+ None, title='MeilisearchMasked'
3871
+ )
3872
+ optional: MeilisearchOptional | None = None
3873
+ detectors: list[Detector] | None = Field(
3874
+ None, description='Detectors to run on ingested content'
3875
+ )
3876
+ custom_detectors: list[CustomDetectorSelection] | None = Field(
3877
+ None,
3878
+ description='Reusable custom detector IDs selected from the custom detector catalog.',
3879
+ )
3880
+ sampling: SamplingConfig
3881
+ resources: ResourceOverrides | None = None
3882
+
3883
+
3579
3884
  class YouTubeInput(CoreInput):
3580
3885
  type: Literal['YOUTUBE'] | None = Field(
3581
3886
  None, description='Type of the asset or source'
@@ -3624,6 +3929,9 @@ class SourceInput(
3624
3929
  | HudiInput
3625
3930
  | SparkCatalogInput
3626
3931
  | KafkaInput
3932
+ | ElasticsearchInput
3933
+ | OpenSearchInput
3934
+ | MeilisearchInput
3627
3935
  ]
3628
3936
  ):
3629
3937
  root: (
@@ -3655,6 +3963,9 @@ class SourceInput(
3655
3963
  | HudiInput
3656
3964
  | SparkCatalogInput
3657
3965
  | KafkaInput
3966
+ | ElasticsearchInput
3967
+ | OpenSearchInput
3968
+ | MeilisearchInput
3658
3969
  ) = Field(
3659
3970
  ...,
3660
3971
  description='Merged configuration schema with all source types and common definitions',
@@ -0,0 +1,3 @@
1
+ from .source import ElasticsearchSource
2
+
3
+ __all__ = ["ElasticsearchSource"]
@@ -0,0 +1,31 @@
1
+ """Elasticsearch source — discovers indices and samples documents.
2
+
3
+ Uses plain REST calls (``requests``) rather than the ``elasticsearch-py``
4
+ client, since only read-only cluster/index/search endpoints are needed and
5
+ those are stable across Elasticsearch versions. Shared with ``OpenSearchSource``
6
+ via :mod:`src.sources.search_engine_base` — see that module for the REST logic.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+ from ...models.generated_input import ElasticsearchInput
14
+ from ..base import BaseSource
15
+ from ..search_engine_base import SearchEngineSourceMixin
16
+
17
+
18
+ class ElasticsearchSource(SearchEngineSourceMixin, BaseSource):
19
+ source_type = "elasticsearch"
20
+ ENGINE_LABEL = "Elasticsearch"
21
+
22
+ def __init__(
23
+ self,
24
+ recipe: dict[str, Any],
25
+ source_id: str | None = None,
26
+ runner_id: str | None = None,
27
+ ) -> None:
28
+ super().__init__(recipe, source_id, runner_id)
29
+ self.config = ElasticsearchInput.model_validate(recipe)
30
+ self.runner_id = runner_id or "local-run"
31
+ self._index_lookup: dict[str, str] = {}
@@ -9,6 +9,7 @@ from __future__ import annotations
9
9
 
10
10
  import logging
11
11
  import ssl as ssl_module
12
+ import tempfile
12
13
  from collections.abc import AsyncGenerator
13
14
  from datetime import UTC, datetime
14
15
  from typing import Any
@@ -67,6 +68,7 @@ class KafkaSource(BaseSource):
67
68
  def _client_kwargs(self) -> dict[str, Any]:
68
69
  kwargs: dict[str, Any] = {"bootstrap_servers": self._bootstrap_servers()}
69
70
  connection = self._connection()
71
+ ssl_ca = getattr(connection, "ssl_ca", None) if connection is not None else None
70
72
  if connection is not None:
71
73
  protocol = getattr(connection, "security_protocol", None)
72
74
  if protocol is not None:
@@ -80,17 +82,34 @@ class KafkaSource(BaseSource):
80
82
  )
81
83
  if getattr(connection, "request_timeout_ms", None):
82
84
  kwargs["request_timeout_ms"] = int(connection.request_timeout_ms)
83
- if getattr(connection, "ssl_ca", None):
84
- context = ssl_module.create_default_context(cadata=connection.ssl_ca)
85
- kwargs["ssl_context"] = context
86
85
  masked = self.config.masked
87
- if masked is not None:
88
- if getattr(masked, "sasl_username", None):
89
- kwargs["sasl_plain_username"] = masked.sasl_username
90
- if getattr(masked, "sasl_password", None):
91
- kwargs["sasl_plain_password"] = masked.sasl_password
86
+ if getattr(masked, "sasl_username", None):
87
+ kwargs["sasl_plain_username"] = masked.sasl_username
88
+ if getattr(masked, "sasl_password", None):
89
+ kwargs["sasl_plain_password"] = masked.sasl_password
90
+ ssl_certfile = getattr(masked, "ssl_certfile", None)
91
+ ssl_keyfile = getattr(masked, "ssl_keyfile", None)
92
+ if ssl_ca or ssl_certfile:
93
+ context = ssl_module.create_default_context(cadata=ssl_ca)
94
+ if ssl_certfile and ssl_keyfile:
95
+ self._load_client_cert_chain(context, ssl_certfile, ssl_keyfile)
96
+ kwargs["ssl_context"] = context
92
97
  return kwargs
93
98
 
99
+ @staticmethod
100
+ def _load_client_cert_chain(
101
+ context: ssl_module.SSLContext, certfile: str, keyfile: str
102
+ ) -> None:
103
+ with (
104
+ tempfile.NamedTemporaryFile("w", suffix=".pem") as cert_tmp,
105
+ tempfile.NamedTemporaryFile("w", suffix=".pem") as key_tmp,
106
+ ):
107
+ cert_tmp.write(certfile)
108
+ cert_tmp.flush()
109
+ key_tmp.write(keyfile)
110
+ key_tmp.flush()
111
+ context.load_cert_chain(certfile=cert_tmp.name, keyfile=key_tmp.name)
112
+
94
113
  def _make_consumer(self, **extra: Any) -> Any:
95
114
  kwargs = {**self._client_kwargs(), "enable_auto_commit": False, **extra}
96
115
  return self._kafka.KafkaConsumer(**kwargs)
@@ -0,0 +1,3 @@
1
+ from .source import MeilisearchSource
2
+
3
+ __all__ = ["MeilisearchSource"]