isa-data 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (683) hide show
  1. isa_data/__init__.py +28 -0
  2. isa_data/adapters/__init__.py +27 -0
  3. isa_data/adapters/base/__init__.py +14 -0
  4. isa_data/adapters/base/base_adapter.py +240 -0
  5. isa_data/adapters/base/database_adapter.py +1010 -0
  6. isa_data/adapters/file_adapters/__init__.py +68 -0
  7. isa_data/adapters/file_adapters/base_adapter.py +372 -0
  8. isa_data/adapters/file_adapters/csv_adapter.py +584 -0
  9. isa_data/adapters/file_adapters/document_adapter.py +741 -0
  10. isa_data/adapters/file_adapters/excel_adapter.py +467 -0
  11. isa_data/adapters/sink_adapters/__init__.py +81 -0
  12. isa_data/adapters/sink_adapters/base_sink_adapter.py +165 -0
  13. isa_data/adapters/sink_adapters/csv_adapter.py +451 -0
  14. isa_data/adapters/sink_adapters/delta_lake_adapter.py +417 -0
  15. isa_data/adapters/sink_adapters/duckdb_adapter.py +283 -0
  16. isa_data/adapters/sink_adapters/parquet_adapter.py +466 -0
  17. isa_data/adapters/source_adapters/README.md +361 -0
  18. isa_data/adapters/source_adapters/__init__.py +65 -0
  19. isa_data/adapters/source_adapters/base_adapter.py +215 -0
  20. isa_data/adapters/source_adapters/bigquery_adapter.py +468 -0
  21. isa_data/adapters/source_adapters/clickhouse_adapter.py +393 -0
  22. isa_data/adapters/source_adapters/deltalake_adapter.py +434 -0
  23. isa_data/adapters/source_adapters/elasticsearch_adapter.py +456 -0
  24. isa_data/adapters/source_adapters/mongodb_adapter.py +343 -0
  25. isa_data/adapters/source_adapters/mysql_adapter.py +283 -0
  26. isa_data/adapters/source_adapters/oracle_adapter.py +333 -0
  27. isa_data/adapters/source_adapters/postgresql_adapter.py +323 -0
  28. isa_data/adapters/source_adapters/redis_adapter.py +401 -0
  29. isa_data/adapters/source_adapters/registry.py +306 -0
  30. isa_data/adapters/source_adapters/s3_adapter.py +546 -0
  31. isa_data/adapters/source_adapters/snowflake_adapter.py +334 -0
  32. isa_data/adapters/source_adapters/sqlserver_adapter.py +320 -0
  33. isa_data/adapters/vendor_collectors/README.md +99 -0
  34. isa_data/adapters/vendor_collectors/__init__.py +88 -0
  35. isa_data/adapters/vendor_collectors/_base/__init__.py +85 -0
  36. isa_data/adapters/vendor_collectors/_base/apicurio_backend.py +277 -0
  37. isa_data/adapters/vendor_collectors/_base/auth/__init__.py +15 -0
  38. isa_data/adapters/vendor_collectors/_base/auth/api_key.py +28 -0
  39. isa_data/adapters/vendor_collectors/_base/auth/base.py +54 -0
  40. isa_data/adapters/vendor_collectors/_base/auth/manual_upload.py +27 -0
  41. isa_data/adapters/vendor_collectors/_base/auth/oauth2.py +79 -0
  42. isa_data/adapters/vendor_collectors/_base/base_collector.py +161 -0
  43. isa_data/adapters/vendor_collectors/_base/checkpoint.py +72 -0
  44. isa_data/adapters/vendor_collectors/_base/file_feed_collector.py +250 -0
  45. isa_data/adapters/vendor_collectors/_base/http_client.py +175 -0
  46. isa_data/adapters/vendor_collectors/_base/managed_web_data_collector.py +49 -0
  47. isa_data/adapters/vendor_collectors/_base/manifest_loader.py +221 -0
  48. isa_data/adapters/vendor_collectors/_base/pagination.py +184 -0
  49. isa_data/adapters/vendor_collectors/_base/rate_limiter.py +105 -0
  50. isa_data/adapters/vendor_collectors/_base/result.py +61 -0
  51. isa_data/adapters/vendor_collectors/_base/schema_validator.py +232 -0
  52. isa_data/adapters/vendor_collectors/connectors/__init__.py +16 -0
  53. isa_data/adapters/vendor_collectors/connectors/_stub.py +42 -0
  54. isa_data/adapters/vendor_collectors/connectors/amazon_ads.py +339 -0
  55. isa_data/adapters/vendor_collectors/connectors/amazon_spapi.py +213 -0
  56. isa_data/adapters/vendor_collectors/connectors/amazon_vc.py +122 -0
  57. isa_data/adapters/vendor_collectors/connectors/coupang.py +261 -0
  58. isa_data/adapters/vendor_collectors/connectors/douyin.py +280 -0
  59. isa_data/adapters/vendor_collectors/connectors/ga4.py +285 -0
  60. isa_data/adapters/vendor_collectors/connectors/google_ads.py +386 -0
  61. isa_data/adapters/vendor_collectors/connectors/homedepot.py +194 -0
  62. isa_data/adapters/vendor_collectors/connectors/jd.py +476 -0
  63. isa_data/adapters/vendor_collectors/connectors/lazada.py +311 -0
  64. isa_data/adapters/vendor_collectors/connectors/lowes.py +151 -0
  65. isa_data/adapters/vendor_collectors/connectors/meta_ads.py +416 -0
  66. isa_data/adapters/vendor_collectors/connectors/overstock.py +285 -0
  67. isa_data/adapters/vendor_collectors/connectors/ozon.py +245 -0
  68. isa_data/adapters/vendor_collectors/connectors/pdd.py +323 -0
  69. isa_data/adapters/vendor_collectors/connectors/rakuten.py +308 -0
  70. isa_data/adapters/vendor_collectors/connectors/shopee.py +252 -0
  71. isa_data/adapters/vendor_collectors/connectors/shopify.py +239 -0
  72. isa_data/adapters/vendor_collectors/connectors/temu.py +339 -0
  73. isa_data/adapters/vendor_collectors/connectors/tmall.py +610 -0
  74. isa_data/adapters/vendor_collectors/connectors/walmart_connect.py +322 -0
  75. isa_data/adapters/vendor_collectors/connectors/walmart_marketplace.py +458 -0
  76. isa_data/adapters/vendor_collectors/connectors/wayfair.py +98 -0
  77. isa_data/adapters/vendor_collectors/connectors/wildberries.py +237 -0
  78. isa_data/adapters/vendor_collectors/connectors/xiaomi.py +256 -0
  79. isa_data/adapters/vendor_collectors/providers/__init__.py +44 -0
  80. isa_data/adapters/vendor_collectors/providers/_managed_base.py +177 -0
  81. isa_data/adapters/vendor_collectors/providers/bright_data.py +106 -0
  82. isa_data/adapters/vendor_collectors/providers/oxylabs.py +110 -0
  83. isa_data/adapters/vendor_collectors/providers/router.py +102 -0
  84. isa_data/adapters/vendor_collectors/tests/__init__.py +0 -0
  85. isa_data/adapters/vendor_collectors/tests/conftest.py +111 -0
  86. isa_data/adapters/vendor_collectors/tests/test_amazon_ads_connector.py +322 -0
  87. isa_data/adapters/vendor_collectors/tests/test_amazon_spapi_connector.py +305 -0
  88. isa_data/adapters/vendor_collectors/tests/test_apicurio_backend.py +278 -0
  89. isa_data/adapters/vendor_collectors/tests/test_auth.py +107 -0
  90. isa_data/adapters/vendor_collectors/tests/test_base_collector.py +104 -0
  91. isa_data/adapters/vendor_collectors/tests/test_checkpoint.py +42 -0
  92. isa_data/adapters/vendor_collectors/tests/test_connectors_smoke.py +158 -0
  93. isa_data/adapters/vendor_collectors/tests/test_coupang_connector.py +303 -0
  94. isa_data/adapters/vendor_collectors/tests/test_douyin_connector.py +251 -0
  95. isa_data/adapters/vendor_collectors/tests/test_file_feed_collector.py +147 -0
  96. isa_data/adapters/vendor_collectors/tests/test_ga4_connector.py +380 -0
  97. isa_data/adapters/vendor_collectors/tests/test_google_ads_connector.py +415 -0
  98. isa_data/adapters/vendor_collectors/tests/test_homedepot_connector.py +279 -0
  99. isa_data/adapters/vendor_collectors/tests/test_http_client.py +80 -0
  100. isa_data/adapters/vendor_collectors/tests/test_jd_connector.py +469 -0
  101. isa_data/adapters/vendor_collectors/tests/test_lazada_connector.py +389 -0
  102. isa_data/adapters/vendor_collectors/tests/test_lowes_connector.py +219 -0
  103. isa_data/adapters/vendor_collectors/tests/test_managed_web_providers.py +165 -0
  104. isa_data/adapters/vendor_collectors/tests/test_manifest_loader.py +66 -0
  105. isa_data/adapters/vendor_collectors/tests/test_meta_ads_connector.py +314 -0
  106. isa_data/adapters/vendor_collectors/tests/test_overstock_connector.py +336 -0
  107. isa_data/adapters/vendor_collectors/tests/test_ozon_connector.py +248 -0
  108. isa_data/adapters/vendor_collectors/tests/test_pagination.py +54 -0
  109. isa_data/adapters/vendor_collectors/tests/test_pdd_connector.py +314 -0
  110. isa_data/adapters/vendor_collectors/tests/test_rakuten_connector.py +346 -0
  111. isa_data/adapters/vendor_collectors/tests/test_rate_limiter.py +40 -0
  112. isa_data/adapters/vendor_collectors/tests/test_schema_validator.py +41 -0
  113. isa_data/adapters/vendor_collectors/tests/test_shopee_connector.py +322 -0
  114. isa_data/adapters/vendor_collectors/tests/test_shopify_connector.py +274 -0
  115. isa_data/adapters/vendor_collectors/tests/test_smoke.py +37 -0
  116. isa_data/adapters/vendor_collectors/tests/test_temu_connector.py +404 -0
  117. isa_data/adapters/vendor_collectors/tests/test_tmall_connector.py +638 -0
  118. isa_data/adapters/vendor_collectors/tests/test_walmart_connect_connector.py +363 -0
  119. isa_data/adapters/vendor_collectors/tests/test_walmart_marketplace_connector.py +425 -0
  120. isa_data/adapters/vendor_collectors/tests/test_wildberries_connector.py +252 -0
  121. isa_data/adapters/vendor_collectors/tests/test_xiaomi_connector.py +248 -0
  122. isa_data/api/__init__.py +3 -0
  123. isa_data/api/v1/__init__.py +45 -0
  124. isa_data/api/v1/access_control.py +145 -0
  125. isa_data/api/v1/active_metadata.py +176 -0
  126. isa_data/api/v1/agent_fleet.py +736 -0
  127. isa_data/api/v1/analytics.py +133 -0
  128. isa_data/api/v1/audit.py +123 -0
  129. isa_data/api/v1/catalog.py +3713 -0
  130. isa_data/api/v1/catalog_auth.py +89 -0
  131. isa_data/api/v1/commerce_products.py +115 -0
  132. isa_data/api/v1/copilot.py +246 -0
  133. isa_data/api/v1/digital.py +430 -0
  134. isa_data/api/v1/digital_assets.py +413 -0
  135. isa_data/api/v1/enterprise_audit.py +343 -0
  136. isa_data/api/v1/etl.py +599 -0
  137. isa_data/api/v1/fabric.py +386 -0
  138. isa_data/api/v1/federation.py +435 -0
  139. isa_data/api/v1/governance.py +286 -0
  140. isa_data/api/v1/governance_lineage.py +195 -0
  141. isa_data/api/v1/governance_notifications.py +110 -0
  142. isa_data/api/v1/indicators.py +196 -0
  143. isa_data/api/v1/ingestion.py +515 -0
  144. isa_data/api/v1/lake.py +718 -0
  145. isa_data/api/v1/lineage.py +884 -0
  146. isa_data/api/v1/me.py +918 -0
  147. isa_data/api/v1/me_cost_tracker.py +363 -0
  148. isa_data/api/v1/me_metrics.py +203 -0
  149. isa_data/api/v1/me_rate_limit.py +161 -0
  150. isa_data/api/v1/me_storage.py +124 -0
  151. isa_data/api/v1/metadata.py +707 -0
  152. isa_data/api/v1/notebook.py +223 -0
  153. isa_data/api/v1/quality.py +730 -0
  154. isa_data/api/v1/scm_products.py +68 -0
  155. isa_data/api/v1/unified_products.py +180 -0
  156. isa_data/api/v1/user.py +614 -0
  157. isa_data/api/v1/vector_stores.py +537 -0
  158. isa_data/contracts/__init__.py +2 -0
  159. isa_data/contracts/com_products.py +3351 -0
  160. isa_data/contracts/data_products/__init__.py +139 -0
  161. isa_data/contracts/data_products/analysis_result_record.schema.json +276 -0
  162. isa_data/contracts/data_products/analysis_run_record.schema.json +126 -0
  163. isa_data/contracts/data_products/artifact_definition.schema.json +300 -0
  164. isa_data/contracts/data_products/artifact_surface.py +369 -0
  165. isa_data/contracts/data_products/examples/analysis_result_notebook_only.json +34 -0
  166. isa_data/contracts/data_products/examples/analysis_result_with_model_run.json +38 -0
  167. isa_data/contracts/data_products/examples/analysis_run_notebook_only.json +23 -0
  168. isa_data/contracts/data_products/examples/analysis_run_with_model_run.json +27 -0
  169. isa_data/contracts/data_products/examples/artifact_market_opportunity_model_pack.json +35 -0
  170. isa_data/contracts/data_products/examples/artifact_product_360_table.json +34 -0
  171. isa_data/contracts/data_products/examples/model_governance_ad_budget_recommendations.json +80 -0
  172. isa_data/contracts/data_products/examples/model_governance_customer_risk.json +77 -0
  173. isa_data/contracts/data_products/examples/model_governance_market_opportunity.json +31 -0
  174. isa_data/contracts/data_products/examples/model_governance_price_recommendation_agent.json +34 -0
  175. isa_data/contracts/data_products/examples/publication/commercial_tower_ad_budget_recommendations.json +146 -0
  176. isa_data/contracts/data_products/examples/publication/commercial_tower_customer_risk.json +144 -0
  177. isa_data/contracts/data_products/examples/publication/commercial_tower_kpi_snapshot.json +166 -0
  178. isa_data/contracts/data_products/examples/publication/commercial_tower_market_opportunity.json +186 -0
  179. isa_data/contracts/data_products/examples/publication/commercial_tower_price_recommendations.json +146 -0
  180. isa_data/contracts/data_products/examples/publication/commercial_tower_product_360.json +142 -0
  181. isa_data/contracts/data_products/examples/worked/content_generation_context.json +371 -0
  182. isa_data/contracts/data_products/examples/worked/product_360.json +265 -0
  183. isa_data/contracts/data_products/framework.py +1576 -0
  184. isa_data/contracts/data_products/interface_definition.schema.json +249 -0
  185. isa_data/contracts/data_products/legacy_adapters.py +569 -0
  186. isa_data/contracts/data_products/model_governance_definition.schema.json +489 -0
  187. isa_data/contracts/data_products/policy_definition.schema.json +254 -0
  188. isa_data/contracts/data_products/product_certification_report.schema.json +660 -0
  189. isa_data/contracts/data_products/product_certification_snapshot.schema.json +632 -0
  190. isa_data/contracts/data_products/product_definition.schema.json +543 -0
  191. isa_data/contracts/data_products/product_status_record.schema.json +751 -0
  192. isa_data/core/__init__.py +54 -0
  193. isa_data/core/billing.py +374 -0
  194. isa_data/core/clients/__init__.py +293 -0
  195. isa_data/core/clients/duckdb_client.py +94 -0
  196. isa_data/core/clients/minio_client.py +109 -0
  197. isa_data/core/clients/model_client.py +283 -0
  198. isa_data/core/clients/nats_client.py +190 -0
  199. isa_data/core/clients/neo4j_client.py +109 -0
  200. isa_data/core/clients/postgres_client.py +101 -0
  201. isa_data/core/clients/qdrant_client.py +107 -0
  202. isa_data/core/clients/redis_client.py +105 -0
  203. isa_data/core/config/__init__.py +86 -0
  204. isa_data/core/config/app_config.py +218 -0
  205. isa_data/core/config/consul_config.py +57 -0
  206. isa_data/core/config/data_config.py +280 -0
  207. isa_data/core/config/infra_config.py +85 -0
  208. isa_data/core/config/logging_config.py +47 -0
  209. isa_data/core/config/model_config.py +53 -0
  210. isa_data/core/config/service_config.py +62 -0
  211. isa_data/core/edition.py +57 -0
  212. isa_data/core/errors.py +161 -0
  213. isa_data/core/health.py +153 -0
  214. isa_data/core/logging.py +118 -0
  215. isa_data/core/metrics.py +262 -0
  216. isa_data/core/middleware.py +37 -0
  217. isa_data/core/pool_config.py +57 -0
  218. isa_data/core/resilience.py +166 -0
  219. isa_data/core/security.py +65 -0
  220. isa_data/db/__init__.py +19 -0
  221. isa_data/db/session.py +134 -0
  222. isa_data/main.py +424 -0
  223. isa_data/models/__init__.py +60 -0
  224. isa_data/models/base.py +34 -0
  225. isa_data/models/dto/__init__.py +93 -0
  226. isa_data/models/dto/base_models.py +88 -0
  227. isa_data/models/dto/data_models.py +184 -0
  228. isa_data/models/dto/graph_models.py +193 -0
  229. isa_data/models/dto/interfaces.py +289 -0
  230. isa_data/models/dto/pdf_models.py +139 -0
  231. isa_data/models/notebook_run.py +137 -0
  232. isa_data/models/product_spec.py +214 -0
  233. isa_data/models/project.py +218 -0
  234. isa_data/models/vector_store.py +273 -0
  235. isa_data/processors/__init__.py +5 -0
  236. isa_data/processors/chunking/__init__.py +139 -0
  237. isa_data/processors/chunking/audio/__init__.py +35 -0
  238. isa_data/processors/chunking/audio/chunker.py +750 -0
  239. isa_data/processors/chunking/audio/config.py +58 -0
  240. isa_data/processors/chunking/audio/convenience.py +232 -0
  241. isa_data/processors/chunking/audio/strategies.py +31 -0
  242. isa_data/processors/chunking/base.py +226 -0
  243. isa_data/processors/chunking/image/__init__.py +32 -0
  244. isa_data/processors/chunking/image/chunker.py +659 -0
  245. isa_data/processors/chunking/image/config.py +54 -0
  246. isa_data/processors/chunking/image/convenience.py +257 -0
  247. isa_data/processors/chunking/image/strategies.py +27 -0
  248. isa_data/processors/chunking/tests/__init__.py +5 -0
  249. isa_data/processors/chunking/tests/conftest.py +372 -0
  250. isa_data/processors/chunking/tests/test_audio_chunking.py +835 -0
  251. isa_data/processors/chunking/tests/test_image_chunking.py +453 -0
  252. isa_data/processors/chunking/tests/test_text_chunking.py +443 -0
  253. isa_data/processors/chunking/tests/test_timeseries_chunking.py +1032 -0
  254. isa_data/processors/chunking/tests/test_unified_chunker.py +657 -0
  255. isa_data/processors/chunking/tests/test_video_chunking.py +559 -0
  256. isa_data/processors/chunking/text/__init__.py +42 -0
  257. isa_data/processors/chunking/text/chunker.py +884 -0
  258. isa_data/processors/chunking/text/config.py +63 -0
  259. isa_data/processors/chunking/text/convenience.py +313 -0
  260. isa_data/processors/chunking/text/strategies.py +28 -0
  261. isa_data/processors/chunking/timeseries/__init__.py +44 -0
  262. isa_data/processors/chunking/timeseries/chunker.py +957 -0
  263. isa_data/processors/chunking/timeseries/config.py +76 -0
  264. isa_data/processors/chunking/timeseries/convenience.py +408 -0
  265. isa_data/processors/chunking/timeseries/strategies.py +34 -0
  266. isa_data/processors/chunking/unified.py +396 -0
  267. isa_data/processors/chunking/video/__init__.py +36 -0
  268. isa_data/processors/chunking/video/chunker.py +762 -0
  269. isa_data/processors/chunking/video/config.py +59 -0
  270. isa_data/processors/chunking/video/convenience.py +229 -0
  271. isa_data/processors/chunking/video/strategies.py +34 -0
  272. isa_data/processors/data_processors/__init__.py +32 -0
  273. isa_data/processors/data_processors/analytics/__init__.py +10 -0
  274. isa_data/processors/data_processors/analytics/statistics_processor.py +780 -0
  275. isa_data/processors/data_processors/base_data_processor.py +380 -0
  276. isa_data/processors/data_processors/core/__init__.py +15 -0
  277. isa_data/processors/data_processors/core/data_quality_processor.py +1430 -0
  278. isa_data/processors/data_processors/docs/csv_processor.md +0 -0
  279. isa_data/processors/data_processors/docs/metadata_extractor.md +260 -0
  280. isa_data/processors/data_processors/management/metadata/__init__.py +0 -0
  281. isa_data/processors/data_processors/management/metadata/metadata_extractor.py +942 -0
  282. isa_data/processors/data_processors/preprocessors/__init__.py +25 -0
  283. isa_data/processors/data_processors/preprocessors/cleaning/__init__.py +0 -0
  284. isa_data/processors/data_processors/preprocessors/cleaning/column_standardizer.py +363 -0
  285. isa_data/processors/data_processors/preprocessors/csv_processor.py +402 -0
  286. isa_data/processors/data_processors/preprocessors/loading/__init__.py +0 -0
  287. isa_data/processors/data_processors/preprocessors/loading/file_format_detector.py +175 -0
  288. isa_data/processors/data_processors/preprocessors/validation/__init__.py +0 -0
  289. isa_data/processors/data_processors/preprocessors/validation/data_type_analyzer.py +400 -0
  290. isa_data/processors/data_processors/tests/__init__.py +0 -0
  291. isa_data/processors/data_processors/tests/test_metadata_extractor.py +206 -0
  292. isa_data/processors/data_processors/tests/test_semantic_enricher.py +310 -0
  293. isa_data/processors/data_processors/transformation/__init__.py +37 -0
  294. isa_data/processors/data_processors/transformation/data_aggregator.py +492 -0
  295. isa_data/processors/data_processors/transformation/transform_base.py +457 -0
  296. isa_data/processors/data_processors/utilities/__init__.py +8 -0
  297. isa_data/processors/data_processors/utilities/feature_processor.py +1658 -0
  298. isa_data/processors/data_processors/visualization/__init__.py +0 -0
  299. isa_data/processors/data_processors/visualization/chart_generators/__init__.py +43 -0
  300. isa_data/processors/data_processors/visualization/chart_generators/chart_base.py +397 -0
  301. isa_data/processors/data_processors/visualization/chart_generators/static_chart_generator.py +510 -0
  302. isa_data/processors/data_processors/visualization/export_engines/__init__.py +26 -0
  303. isa_data/processors/data_processors/visualization/export_engines/export_base.py +415 -0
  304. isa_data/processors/file_processors/__init__.py +69 -0
  305. isa_data/processors/file_processors/asset_detector.py +843 -0
  306. isa_data/processors/file_processors/audio_processor.py +595 -0
  307. isa_data/processors/file_processors/docs/markdown_processor.md +184 -0
  308. isa_data/processors/file_processors/docs/pdf_processor.md +324 -0
  309. isa_data/processors/file_processors/docs/regex_extractor.md +0 -0
  310. isa_data/processors/file_processors/image_processor.py +435 -0
  311. isa_data/processors/file_processors/markdown_processor.py +253 -0
  312. isa_data/processors/file_processors/office_processor.py +929 -0
  313. isa_data/processors/file_processors/pdf_processor.py +1093 -0
  314. isa_data/processors/file_processors/regex_extractor.py +271 -0
  315. isa_data/processors/file_processors/table_processor.py +630 -0
  316. isa_data/processors/file_processors/tests/__init__.py +0 -0
  317. isa_data/processors/file_processors/tests/pdf_processor_test_results.json +4481 -0
  318. isa_data/processors/file_processors/tests/test_markdown_processor.py +388 -0
  319. isa_data/processors/file_processors/tests/test_pdf_processoer.py +257 -0
  320. isa_data/processors/file_processors/tests/test_regex_extractor.py +0 -0
  321. isa_data/processors/file_processors/text_chunking.py +1384 -0
  322. isa_data/processors/file_processors/text_processor.py +300 -0
  323. isa_data/processors/file_processors/unified_asset_processor.py +774 -0
  324. isa_data/processors/file_processors/video_processor.py +726 -0
  325. isa_data/repositories/__init__.py +15 -0
  326. isa_data/repositories/vector_store_repository.py +285 -0
  327. isa_data/routers_registry.py +178 -0
  328. isa_data/routes_registry.py +129 -0
  329. isa_data/services/__init__.py +9 -0
  330. isa_data/services/data_fabric_service/__init__.py +29 -0
  331. isa_data/services/data_fabric_service/active_metadata/__init__.py +19 -0
  332. isa_data/services/data_fabric_service/active_metadata/active_metadata_service.py +155 -0
  333. isa_data/services/data_fabric_service/active_metadata/change_detector.py +67 -0
  334. isa_data/services/data_fabric_service/active_metadata/config.py +49 -0
  335. isa_data/services/data_fabric_service/active_metadata/models.py +61 -0
  336. isa_data/services/data_fabric_service/active_metadata/schema_monitor.py +81 -0
  337. isa_data/services/data_fabric_service/active_metadata/schema_store.py +124 -0
  338. isa_data/services/data_fabric_service/catalog/__init__.py +57 -0
  339. isa_data/services/data_fabric_service/catalog/data_catalog_service.py +1357 -0
  340. isa_data/services/data_fabric_service/catalog/models.py +385 -0
  341. isa_data/services/data_fabric_service/federation/__init__.py +99 -0
  342. isa_data/services/data_fabric_service/federation/adapters/__init__.py +5 -0
  343. isa_data/services/data_fabric_service/federation/adapters/generic_jdbc_adapter.py +315 -0
  344. isa_data/services/data_fabric_service/federation/backend_registry.py +229 -0
  345. isa_data/services/data_fabric_service/federation/catalog_sync_engine.py +316 -0
  346. isa_data/services/data_fabric_service/federation/dataphin/__init__.py +27 -0
  347. isa_data/services/data_fabric_service/federation/dataphin/auth.py +122 -0
  348. isa_data/services/data_fabric_service/federation/dataphin/bi_backend.py +421 -0
  349. isa_data/services/data_fabric_service/federation/dataphin/client.py +309 -0
  350. isa_data/services/data_fabric_service/federation/dataphin/deploy/__init__.py +86 -0
  351. isa_data/services/data_fabric_service/federation/dataphin/deploy/backends.py +261 -0
  352. isa_data/services/data_fabric_service/federation/dataphin/deploy/gate.py +96 -0
  353. isa_data/services/data_fabric_service/federation/dataphin/deploy/plan.py +677 -0
  354. isa_data/services/data_fabric_service/federation/dataphin/errors.py +98 -0
  355. isa_data/services/data_fabric_service/federation/dataphin/governance_backend.py +529 -0
  356. isa_data/services/data_fabric_service/federation/dataphin/handoff/__init__.py +80 -0
  357. isa_data/services/data_fabric_service/federation/dataphin/handoff/config.py +242 -0
  358. isa_data/services/data_fabric_service/federation/dataphin/handoff/ct_assertions.py +105 -0
  359. isa_data/services/data_fabric_service/federation/dataphin/handoff/ct_projections.py +548 -0
  360. isa_data/services/data_fabric_service/federation/dataphin/handoff/engine.py +894 -0
  361. isa_data/services/data_fabric_service/federation/dataphin/indicator_backend.py +342 -0
  362. isa_data/services/data_fabric_service/federation/dataphin/master_data_backend.py +183 -0
  363. isa_data/services/data_fabric_service/federation/dataphin/provisioning/__init__.py +68 -0
  364. isa_data/services/data_fabric_service/federation/dataphin/provisioning/dialect.py +125 -0
  365. isa_data/services/data_fabric_service/federation/dataphin/provisioning/env_context.py +157 -0
  366. isa_data/services/data_fabric_service/federation/dataphin/provisioning/translator.py +397 -0
  367. isa_data/services/data_fabric_service/federation/dataphin/scaffolding/__init__.py +70 -0
  368. isa_data/services/data_fabric_service/federation/dataphin/scaffolding/config.py +82 -0
  369. isa_data/services/data_fabric_service/federation/dataphin/scaffolding/engine.py +324 -0
  370. isa_data/services/data_fabric_service/federation/dataphin/warehouse_backend.py +356 -0
  371. isa_data/services/data_fabric_service/federation/dataphin_adapter.py +490 -0
  372. isa_data/services/data_fabric_service/federation/external_schema_monitor.py +181 -0
  373. isa_data/services/data_fabric_service/federation/federated_adapter.py +84 -0
  374. isa_data/services/data_fabric_service/federation/federated_lineage_merger.py +220 -0
  375. isa_data/services/data_fabric_service/federation/iceberg/__init__.py +34 -0
  376. isa_data/services/data_fabric_service/federation/iceberg/errors.py +28 -0
  377. isa_data/services/data_fabric_service/federation/iceberg/hms_catalog.py +404 -0
  378. isa_data/services/data_fabric_service/federation/iceberg_adapter.py +352 -0
  379. isa_data/services/data_fabric_service/federation/landing_zone_service.py +228 -0
  380. isa_data/services/data_fabric_service/federation/local_indicator_backend.py +113 -0
  381. isa_data/services/data_fabric_service/federation/local_warehouse_backend.py +82 -0
  382. isa_data/services/data_fabric_service/federation/models.py +70 -0
  383. isa_data/services/data_fabric_service/federation/platform_registry.py +116 -0
  384. isa_data/services/data_fabric_service/federation/protocols.py +384 -0
  385. isa_data/services/data_fabric_service/federation/query_router.py +283 -0
  386. isa_data/services/data_fabric_service/federation/sync_models.py +88 -0
  387. isa_data/services/data_fabric_service/intelligent_query_service.py +1367 -0
  388. isa_data/services/data_fabric_service/lineage/__init__.py +160 -0
  389. isa_data/services/data_fabric_service/lineage/ai_impact_analyzer.py +420 -0
  390. isa_data/services/data_fabric_service/lineage/ai_lineage_service.py +567 -0
  391. isa_data/services/data_fabric_service/lineage/ai_sql_parser.py +437 -0
  392. isa_data/services/data_fabric_service/lineage/impact_analyzer.py +440 -0
  393. isa_data/services/data_fabric_service/lineage/lineage_explainer.py +563 -0
  394. isa_data/services/data_fabric_service/lineage/lineage_graph.py +639 -0
  395. isa_data/services/data_fabric_service/lineage/lineage_store.py +623 -0
  396. isa_data/services/data_fabric_service/lineage/models.py +251 -0
  397. isa_data/services/data_fabric_service/lineage/product_lineage.py +307 -0
  398. isa_data/services/data_fabric_service/lineage/semantic_column_mapper.py +522 -0
  399. isa_data/services/data_fabric_service/lineage/sql_parser.py +511 -0
  400. isa_data/services/data_fabric_service/metadata/__init__.py +0 -0
  401. isa_data/services/data_fabric_service/metadata/base_metadata_store.py +59 -0
  402. isa_data/services/data_fabric_service/quality/__init__.py +185 -0
  403. isa_data/services/data_fabric_service/quality/ai_anomaly_detector.py +487 -0
  404. isa_data/services/data_fabric_service/quality/ai_profiler.py +738 -0
  405. isa_data/services/data_fabric_service/quality/ai_quality_service.py +604 -0
  406. isa_data/services/data_fabric_service/quality/ai_rules_engine.py +497 -0
  407. isa_data/services/data_fabric_service/quality/knowledge_quality_evaluator.py +296 -0
  408. isa_data/services/data_fabric_service/quality/models.py +775 -0
  409. isa_data/services/data_fabric_service/quality/rules_engine.py +598 -0
  410. isa_data/services/data_fabric_service/quality/unified_quality_service.py +149 -0
  411. isa_data/services/data_fabric_service/query/__init__.py +0 -0
  412. isa_data/services/data_fabric_service/query/base_query_executor.py +99 -0
  413. isa_data/services/data_fabric_service/semantic/__init__.py +59 -0
  414. isa_data/services/data_fabric_service/semantic/dimension_mapper.py +430 -0
  415. isa_data/services/data_fabric_service/semantic/metric_resolver.py +480 -0
  416. isa_data/services/data_fabric_service/semantic/metrics_store.py +63 -0
  417. isa_data/services/data_fabric_service/semantic/models.py +327 -0
  418. isa_data/services/data_fabric_service/semantic/semantic_resolver.py +698 -0
  419. isa_data/services/data_fabric_service/semantic/term_resolver.py +468 -0
  420. isa_data/services/data_infra_service/__init__.py +22 -0
  421. isa_data/services/data_infra_service/access_control/__init__.py +26 -0
  422. isa_data/services/data_infra_service/access_control/models.py +103 -0
  423. isa_data/services/data_infra_service/access_control/rbac_service.py +237 -0
  424. isa_data/services/data_infra_service/agent_fleet/__init__.py +100 -0
  425. isa_data/services/data_infra_service/agent_fleet/detection_events_consumer.py +325 -0
  426. isa_data/services/data_infra_service/agent_fleet/entity_extractor.py +690 -0
  427. isa_data/services/data_infra_service/agent_fleet/response_vectorizer.py +727 -0
  428. isa_data/services/data_infra_service/analytics/__init__.py +24 -0
  429. isa_data/services/data_infra_service/analytics/config.py +34 -0
  430. isa_data/services/data_infra_service/analytics/data_eda.py +806 -0
  431. isa_data/services/data_infra_service/analytics/duckdb_executor.py +23 -0
  432. isa_data/services/data_infra_service/analytics/incremental_refresh.py +28 -0
  433. isa_data/services/data_infra_service/analytics/materialized_view_service.py +53 -0
  434. isa_data/services/data_infra_service/analytics/mv_builder.py +53 -0
  435. isa_data/services/data_infra_service/analytics/mv_registry_store.py +75 -0
  436. isa_data/services/data_infra_service/analytics/query_router.py +22 -0
  437. isa_data/services/data_infra_service/analytics/scheduler.py +42 -0
  438. isa_data/services/data_infra_service/audit/__init__.py +38 -0
  439. isa_data/services/data_infra_service/audit/audit_service.py +211 -0
  440. isa_data/services/data_infra_service/audit/models.py +88 -0
  441. isa_data/services/data_infra_service/augmentation/__init__.py +0 -0
  442. isa_data/services/data_infra_service/augmentation/augmentation_service.py +220 -0
  443. isa_data/services/data_infra_service/augmentation/data_enrichment.py +272 -0
  444. isa_data/services/data_infra_service/augmentation/external_integration.py +184 -0
  445. isa_data/services/data_infra_service/augmentation/merge_validation.py +311 -0
  446. isa_data/services/data_infra_service/cdc_processor.py +718 -0
  447. isa_data/services/data_infra_service/clean_data_analytics_service.py +444 -0
  448. isa_data/services/data_infra_service/data_analytics_service.py +2047 -0
  449. isa_data/services/data_infra_service/digital_assets/__init__.py +35 -0
  450. isa_data/services/data_infra_service/digital_assets/asset_landing.py +447 -0
  451. isa_data/services/data_infra_service/ingestion/README.md +579 -0
  452. isa_data/services/data_infra_service/ingestion/__init__.py +40 -0
  453. isa_data/services/data_infra_service/ingestion/batch_processor.py +555 -0
  454. isa_data/services/data_infra_service/ingestion/cdc_listener.py +343 -0
  455. isa_data/services/data_infra_service/ingestion/ingestion_service.py +703 -0
  456. isa_data/services/data_infra_service/management/metadata/__init__.py +0 -0
  457. isa_data/services/data_infra_service/management/metadata/data_explorer.py +922 -0
  458. isa_data/services/data_infra_service/management/metadata/metadata_catalog_service.py +1319 -0
  459. isa_data/services/data_infra_service/management/metadata/metadata_embedding.py +972 -0
  460. isa_data/services/data_infra_service/management/metadata/metadata_semantic_service.py +635 -0
  461. isa_data/services/data_infra_service/management/metadata/metadata_store_service.py +520 -0
  462. isa_data/services/data_infra_service/management/metadata/semantic_enricher.py +979 -0
  463. isa_data/services/data_infra_service/management/quality/__init__.py +39 -0
  464. isa_data/services/data_infra_service/management/quality/quality_assessment.py +797 -0
  465. isa_data/services/data_infra_service/management/quality/quality_improvement.py +853 -0
  466. isa_data/services/data_infra_service/management/quality/quality_management_service.py +720 -0
  467. isa_data/services/data_infra_service/management/quality/quality_monitoring.py +1077 -0
  468. isa_data/services/data_infra_service/pipeline_orchestrator.py +753 -0
  469. isa_data/services/data_infra_service/preprocessor/__init__.py +34 -0
  470. isa_data/services/data_infra_service/preprocessor/data_cleaning.py +476 -0
  471. isa_data/services/data_infra_service/preprocessor/data_loading.py +312 -0
  472. isa_data/services/data_infra_service/preprocessor/data_validation.py +447 -0
  473. isa_data/services/data_infra_service/preprocessor/dataframe_preprocessor.py +112 -0
  474. isa_data/services/data_infra_service/preprocessor/preprocessor_service.py +792 -0
  475. isa_data/services/data_infra_service/query/__init__.py +0 -0
  476. isa_data/services/data_infra_service/query/query_matcher.py +1050 -0
  477. isa_data/services/data_infra_service/query/sql_executor.py +2162 -0
  478. isa_data/services/data_infra_service/query/sql_generator.py +821 -0
  479. isa_data/services/data_infra_service/query/sql_guardrail.py +207 -0
  480. isa_data/services/data_infra_service/query/sql_query_service.py +542 -0
  481. isa_data/services/data_infra_service/residency/__init__.py +40 -0
  482. isa_data/services/data_infra_service/residency/policy.py +317 -0
  483. isa_data/services/data_infra_service/storage/__init__.py +35 -0
  484. isa_data/services/data_infra_service/storage/agent_fleet/README.md +233 -0
  485. isa_data/services/data_infra_service/storage/agent_fleet/__init__.py +58 -0
  486. isa_data/services/data_infra_service/storage/agent_fleet/agent_iceberg_writer.py +1084 -0
  487. isa_data/services/data_infra_service/storage/base_data_lake.py +75 -0
  488. isa_data/services/data_infra_service/storage/data_lake_service.py +524 -0
  489. isa_data/services/data_infra_service/storage/delta_lake_manager.py +412 -0
  490. isa_data/services/data_infra_service/storage/deprecated/__init__.py +0 -0
  491. isa_data/services/data_infra_service/storage/deprecated/data_persistence.py +494 -0
  492. isa_data/services/data_infra_service/storage/deprecated/data_storage_service.py +458 -0
  493. isa_data/services/data_infra_service/storage/deprecated/storage_catalog.py +495 -0
  494. isa_data/services/data_infra_service/storage/deprecated/storage_path_resolver.py +398 -0
  495. isa_data/services/data_infra_service/storage/deprecated/storage_target_selection.py +508 -0
  496. isa_data/services/data_infra_service/storage/iceberg_std_writer.py +402 -0
  497. isa_data/services/data_infra_service/storage/zone_management_service.py +547 -0
  498. isa_data/services/data_infra_service/streaming/__init__.py +71 -0
  499. isa_data/services/data_infra_service/streaming/change_data_processor.py +426 -0
  500. isa_data/services/data_infra_service/streaming/incremental_processor.py +562 -0
  501. isa_data/services/data_infra_service/streaming/models.py +318 -0
  502. isa_data/services/data_infra_service/streaming/streaming_ingestion.py +497 -0
  503. isa_data/services/data_infra_service/streaming/upsert_manager.py +490 -0
  504. isa_data/services/data_infra_service/transformation/__init__.py +24 -0
  505. isa_data/services/data_infra_service/transformation/business_rules.py +564 -0
  506. isa_data/services/data_infra_service/transformation/data_aggregation.py +302 -0
  507. isa_data/services/data_infra_service/transformation/feature_engineering.py +454 -0
  508. isa_data/services/data_infra_service/transformation/lang_extractor.py +1096 -0
  509. isa_data/services/data_infra_service/transformation/transformation_service.py +359 -0
  510. isa_data/services/data_infra_service/visualization/__init__.py +0 -0
  511. isa_data/services/data_infra_service/visualization/data_visualization.py +1213 -0
  512. isa_data/services/data_product_service/README.md +252 -0
  513. isa_data/services/data_product_service/__init__.py +34 -0
  514. isa_data/services/data_product_service/analytics/__init__.py +60 -0
  515. isa_data/services/data_product_service/analytics/user_churn_prediction.py +578 -0
  516. isa_data/services/data_product_service/analytics/user_intent_prediction.py +513 -0
  517. isa_data/services/data_product_service/analytics/user_ltv_prediction.py +455 -0
  518. isa_data/services/data_product_service/analytics/user_trends.py +431 -0
  519. isa_data/services/data_product_service/base/__init__.py +9 -0
  520. isa_data/services/data_product_service/base/base_product.py +271 -0
  521. isa_data/services/data_product_service/basic/__init__.py +19 -0
  522. isa_data/services/data_product_service/basic/user_360.py +446 -0
  523. isa_data/services/data_product_service/basic/user_order_history.py +271 -0
  524. isa_data/services/data_product_service/basic/user_profile.py +313 -0
  525. isa_data/services/data_product_service/behavior/__init__.py +62 -0
  526. isa_data/services/data_product_service/behavior/user_behavior_patterns.py +433 -0
  527. isa_data/services/data_product_service/behavior/user_engagement_metrics.py +549 -0
  528. isa_data/services/data_product_service/behavior/user_feature_usage.py +502 -0
  529. isa_data/services/data_product_service/behavior/user_journey_analysis.py +569 -0
  530. isa_data/services/data_product_service/catalog_seed/__init__.py +33 -0
  531. isa_data/services/data_product_service/catalog_seed/isa_data_products.py +388 -0
  532. isa_data/services/data_product_service/commerce/__init__.py +34 -0
  533. isa_data/services/data_product_service/commerce/com_product_service.py +779 -0
  534. isa_data/services/data_product_service/commerce/isa_model_client.py +218 -0
  535. isa_data/services/data_product_service/consumer_contracts.py +524 -0
  536. isa_data/services/data_product_service/context/__init__.py +12 -0
  537. isa_data/services/data_product_service/governance/__init__.py +43 -0
  538. isa_data/services/data_product_service/governance/alert_dispatcher.py +278 -0
  539. isa_data/services/data_product_service/governance/certification_runner.py +354 -0
  540. isa_data/services/data_product_service/governance/cross_modal_lineage.py +105 -0
  541. isa_data/services/data_product_service/governance/curation_gate.py +101 -0
  542. isa_data/services/data_product_service/governance/federation_monitor.py +92 -0
  543. isa_data/services/data_product_service/governance/gdpr_service.py +118 -0
  544. isa_data/services/data_product_service/governance/gold_promotion_gate.py +124 -0
  545. isa_data/services/data_product_service/governance/notification_service.py +268 -0
  546. isa_data/services/data_product_service/governance/output_port_service.py +351 -0
  547. isa_data/services/data_product_service/governance/pii_classification_service.py +137 -0
  548. isa_data/services/data_product_service/governance/pii_detector.py +202 -0
  549. isa_data/services/data_product_service/governance/policy_evaluator.py +579 -0
  550. isa_data/services/data_product_service/governance/product_status_service.py +351 -0
  551. isa_data/services/data_product_service/governance/product_versioning.py +152 -0
  552. isa_data/services/data_product_service/governance/publication_workflow.py +445 -0
  553. isa_data/services/data_product_service/governance/sla_monitor_service.py +481 -0
  554. isa_data/services/data_product_service/lineage/__init__.py +36 -0
  555. isa_data/services/data_product_service/lineage/lineage_models.py +142 -0
  556. isa_data/services/data_product_service/lineage/lineage_service.py +99 -0
  557. isa_data/services/data_product_service/recommendations/__init__.py +76 -0
  558. isa_data/services/data_product_service/recommendations/user_content_recommendations.py +696 -0
  559. isa_data/services/data_product_service/recommendations/user_graph_recommendations.py +784 -0
  560. isa_data/services/data_product_service/recommendations/user_product_recommendations.py +882 -0
  561. isa_data/services/data_product_service/recommendations/user_social_recommendations.py +791 -0
  562. isa_data/services/data_product_service/scm/__init__.py +2 -0
  563. isa_data/services/data_product_service/scm/scm_product_service.py +219 -0
  564. isa_data/services/data_product_service/scoping.py +79 -0
  565. isa_data/services/data_product_service/social/__init__.py +12 -0
  566. isa_data/services/data_product_service/unified/__init__.py +37 -0
  567. isa_data/services/data_product_service/unified/cross_type_relationships.py +116 -0
  568. isa_data/services/data_product_service/unified/knowledge_product_registry.py +140 -0
  569. isa_data/services/data_product_service/unified/knowledge_quality_evaluator.py +126 -0
  570. isa_data/services/data_product_service/unified/knowledge_quality_gate.py +159 -0
  571. isa_data/services/data_product_service/unified/knowledge_zone_manager.py +182 -0
  572. isa_data/services/digital_service/ARCHITECTURE.md +329 -0
  573. isa_data/services/digital_service/__init__.py +51 -0
  574. isa_data/services/digital_service/base/__init__.py +32 -0
  575. isa_data/services/digital_service/base/base_rag_service.py +1054 -0
  576. isa_data/services/digital_service/base/rag_exceptions.py +34 -0
  577. isa_data/services/digital_service/base/rag_models.py +298 -0
  578. isa_data/services/digital_service/config/__init__.py +4 -0
  579. isa_data/services/digital_service/config/analytics_config.py +112 -0
  580. isa_data/services/digital_service/config/eval_datasets/basic_functionality.json +39 -0
  581. isa_data/services/digital_service/config/eval_datasets/multihop_reasoning.json +17 -0
  582. isa_data/services/digital_service/config/eval_datasets/multilingual.json +17 -0
  583. isa_data/services/digital_service/docs/rag_status.md +148 -0
  584. isa_data/services/digital_service/enhanced_digital_service.py +1547 -0
  585. isa_data/services/digital_service/evaluation/__init__.py +20 -0
  586. isa_data/services/digital_service/evaluation/dataset_manager.py +251 -0
  587. isa_data/services/digital_service/evaluation/diagnostic_service.py +615 -0
  588. isa_data/services/digital_service/evaluation/evaluation_service.py +379 -0
  589. isa_data/services/digital_service/evaluation/metrics_service.py +543 -0
  590. isa_data/services/digital_service/evaluation/reporting_service.py +376 -0
  591. isa_data/services/digital_service/patterns/__init__.py +24 -0
  592. isa_data/services/digital_service/patterns/crag_rag_service.py +775 -0
  593. isa_data/services/digital_service/patterns/custom_rag_service.py +1626 -0
  594. isa_data/services/digital_service/patterns/graph_rag/__init__.py +17 -0
  595. isa_data/services/digital_service/patterns/graph_rag/attribute_extractor.py +379 -0
  596. isa_data/services/digital_service/patterns/graph_rag/core/__init__.py +40 -0
  597. isa_data/services/digital_service/patterns/graph_rag/core/config.py +302 -0
  598. isa_data/services/digital_service/patterns/graph_rag/core/dask_manager.py +266 -0
  599. isa_data/services/digital_service/patterns/graph_rag/core/strategies.py +639 -0
  600. isa_data/services/digital_service/patterns/graph_rag/core/types.py +254 -0
  601. isa_data/services/digital_service/patterns/graph_rag/docs/attribute_extractor.md +451 -0
  602. isa_data/services/digital_service/patterns/graph_rag/docs/design_guidance.md +266 -0
  603. isa_data/services/digital_service/patterns/graph_rag/docs/entity_extractor.md +470 -0
  604. isa_data/services/digital_service/patterns/graph_rag/docs/graph_constructor.md +283 -0
  605. isa_data/services/digital_service/patterns/graph_rag/docs/knowledge_retriever.md +373 -0
  606. isa_data/services/digital_service/patterns/graph_rag/docs/neo4j_client.md +415 -0
  607. isa_data/services/digital_service/patterns/graph_rag/docs/neo4j_store.md +352 -0
  608. isa_data/services/digital_service/patterns/graph_rag/docs/relation_extractor.md +292 -0
  609. isa_data/services/digital_service/patterns/graph_rag/domains/business.yaml +291 -0
  610. isa_data/services/digital_service/patterns/graph_rag/domains/codebase.yaml +303 -0
  611. isa_data/services/digital_service/patterns/graph_rag/domains/medical.yaml +262 -0
  612. isa_data/services/digital_service/patterns/graph_rag/embedding_utils.py +247 -0
  613. isa_data/services/digital_service/patterns/graph_rag/entity_extractor.py +478 -0
  614. isa_data/services/digital_service/patterns/graph_rag/graph_constructor.py +1155 -0
  615. isa_data/services/digital_service/patterns/graph_rag/knowledge_retriever.py +578 -0
  616. isa_data/services/digital_service/patterns/graph_rag/neo4j_store.py +266 -0
  617. isa_data/services/digital_service/patterns/graph_rag/relation_extractor.py +448 -0
  618. isa_data/services/digital_service/patterns/graph_rag_service.py +733 -0
  619. isa_data/services/digital_service/patterns/hyde_rag_service.py +423 -0
  620. isa_data/services/digital_service/patterns/pageindex/__init__.py +46 -0
  621. isa_data/services/digital_service/patterns/pageindex/index_generator.py +591 -0
  622. isa_data/services/digital_service/patterns/pageindex/tree_search.py +436 -0
  623. isa_data/services/digital_service/patterns/pageindex/utils.py +326 -0
  624. isa_data/services/digital_service/patterns/pageindex_rag_service.py +513 -0
  625. isa_data/services/digital_service/patterns/rag_fusion_service.py +499 -0
  626. isa_data/services/digital_service/patterns/raptor_rag_service.py +729 -0
  627. isa_data/services/digital_service/patterns/self_rag_service.py +725 -0
  628. isa_data/services/digital_service/patterns/simple_rag_service.py +562 -0
  629. isa_data/services/digital_service/pdf_extract_service.py +721 -0
  630. isa_data/services/digital_service/rag_factory.py +883 -0
  631. isa_data/services/digital_service/utils/__init__.py +63 -0
  632. isa_data/services/digital_service/utils/chunking_adapter.py +407 -0
  633. isa_data/services/digital_service/utils/convenience_functions.py +125 -0
  634. isa_data/services/digital_service/utils/deep_search/__init__.py +10 -0
  635. isa_data/services/digital_service/utils/deep_search/models.py +224 -0
  636. isa_data/services/digital_service/utils/deep_search/result_fusion.py +313 -0
  637. isa_data/services/digital_service/utils/policy_agent.py +402 -0
  638. isa_data/services/digital_service/utils/retrieval_funnel.py +264 -0
  639. isa_data/services/digital_service/utils/retrieval_supervisor.py +331 -0
  640. isa_data/services/digital_service/utils/search_adapter.py +543 -0
  641. isa_data/services/feature_store_service/__init__.py +43 -0
  642. isa_data/services/model_import_service.py +566 -0
  643. isa_data/services/notebook_service/__init__.py +34 -0
  644. isa_data/services/notebook_service/executor.py +466 -0
  645. isa_data/services/notebook_service/metrics_extractor.py +206 -0
  646. isa_data/services/notebook_service/models.py +160 -0
  647. isa_data/services/notebook_service/storage.py +257 -0
  648. isa_data/services/product_spec/__init__.py +48 -0
  649. isa_data/services/product_spec/deploy_contract/__init__.py +86 -0
  650. isa_data/services/product_spec/deploy_contract/loader.py +299 -0
  651. isa_data/services/product_spec/deploy_contract/namespace.py +127 -0
  652. isa_data/services/product_spec/deploy_contract/protocol.py +577 -0
  653. isa_data/services/product_spec/seed_data.py +123 -0
  654. isa_data/services/product_spec/seeder.py +105 -0
  655. isa_data/services/product_spec/spec_source.py +319 -0
  656. isa_data/services/vector_service/__init__.py +226 -0
  657. isa_data/services/vector_service/backends/__init__.py +28 -0
  658. isa_data/services/vector_service/backends/factory.py +181 -0
  659. isa_data/services/vector_service/backends/milvus_backend.py +266 -0
  660. isa_data/services/vector_service/backends/qdrant_backend.py +470 -0
  661. isa_data/services/vector_service/base/__init__.py +52 -0
  662. isa_data/services/vector_service/base/exceptions.py +63 -0
  663. isa_data/services/vector_service/base/models.py +130 -0
  664. isa_data/services/vector_service/base/vector_db.py +457 -0
  665. isa_data/services/vector_service/chunking/__init__.py +67 -0
  666. isa_data/services/vector_service/chunking/advanced_chunkers.py +828 -0
  667. isa_data/services/vector_service/chunking/chunking_service.py +1154 -0
  668. isa_data/services/vector_service/chunking/models.py +136 -0
  669. isa_data/services/vector_service/search/__init__.py +29 -0
  670. isa_data/services/vector_service/search/hybrid_search.py +433 -0
  671. isa_data/services/vector_service/search/mmr_reranker.py +426 -0
  672. isa_data/services/vector_service/updates/__init__.py +36 -0
  673. isa_data/services/vector_service/updates/incremental_service.py +624 -0
  674. isa_data/services/vector_store/__init__.py +69 -0
  675. isa_data/services/vector_store/integrations.py +330 -0
  676. isa_data/services/vector_store/manager.py +344 -0
  677. isa_data/services/vector_store/pipeline.py +255 -0
  678. isa_data/services/vector_store/providers.py +173 -0
  679. isa_data/services/vector_store/schemas.py +105 -0
  680. isa_data-1.0.0.dist-info/METADATA +172 -0
  681. isa_data-1.0.0.dist-info/RECORD +683 -0
  682. isa_data-1.0.0.dist-info/WHEEL +5 -0
  683. isa_data-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,404 @@
1
+ """
2
+ HMS / Iceberg catalog client — facade over PyIceberg's ``HiveCatalog``.
3
+
4
+ When the isA_Data lakehouse is backed by Hive Metastore + Iceberg, snapshot
5
+ and manifest provenance may previously have been reachable only through an
6
+ external governance platform. This client gives isA_Data a direct read path:
7
+ list/describe Iceberg tables AND walk the snapshot history straight from the
8
+ metastore.
9
+
10
+ Why PyIceberg's ``HiveCatalog`` (and not ``pyhive``/``hmsclient``)
11
+ -----------------------------------------------------------------
12
+ A raw Thrift HMS client (``pyhive`` / ``hmsclient``) only returns the *Hive*
13
+ view of a table — storage descriptor, columns, the ``metadata_location``
14
+ table property. It would still leave us to fetch and parse the Iceberg
15
+ metadata JSON, manifest lists and manifest files by hand. PyIceberg's
16
+ ``HiveCatalog`` resolves the metastore entry AND loads the Iceberg table
17
+ object, exposing ``.snapshots()``, ``.history()`` and per-snapshot manifest
18
+ metadata in one client. One dependency, both acceptance criteria covered.
19
+
20
+ Configuration (env vars, resolved by :class:`HmsCatalogConfig.from_env`)
21
+ ------------------------------------------------------------------------
22
+ HMS_THRIFT_URI — Hive Metastore Thrift URI, e.g. ``thrift://hms:9083``
23
+ HMS_CATALOG_NAME — logical catalog name (default: ``hms``)
24
+ HMS_WAREHOUSE — optional warehouse path (e.g. ``s3a://lakehouse/wh``)
25
+ HMS_S3_ENDPOINT — optional S3 endpoint for reading manifest files
26
+ HMS_S3_ACCESS_KEY — optional S3 access key
27
+ HMS_S3_SECRET_KEY — optional S3 secret key
28
+
29
+ No hosts or credentials are hardcoded — an unconfigured environment yields a
30
+ config whose :attr:`HmsCatalogConfig.configured` is ``False``, and the client
31
+ degrades gracefully instead of crashing.
32
+
33
+ Graceful degradation
34
+ --------------------
35
+ ``pyiceberg`` is an optional dependency. It is imported lazily inside
36
+ :meth:`HmsCatalogClient.connect` — importing this module never fails, so the
37
+ FastAPI app boots even where ``pyiceberg[hive]`` is not installed. Operations
38
+ that genuinely need a live catalog raise a typed
39
+ :class:`IcebergCatalogError`; nothing crashes the process.
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ import os
45
+ from dataclasses import dataclass, field
46
+ from typing import Any, Dict, List, Optional
47
+
48
+ from isa_data.core.logging import get_logger
49
+
50
+ from .errors import (
51
+ IcebergConnectionError,
52
+ IcebergDependencyMissingError,
53
+ IcebergTableNotFoundError,
54
+ )
55
+
56
+ logger = get_logger(__name__)
57
+
58
DEFAULT_CATALOG_NAME = "hms"


@dataclass
class HmsCatalogConfig:
    """Connection config for a Hive Metastore-backed Iceberg catalog.

    Resolved from the environment — never carries hardcoded hosts/secrets.
    Every field has a default, so an unconfigured environment still produces
    a valid object whose :attr:`configured` is ``False``.
    """

    thrift_uri: str = ""
    catalog_name: str = DEFAULT_CATALOG_NAME
    warehouse: str = ""
    s3_endpoint: str = ""
    s3_access_key: str = ""
    # Kept out of the generated ``__repr__`` so the secret cannot leak into
    # log lines or tracebacks that happen to print the config object.
    s3_secret_key: str = field(default="", repr=False)

    @property
    def configured(self) -> bool:
        """True only when a Thrift URI is present — the minimum to connect."""
        return bool(self.thrift_uri)

    @classmethod
    def from_env(cls) -> "HmsCatalogConfig":
        """Load configuration from the ``HMS_*`` environment variables.

        Returns:
            A config populated from the environment. Unset variables become
            empty strings, except the catalog name, which falls back to
            ``DEFAULT_CATALOG_NAME``.
        """
        return cls(
            thrift_uri=os.getenv("HMS_THRIFT_URI", ""),
            # ``or`` rather than a getenv default: a variable that is set but
            # empty must also fall back to the default catalog name.
            catalog_name=os.getenv("HMS_CATALOG_NAME") or DEFAULT_CATALOG_NAME,
            warehouse=os.getenv("HMS_WAREHOUSE", ""),
            s3_endpoint=os.getenv("HMS_S3_ENDPOINT", ""),
            s3_access_key=os.getenv("HMS_S3_ACCESS_KEY", ""),
            s3_secret_key=os.getenv("HMS_S3_SECRET_KEY", ""),
        )

    def to_pyiceberg_properties(self) -> Dict[str, str]:
        """Render PyIceberg ``load_catalog`` properties from this config.

        Returns:
            A dict that always contains ``type`` and ``uri``; the warehouse
            and S3 entries are included only when their value is non-empty.
        """
        props: Dict[str, str] = {
            "type": "hive",
            "uri": self.thrift_uri,
        }
        optional = (
            ("warehouse", self.warehouse),
            ("s3.endpoint", self.s3_endpoint),
            ("s3.access-key-id", self.s3_access_key),
            ("s3.secret-access-key", self.s3_secret_key),
        )
        props.update({key: value for key, value in optional if value})
        return props
107
+
108
+
109
@dataclass
class IcebergSnapshotInfo:
    """One Iceberg snapshot together with its manifest provenance.

    Holds what is needed to express snapshot/manifest provenance as lineage:
    the snapshot id, its parent id, the operation that produced it, and the
    manifest list / manifest files it references.
    """

    snapshot_id: int
    parent_snapshot_id: Optional[int]
    timestamp_ms: int
    operation: str = ""
    manifest_list: str = ""
    summary: Dict[str, Any] = field(default_factory=dict)
    manifests: List[str] = field(default_factory=list)
    schema_id: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the snapshot as a plain dict keyed by field name.

        The ``summary`` and ``manifests`` values are the same objects held by
        this instance, not copies.
        """
        exported = (
            "snapshot_id",
            "parent_snapshot_id",
            "timestamp_ms",
            "operation",
            "manifest_list",
            "summary",
            "manifests",
            "schema_id",
        )
        return {name: getattr(self, name) for name in exported}
138
+
139
+
140
@dataclass
class IcebergTableMetadata:
    """Result of describing one Iceberg table read via HMS."""

    namespace: str
    table_name: str
    location: str = ""
    metadata_location: str = ""
    current_snapshot_id: Optional[int] = None
    columns: List[Dict[str, Any]] = field(default_factory=list)
    partition_spec: List[str] = field(default_factory=list)
    properties: Dict[str, Any] = field(default_factory=dict)
    snapshots: List["IcebergSnapshotInfo"] = field(default_factory=list)

    @property
    def identifier(self) -> str:
        """Fully-qualified ``namespace.table`` identifier."""
        return "{}.{}".format(self.namespace, self.table_name)

    def to_dict(self) -> Dict[str, Any]:
        """Return the table metadata as a plain dict.

        Each snapshot is serialised through its own ``to_dict``; the derived
        ``identifier`` is included alongside the stored fields.
        """
        serialised_snapshots = [snap.to_dict() for snap in self.snapshots]
        payload: Dict[str, Any] = {
            "namespace": self.namespace,
            "table_name": self.table_name,
            "identifier": self.identifier,
        }
        payload["location"] = self.location
        payload["metadata_location"] = self.metadata_location
        payload["current_snapshot_id"] = self.current_snapshot_id
        payload["columns"] = self.columns
        payload["partition_spec"] = self.partition_spec
        payload["properties"] = self.properties
        payload["snapshots"] = serialised_snapshots
        return payload
172
+
173
+
174
class HmsCatalogClient:
    """Read-only client over a Hive Metastore-backed Iceberg catalog.

    Lifecycle::

        client = HmsCatalogClient.from_env()
        client.connect()                      # lazy-imports pyiceberg
        namespaces = client.list_namespaces()
        tables = client.list_tables("analytics")
        meta = client.describe_table("analytics", "orders")

    Catalog operations report failures through the typed ``Iceberg*`` errors
    imported from ``.errors``; the constructor itself performs no I/O and no
    ``pyiceberg`` import.
    """

    def __init__(self, config: Optional[HmsCatalogConfig] = None) -> None:
        """Store the config; the catalog is loaded later by :meth:`connect`.

        Args:
            config: Connection settings. When omitted, a default (and
                therefore unconfigured) :class:`HmsCatalogConfig` is used.
        """
        self._config = config or HmsCatalogConfig()
        self._catalog: Any = None

    @classmethod
    def from_env(cls) -> "HmsCatalogClient":
        """Create a client from environment variables."""
        return cls(HmsCatalogConfig.from_env())

    @property
    def config(self) -> HmsCatalogConfig:
        """The configuration this client was built with."""
        return self._config

    @property
    def connected(self) -> bool:
        """True once a catalog object has been loaded."""
        return self._catalog is not None

    # ------------------------------------------------------------------
    # Connection
    # ------------------------------------------------------------------

    def connect(self) -> None:
        """Load the PyIceberg ``HiveCatalog``; a no-op when already loaded.

        Raises:
            IcebergDependencyMissingError: ``pyiceberg`` is not installed.
            IcebergConnectionError: no Thrift URI configured, or the
                catalog could not be loaded.
        """
        if self._catalog is not None:
            return

        if not self._config.configured:
            raise IcebergConnectionError(
                "HMS not configured — set HMS_THRIFT_URI "
                "(e.g. thrift://metastore-host:9083)"
            )

        # Lazy import — keeps `pyiceberg` optional. Importing this module
        # must never fail just because the extra is not installed.
        try:
            from pyiceberg.catalog import load_catalog
        except ImportError as exc:
            raise IcebergDependencyMissingError(
                "pyiceberg is not installed — install the HMS extra with "
                "`pip install 'pyiceberg[hive]'` to read Iceberg metadata "
                "via Hive Metastore"
            ) from exc

        try:
            self._catalog = load_catalog(
                self._config.catalog_name,
                **self._config.to_pyiceberg_properties(),
            )
        except Exception as exc:  # pragma: no cover - network-specific
            raise IcebergConnectionError(
                f"Failed to load Hive Metastore catalog "
                f"'{self._config.catalog_name}' at "
                f"'{self._config.thrift_uri}': {exc}"
            ) from exc

        logger.info(
            "hms_catalog_connected",
            catalog=self._config.catalog_name,
            thrift_uri=self._config.thrift_uri,
        )

    def _require_catalog(self) -> Any:
        """Return the loaded catalog, connecting on first use."""
        if self._catalog is None:
            self.connect()
        return self._catalog

    # ------------------------------------------------------------------
    # Table listing / describe
    # ------------------------------------------------------------------

    def list_namespaces(self) -> List[str]:
        """List all namespaces (Hive databases) in the catalog.

        Returns:
            One dot-joined string per namespace tuple the catalog returns.

        Raises:
            IcebergConnectionError: the catalog call failed.
        """
        catalog = self._require_catalog()
        try:
            raw = catalog.list_namespaces()
        except Exception as exc:  # pragma: no cover - network-specific
            raise IcebergConnectionError(f"Failed to list namespaces: {exc}") from exc
        return [".".join(ns) for ns in raw]

    def list_tables(self, namespace: str) -> List[str]:
        """List fully-qualified Iceberg table identifiers in a namespace.

        Raises:
            IcebergConnectionError: the catalog call failed.
        """
        catalog = self._require_catalog()
        try:
            raw = catalog.list_tables(namespace)
        except Exception as exc:  # pragma: no cover - network-specific
            raise IcebergConnectionError(
                f"Failed to list tables in namespace '{namespace}': {exc}"
            ) from exc
        return [".".join(ident) for ident in raw]

    def describe_table(self, namespace: str, table_name: str) -> IcebergTableMetadata:
        """Load full metadata for one Iceberg table, including snapshots.

        Raises:
            IcebergTableNotFoundError: the table does not exist.
            IcebergConnectionError: the metastore could not be reached.
        """
        catalog = self._require_catalog()
        identifier = f"{namespace}.{table_name}"
        try:
            table = catalog.load_table(identifier)
        except Exception as exc:
            # PyIceberg raises NoSuchTableError; match by name to avoid a
            # hard dependency on the exception class across versions.
            if "NoSuchTable" in type(exc).__name__ or "not exist" in str(exc):
                raise IcebergTableNotFoundError(
                    f"Iceberg table '{identifier}' not found in HMS"
                ) from exc
            raise IcebergConnectionError(
                f"Failed to load table '{identifier}': {exc}"
            ) from exc

        return self._table_to_metadata(namespace, table_name, table)

    # ------------------------------------------------------------------
    # Snapshot / manifest provenance
    # ------------------------------------------------------------------

    def get_snapshots(
        self, namespace: str, table_name: str
    ) -> List[IcebergSnapshotInfo]:
        """Return the full snapshot history for a table.

        Delegates to :meth:`describe_table`, so it raises the same errors.
        Each snapshot carries its parent id and manifest-list pointer — the
        raw material for snapshot lineage edges.
        """
        return self.describe_table(namespace, table_name).snapshots

    # ------------------------------------------------------------------
    # PyIceberg object -> dataclass mapping
    # ------------------------------------------------------------------

    @staticmethod
    def _table_to_metadata(
        namespace: str, table_name: str, table: Any
    ) -> IcebergTableMetadata:
        """Map a PyIceberg ``Table`` to :class:`IcebergTableMetadata`.

        Schema and partition-spec extraction are best-effort: if either
        raises, the corresponding list is returned empty rather than failing
        the whole describe call.
        """
        meta = table.metadata

        columns: List[Dict[str, Any]] = []
        try:
            for fld in table.schema().fields:
                columns.append(
                    {
                        "name": fld.name,
                        "type": str(fld.field_type),
                        "required": bool(fld.required),
                        "field_id": fld.field_id,
                    }
                )
        except Exception:  # pragma: no cover - schema-shape defensive
            columns = []

        partition_spec: List[str] = []
        try:
            for pf in table.spec().fields:
                partition_spec.append(pf.name)
        except Exception:  # pragma: no cover - spec-shape defensive
            partition_spec = []

        current_snapshot_id = getattr(meta, "current_snapshot_id", None)

        snapshots = [
            HmsCatalogClient._snapshot_to_info(snap)
            for snap in getattr(meta, "snapshots", []) or []
        ]

        return IcebergTableMetadata(
            namespace=namespace,
            table_name=table_name,
            location=getattr(meta, "location", "") or "",
            metadata_location=getattr(table, "metadata_location", "") or "",
            current_snapshot_id=current_snapshot_id,
            columns=columns,
            partition_spec=partition_spec,
            properties=dict(getattr(meta, "properties", {}) or {}),
            snapshots=snapshots,
        )

    @staticmethod
    def _snapshot_to_info(snap: Any) -> IcebergSnapshotInfo:
        """Map a PyIceberg ``Snapshot`` to :class:`IcebergSnapshotInfo`.

        Missing attributes fall back to neutral defaults (``0``, ``""``,
        ``None``) so a partially populated snapshot still maps cleanly.
        """
        summary_obj = getattr(snap, "summary", None)
        operation = ""
        summary: Dict[str, Any] = {}
        if summary_obj is not None:
            raw_operation = getattr(summary_obj, "operation", None)
            # The operation may be an enum member: str() of a plain Enum
            # yields "Operation.APPEND", whereas the Iceberg operation name
            # ("append") is its `.value`. Plain strings pass through as-is.
            raw_operation = getattr(raw_operation, "value", raw_operation)
            operation = str(raw_operation or "")

            # NOTE(review): PyIceberg's Summary is expected to expose its
            # extra key/values as `additional_properties`; confirm against
            # the installed version. When present it is preferred over
            # dict(summary_obj), which is not a reliable conversion for
            # that model.
            extra = getattr(summary_obj, "additional_properties", None)
            if isinstance(extra, dict):
                summary = dict(extra)
                if operation:
                    summary["operation"] = operation
            else:
                try:
                    summary = dict(summary_obj)
                except (TypeError, ValueError, AttributeError):
                    # Best-effort: an unconvertible summary must not fail
                    # the whole snapshot mapping.
                    summary = {}

        manifests: List[str] = []
        # `manifests(io)` needs a FileIO to read the manifest list. When an
        # IO is wired we could enumerate manifest file paths; we keep the
        # manifest-list pointer unconditionally so provenance is never lost.
        manifest_list = getattr(snap, "manifest_list", "") or ""

        return IcebergSnapshotInfo(
            snapshot_id=getattr(snap, "snapshot_id", 0),
            parent_snapshot_id=getattr(snap, "parent_snapshot_id", None),
            timestamp_ms=getattr(snap, "timestamp_ms", 0) or 0,
            operation=operation,
            manifest_list=manifest_list,
            summary=summary,
            manifests=manifests,
            schema_id=getattr(snap, "schema_id", None),
        )