pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404)
  1. api/main.py +27 -1
  2. api/models/docs.py +68 -0
  3. api/models/evolution.py +117 -0
  4. api/models/tracking.py +111 -0
  5. api/models/validation.py +46 -6
  6. api/routes/v1/__init__.py +14 -1
  7. api/routes/v1/docs.py +187 -0
  8. api/routes/v1/evolution.py +337 -0
  9. api/routes/v1/templates.py +211 -27
  10. api/routes/v1/tracking.py +301 -0
  11. api/routes/v1/validation.py +68 -31
  12. pycharter/__init__.py +268 -58
  13. pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
  14. pycharter/data/templates/contract/template_contract.yaml +122 -0
  15. pycharter/data/templates/contract/template_metadata.yaml +68 -0
  16. pycharter/data/templates/contract/template_schema.yaml +100 -0
  17. pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
  18. pycharter/data/templates/etl/README.md +224 -0
  19. pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
  20. pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
  21. pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
  22. pycharter/data/templates/etl/extract_database.yaml +34 -0
  23. pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
  24. pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
  25. pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
  26. pycharter/data/templates/etl/extract_file_json.yaml +24 -0
  27. pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
  28. pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
  29. pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
  30. pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
  31. pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
  32. pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
  33. pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
  34. pycharter/data/templates/etl/load_file.yaml +34 -0
  35. pycharter/data/templates/etl/load_insert.yaml +18 -0
  36. pycharter/data/templates/etl/load_postgresql.yaml +39 -0
  37. pycharter/data/templates/etl/load_sqlite.yaml +21 -0
  38. pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
  39. pycharter/data/templates/etl/load_upsert.yaml +25 -0
  40. pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
  41. pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
  42. pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
  43. pycharter/data/templates/etl/transform_combined.yaml +48 -0
  44. pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
  45. pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
  46. pycharter/data/templates/etl/transform_simple.yaml +59 -0
  47. pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
  48. pycharter/docs_generator/__init__.py +43 -0
  49. pycharter/docs_generator/generator.py +465 -0
  50. pycharter/docs_generator/renderers.py +247 -0
  51. pycharter/etl_generator/__init__.py +168 -80
  52. pycharter/etl_generator/builder.py +121 -0
  53. pycharter/etl_generator/config_loader.py +394 -0
  54. pycharter/etl_generator/config_validator.py +418 -0
  55. pycharter/etl_generator/context.py +132 -0
  56. pycharter/etl_generator/expression.py +499 -0
  57. pycharter/etl_generator/extractors/__init__.py +30 -0
  58. pycharter/etl_generator/extractors/base.py +70 -0
  59. pycharter/etl_generator/extractors/cloud_storage.py +530 -0
  60. pycharter/etl_generator/extractors/database.py +221 -0
  61. pycharter/etl_generator/extractors/factory.py +185 -0
  62. pycharter/etl_generator/extractors/file.py +475 -0
  63. pycharter/etl_generator/extractors/http.py +895 -0
  64. pycharter/etl_generator/extractors/streaming.py +57 -0
  65. pycharter/etl_generator/loaders/__init__.py +41 -0
  66. pycharter/etl_generator/loaders/base.py +35 -0
  67. pycharter/etl_generator/loaders/cloud.py +87 -0
  68. pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
  69. pycharter/etl_generator/loaders/database.py +274 -0
  70. pycharter/etl_generator/loaders/factory.py +180 -0
  71. pycharter/etl_generator/loaders/file.py +72 -0
  72. pycharter/etl_generator/loaders/file_loader.py +130 -0
  73. pycharter/etl_generator/pipeline.py +743 -0
  74. pycharter/etl_generator/protocols.py +54 -0
  75. pycharter/etl_generator/result.py +63 -0
  76. pycharter/etl_generator/schemas/__init__.py +49 -0
  77. pycharter/etl_generator/transformers/__init__.py +49 -0
  78. pycharter/etl_generator/transformers/base.py +63 -0
  79. pycharter/etl_generator/transformers/config.py +45 -0
  80. pycharter/etl_generator/transformers/custom_function.py +101 -0
  81. pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
  82. pycharter/etl_generator/transformers/operations.py +218 -0
  83. pycharter/etl_generator/transformers/pipeline.py +54 -0
  84. pycharter/etl_generator/transformers/simple_operations.py +131 -0
  85. pycharter/quality/__init__.py +25 -0
  86. pycharter/quality/tracking/__init__.py +64 -0
  87. pycharter/quality/tracking/collector.py +318 -0
  88. pycharter/quality/tracking/exporters.py +238 -0
  89. pycharter/quality/tracking/models.py +194 -0
  90. pycharter/quality/tracking/store.py +385 -0
  91. pycharter/runtime_validator/__init__.py +20 -7
  92. pycharter/runtime_validator/builder.py +328 -0
  93. pycharter/runtime_validator/validator.py +311 -7
  94. pycharter/runtime_validator/validator_core.py +61 -0
  95. pycharter/schema_evolution/__init__.py +61 -0
  96. pycharter/schema_evolution/compatibility.py +270 -0
  97. pycharter/schema_evolution/diff.py +496 -0
  98. pycharter/schema_evolution/models.py +201 -0
  99. pycharter/shared/__init__.py +56 -0
  100. pycharter/shared/errors.py +296 -0
  101. pycharter/shared/protocols.py +234 -0
  102. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
  103. pycharter-0.0.24.dist-info/RECORD +543 -0
  104. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
  105. ui/static/404/index.html +1 -1
  106. ui/static/404.html +1 -1
  107. ui/static/__next.__PAGE__.txt +1 -1
  108. ui/static/__next._full.txt +1 -1
  109. ui/static/__next._head.txt +1 -1
  110. ui/static/__next._index.txt +1 -1
  111. ui/static/__next._tree.txt +1 -1
  112. ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
  113. ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
  114. ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
  115. ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
  116. ui/static/_not-found/__next._full.txt +1 -1
  117. ui/static/_not-found/__next._head.txt +1 -1
  118. ui/static/_not-found/__next._index.txt +1 -1
  119. ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  120. ui/static/_not-found/__next._not-found.txt +1 -1
  121. ui/static/_not-found/__next._tree.txt +1 -1
  122. ui/static/_not-found/index.html +1 -1
  123. ui/static/_not-found/index.txt +1 -1
  124. ui/static/contracts/__next._full.txt +2 -2
  125. ui/static/contracts/__next._head.txt +1 -1
  126. ui/static/contracts/__next._index.txt +1 -1
  127. ui/static/contracts/__next._tree.txt +1 -1
  128. ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  129. ui/static/contracts/__next.contracts.txt +1 -1
  130. ui/static/contracts/index.html +1 -1
  131. ui/static/contracts/index.txt +2 -2
  132. ui/static/documentation/__next._full.txt +1 -1
  133. ui/static/documentation/__next._head.txt +1 -1
  134. ui/static/documentation/__next._index.txt +1 -1
  135. ui/static/documentation/__next._tree.txt +1 -1
  136. ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
  137. ui/static/documentation/__next.documentation.txt +1 -1
  138. ui/static/documentation/index.html +2 -2
  139. ui/static/documentation/index.txt +1 -1
  140. ui/static/index.html +1 -1
  141. ui/static/index.txt +1 -1
  142. ui/static/metadata/__next._full.txt +1 -1
  143. ui/static/metadata/__next._head.txt +1 -1
  144. ui/static/metadata/__next._index.txt +1 -1
  145. ui/static/metadata/__next._tree.txt +1 -1
  146. ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  147. ui/static/metadata/__next.metadata.txt +1 -1
  148. ui/static/metadata/index.html +1 -1
  149. ui/static/metadata/index.txt +1 -1
  150. ui/static/quality/__next._full.txt +2 -2
  151. ui/static/quality/__next._head.txt +1 -1
  152. ui/static/quality/__next._index.txt +1 -1
  153. ui/static/quality/__next._tree.txt +1 -1
  154. ui/static/quality/__next.quality.__PAGE__.txt +2 -2
  155. ui/static/quality/__next.quality.txt +1 -1
  156. ui/static/quality/index.html +2 -2
  157. ui/static/quality/index.txt +2 -2
  158. ui/static/rules/__next._full.txt +1 -1
  159. ui/static/rules/__next._head.txt +1 -1
  160. ui/static/rules/__next._index.txt +1 -1
  161. ui/static/rules/__next._tree.txt +1 -1
  162. ui/static/rules/__next.rules.__PAGE__.txt +1 -1
  163. ui/static/rules/__next.rules.txt +1 -1
  164. ui/static/rules/index.html +1 -1
  165. ui/static/rules/index.txt +1 -1
  166. ui/static/schemas/__next._full.txt +1 -1
  167. ui/static/schemas/__next._head.txt +1 -1
  168. ui/static/schemas/__next._index.txt +1 -1
  169. ui/static/schemas/__next._tree.txt +1 -1
  170. ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  171. ui/static/schemas/__next.schemas.txt +1 -1
  172. ui/static/schemas/index.html +1 -1
  173. ui/static/schemas/index.txt +1 -1
  174. ui/static/settings/__next._full.txt +1 -1
  175. ui/static/settings/__next._head.txt +1 -1
  176. ui/static/settings/__next._index.txt +1 -1
  177. ui/static/settings/__next._tree.txt +1 -1
  178. ui/static/settings/__next.settings.__PAGE__.txt +1 -1
  179. ui/static/settings/__next.settings.txt +1 -1
  180. ui/static/settings/index.html +1 -1
  181. ui/static/settings/index.txt +1 -1
  182. ui/static/static/404/index.html +1 -1
  183. ui/static/static/404.html +1 -1
  184. ui/static/static/__next.__PAGE__.txt +1 -1
  185. ui/static/static/__next._full.txt +2 -2
  186. ui/static/static/__next._head.txt +1 -1
  187. ui/static/static/__next._index.txt +2 -2
  188. ui/static/static/__next._tree.txt +2 -2
  189. ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
  190. ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
  191. ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
  192. ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
  193. ui/static/static/_not-found/__next._full.txt +2 -2
  194. ui/static/static/_not-found/__next._head.txt +1 -1
  195. ui/static/static/_not-found/__next._index.txt +2 -2
  196. ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  197. ui/static/static/_not-found/__next._not-found.txt +1 -1
  198. ui/static/static/_not-found/__next._tree.txt +2 -2
  199. ui/static/static/_not-found/index.html +1 -1
  200. ui/static/static/_not-found/index.txt +2 -2
  201. ui/static/static/contracts/__next._full.txt +3 -3
  202. ui/static/static/contracts/__next._head.txt +1 -1
  203. ui/static/static/contracts/__next._index.txt +2 -2
  204. ui/static/static/contracts/__next._tree.txt +2 -2
  205. ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  206. ui/static/static/contracts/__next.contracts.txt +1 -1
  207. ui/static/static/contracts/index.html +1 -1
  208. ui/static/static/contracts/index.txt +3 -3
  209. ui/static/static/documentation/__next._full.txt +3 -3
  210. ui/static/static/documentation/__next._head.txt +1 -1
  211. ui/static/static/documentation/__next._index.txt +2 -2
  212. ui/static/static/documentation/__next._tree.txt +2 -2
  213. ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
  214. ui/static/static/documentation/__next.documentation.txt +1 -1
  215. ui/static/static/documentation/index.html +2 -2
  216. ui/static/static/documentation/index.txt +3 -3
  217. ui/static/static/index.html +1 -1
  218. ui/static/static/index.txt +2 -2
  219. ui/static/static/metadata/__next._full.txt +2 -2
  220. ui/static/static/metadata/__next._head.txt +1 -1
  221. ui/static/static/metadata/__next._index.txt +2 -2
  222. ui/static/static/metadata/__next._tree.txt +2 -2
  223. ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  224. ui/static/static/metadata/__next.metadata.txt +1 -1
  225. ui/static/static/metadata/index.html +1 -1
  226. ui/static/static/metadata/index.txt +2 -2
  227. ui/static/static/quality/__next._full.txt +2 -2
  228. ui/static/static/quality/__next._head.txt +1 -1
  229. ui/static/static/quality/__next._index.txt +2 -2
  230. ui/static/static/quality/__next._tree.txt +2 -2
  231. ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
  232. ui/static/static/quality/__next.quality.txt +1 -1
  233. ui/static/static/quality/index.html +2 -2
  234. ui/static/static/quality/index.txt +2 -2
  235. ui/static/static/rules/__next._full.txt +2 -2
  236. ui/static/static/rules/__next._head.txt +1 -1
  237. ui/static/static/rules/__next._index.txt +2 -2
  238. ui/static/static/rules/__next._tree.txt +2 -2
  239. ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
  240. ui/static/static/rules/__next.rules.txt +1 -1
  241. ui/static/static/rules/index.html +1 -1
  242. ui/static/static/rules/index.txt +2 -2
  243. ui/static/static/schemas/__next._full.txt +2 -2
  244. ui/static/static/schemas/__next._head.txt +1 -1
  245. ui/static/static/schemas/__next._index.txt +2 -2
  246. ui/static/static/schemas/__next._tree.txt +2 -2
  247. ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  248. ui/static/static/schemas/__next.schemas.txt +1 -1
  249. ui/static/static/schemas/index.html +1 -1
  250. ui/static/static/schemas/index.txt +2 -2
  251. ui/static/static/settings/__next._full.txt +2 -2
  252. ui/static/static/settings/__next._head.txt +1 -1
  253. ui/static/static/settings/__next._index.txt +2 -2
  254. ui/static/static/settings/__next._tree.txt +2 -2
  255. ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
  256. ui/static/static/settings/__next.settings.txt +1 -1
  257. ui/static/static/settings/index.html +1 -1
  258. ui/static/static/settings/index.txt +2 -2
  259. ui/static/static/static/.gitkeep +0 -0
  260. ui/static/static/static/404/index.html +1 -0
  261. ui/static/static/static/404.html +1 -0
  262. ui/static/static/static/__next.__PAGE__.txt +10 -0
  263. ui/static/static/static/__next._full.txt +30 -0
  264. ui/static/static/static/__next._head.txt +7 -0
  265. ui/static/static/static/__next._index.txt +9 -0
  266. ui/static/static/static/__next._tree.txt +2 -0
  267. ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
  268. ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
  269. ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
  270. ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
  271. ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
  272. ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
  273. ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
  274. ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
  275. ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
  276. ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
  277. ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
  278. ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
  279. ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
  280. ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
  281. ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
  282. ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
  283. ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
  284. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
  285. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
  286. ui/static/static/static/_not-found/__next._full.txt +17 -0
  287. ui/static/static/static/_not-found/__next._head.txt +7 -0
  288. ui/static/static/static/_not-found/__next._index.txt +9 -0
  289. ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
  290. ui/static/static/static/_not-found/__next._not-found.txt +4 -0
  291. ui/static/static/static/_not-found/__next._tree.txt +2 -0
  292. ui/static/static/static/_not-found/index.html +1 -0
  293. ui/static/static/static/_not-found/index.txt +17 -0
  294. ui/static/static/static/contracts/__next._full.txt +21 -0
  295. ui/static/static/static/contracts/__next._head.txt +7 -0
  296. ui/static/static/static/contracts/__next._index.txt +9 -0
  297. ui/static/static/static/contracts/__next._tree.txt +2 -0
  298. ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
  299. ui/static/static/static/contracts/__next.contracts.txt +4 -0
  300. ui/static/static/static/contracts/index.html +1 -0
  301. ui/static/static/static/contracts/index.txt +21 -0
  302. ui/static/static/static/documentation/__next._full.txt +21 -0
  303. ui/static/static/static/documentation/__next._head.txt +7 -0
  304. ui/static/static/static/documentation/__next._index.txt +9 -0
  305. ui/static/static/static/documentation/__next._tree.txt +2 -0
  306. ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
  307. ui/static/static/static/documentation/__next.documentation.txt +4 -0
  308. ui/static/static/static/documentation/index.html +93 -0
  309. ui/static/static/static/documentation/index.txt +21 -0
  310. ui/static/static/static/index.html +1 -0
  311. ui/static/static/static/index.txt +30 -0
  312. ui/static/static/static/metadata/__next._full.txt +21 -0
  313. ui/static/static/static/metadata/__next._head.txt +7 -0
  314. ui/static/static/static/metadata/__next._index.txt +9 -0
  315. ui/static/static/static/metadata/__next._tree.txt +2 -0
  316. ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
  317. ui/static/static/static/metadata/__next.metadata.txt +4 -0
  318. ui/static/static/static/metadata/index.html +1 -0
  319. ui/static/static/static/metadata/index.txt +21 -0
  320. ui/static/static/static/quality/__next._full.txt +21 -0
  321. ui/static/static/static/quality/__next._head.txt +7 -0
  322. ui/static/static/static/quality/__next._index.txt +9 -0
  323. ui/static/static/static/quality/__next._tree.txt +2 -0
  324. ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
  325. ui/static/static/static/quality/__next.quality.txt +4 -0
  326. ui/static/static/static/quality/index.html +2 -0
  327. ui/static/static/static/quality/index.txt +21 -0
  328. ui/static/static/static/rules/__next._full.txt +21 -0
  329. ui/static/static/static/rules/__next._head.txt +7 -0
  330. ui/static/static/static/rules/__next._index.txt +9 -0
  331. ui/static/static/static/rules/__next._tree.txt +2 -0
  332. ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
  333. ui/static/static/static/rules/__next.rules.txt +4 -0
  334. ui/static/static/static/rules/index.html +1 -0
  335. ui/static/static/static/rules/index.txt +21 -0
  336. ui/static/static/static/schemas/__next._full.txt +21 -0
  337. ui/static/static/static/schemas/__next._head.txt +7 -0
  338. ui/static/static/static/schemas/__next._index.txt +9 -0
  339. ui/static/static/static/schemas/__next._tree.txt +2 -0
  340. ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
  341. ui/static/static/static/schemas/__next.schemas.txt +4 -0
  342. ui/static/static/static/schemas/index.html +1 -0
  343. ui/static/static/static/schemas/index.txt +21 -0
  344. ui/static/static/static/settings/__next._full.txt +21 -0
  345. ui/static/static/static/settings/__next._head.txt +7 -0
  346. ui/static/static/static/settings/__next._index.txt +9 -0
  347. ui/static/static/static/settings/__next._tree.txt +2 -0
  348. ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
  349. ui/static/static/static/settings/__next.settings.txt +4 -0
  350. ui/static/static/static/settings/index.html +1 -0
  351. ui/static/static/static/settings/index.txt +21 -0
  352. ui/static/static/static/validation/__next._full.txt +21 -0
  353. ui/static/static/static/validation/__next._head.txt +7 -0
  354. ui/static/static/static/validation/__next._index.txt +9 -0
  355. ui/static/static/static/validation/__next._tree.txt +2 -0
  356. ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
  357. ui/static/static/static/validation/__next.validation.txt +4 -0
  358. ui/static/static/static/validation/index.html +1 -0
  359. ui/static/static/static/validation/index.txt +21 -0
  360. ui/static/static/validation/__next._full.txt +2 -2
  361. ui/static/static/validation/__next._head.txt +1 -1
  362. ui/static/static/validation/__next._index.txt +2 -2
  363. ui/static/static/validation/__next._tree.txt +2 -2
  364. ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
  365. ui/static/static/validation/__next.validation.txt +1 -1
  366. ui/static/static/validation/index.html +1 -1
  367. ui/static/static/validation/index.txt +2 -2
  368. ui/static/validation/__next._full.txt +2 -2
  369. ui/static/validation/__next._head.txt +1 -1
  370. ui/static/validation/__next._index.txt +1 -1
  371. ui/static/validation/__next._tree.txt +1 -1
  372. ui/static/validation/__next.validation.__PAGE__.txt +2 -2
  373. ui/static/validation/__next.validation.txt +1 -1
  374. ui/static/validation/index.html +1 -1
  375. ui/static/validation/index.txt +2 -2
  376. pycharter/data/templates/template_coercion_rules.yaml +0 -15
  377. pycharter/data/templates/template_contract.yaml +0 -587
  378. pycharter/data/templates/template_metadata.yaml +0 -38
  379. pycharter/data/templates/template_schema.yaml +0 -22
  380. pycharter/data/templates/template_transform_advanced.yaml +0 -50
  381. pycharter/data/templates/template_transform_simple.yaml +0 -59
  382. pycharter/data/templates/template_validation_rules.yaml +0 -29
  383. pycharter/etl_generator/extraction.py +0 -916
  384. pycharter/etl_generator/factory.py +0 -174
  385. pycharter/etl_generator/orchestrator.py +0 -1650
  386. pycharter/integrations/__init__.py +0 -19
  387. pycharter/integrations/kafka.py +0 -178
  388. pycharter/integrations/streaming.py +0 -100
  389. pycharter-0.0.22.dist-info/RECORD +0 -358
  390. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
  391. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
  392. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
  393. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
  394. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
  395. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
  396. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
  397. /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
  398. /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
  399. /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
  400. /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
  401. /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
  402. /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
  403. /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
  404. /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
@@ -0,0 +1,530 @@
1
+ """
2
+ Cloud storage extractor for ETL orchestrator.
3
+
4
+ Supports extracting data from cloud storage:
5
+ - AWS S3
6
+ - Google Cloud Storage (GCS)
7
+ - Azure Blob Storage
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ import tempfile
13
+ from pathlib import Path
14
+ from typing import Any, AsyncIterator, Dict, List, Optional
15
+
16
+ from pycharter.etl_generator.extractors.base import BaseExtractor
17
+ from pycharter.etl_generator.extractors.file import FileExtractor
18
+ from pycharter.utils.value_injector import resolve_values
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Try to import cloud storage libraries
23
+ try:
24
+ import boto3
25
+ from botocore.exceptions import ClientError
26
+ S3_AVAILABLE = True
27
+ except ImportError:
28
+ S3_AVAILABLE = False
29
+ boto3 = None
30
+ ClientError = None
31
+
32
+ try:
33
+ from google.cloud import storage as gcs_storage
34
+ GCS_AVAILABLE = True
35
+ except ImportError:
36
+ GCS_AVAILABLE = False
37
+ gcs_storage = None
38
+
39
+ try:
40
+ from azure.storage.blob import BlobServiceClient
41
+ AZURE_AVAILABLE = True
42
+ except ImportError:
43
+ AZURE_AVAILABLE = False
44
+ BlobServiceClient = None
45
+
46
+
47
+ class CloudStorageExtractor(BaseExtractor):
48
+ """
49
+ Extractor for cloud storage data sources.
50
+
51
+ Supports two modes:
52
+ 1. Programmatic API:
53
+ >>> extractor = CloudStorageExtractor(provider="s3", bucket="my-bucket", path="data/")
54
+ >>> async for batch in extractor.extract():
55
+ ... process(batch)
56
+
57
+ 2. Config-driven:
58
+ >>> extractor = CloudStorageExtractor()
59
+ >>> async for batch in extractor.extract_streaming(config, params, headers):
60
+ ... process(batch)
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ provider: Optional[str] = None,
66
+ bucket: Optional[str] = None,
67
+ path: Optional[str] = None,
68
+ credentials: Optional[Dict[str, Any]] = None,
69
+ file_format: Optional[str] = None,
70
+ batch_size: int = 1000,
71
+ max_records: Optional[int] = None,
72
+ ):
73
+ self.provider = provider
74
+ self.bucket = bucket
75
+ self.path = path
76
+ self.credentials = credentials
77
+ self.file_format = file_format
78
+ self.batch_size = batch_size
79
+ self.max_records = max_records
80
+
81
+ @classmethod
82
+ def from_config(cls, config: Dict[str, Any]) -> "CloudStorageExtractor":
83
+ """Create extractor from configuration dict."""
84
+ storage_config = config.get("storage", {})
85
+ return cls(
86
+ provider=storage_config.get("provider") or config.get("provider"),
87
+ bucket=storage_config.get("bucket") or config.get("bucket"),
88
+ path=storage_config.get("path") or config.get("path"),
89
+ credentials=storage_config.get("credentials") or config.get("credentials"),
90
+ file_format=config.get("format"),
91
+ batch_size=config.get("batch_size", 1000),
92
+ max_records=config.get("max_records"),
93
+ )
94
+
95
+ async def extract(self, **params) -> AsyncIterator[List[Dict[str, Any]]]:
96
+ """
97
+ Extract data from cloud storage.
98
+
99
+ Yields:
100
+ Batches of records
101
+ """
102
+ if not self.provider:
103
+ raise ValueError("Provider is required (s3, gcs, azure)")
104
+ if not self.bucket:
105
+ raise ValueError("Bucket is required")
106
+ if not self.path:
107
+ raise ValueError("Path is required")
108
+
109
+ extract_config = {
110
+ "storage": {
111
+ "provider": self.provider,
112
+ "bucket": self.bucket,
113
+ "path": self.path,
114
+ "credentials": self.credentials,
115
+ },
116
+ "format": self.file_format,
117
+ }
118
+
119
+ async for batch in self.extract_streaming(
120
+ extract_config, {}, {},
121
+ batch_size=self.batch_size,
122
+ max_records=self.max_records,
123
+ ):
124
+ yield batch
125
+
126
def validate_config(self, extract_config: Dict[str, Any]) -> None:
    """
    Validate cloud storage extractor configuration.

    Args:
        extract_config: Extraction config; the ``storage`` section must carry
            ``provider``, ``bucket`` and ``path``.

    Raises:
        ValueError: If ``source_type`` is present and is not
            ``'cloud_storage'``, if the provider is not one of s3/gcs/azure
            (case-insensitive), or if bucket or path is missing or empty.
    """
    if 'source_type' in extract_config and extract_config['source_type'] != 'cloud_storage':
        raise ValueError(
            f"CloudStorageExtractor requires source_type='cloud_storage', "
            f"got '{extract_config.get('source_type')}'"
        )

    # A null "storage" section or a null/non-string provider must be reported
    # as a configuration error, not surface as an AttributeError.
    storage_config = extract_config.get('storage') or {}
    provider = str(storage_config.get('provider') or '').lower()

    if provider not in ('s3', 'gcs', 'azure'):
        raise ValueError(
            f"Cloud storage provider must be 's3', 'gcs', or 'azure', got '{provider}'"
        )

    if not storage_config.get('bucket'):
        raise ValueError("Cloud storage extractor requires 'storage.bucket' in extract_config")

    if not storage_config.get('path'):
        raise ValueError("Cloud storage extractor requires 'storage.path' in extract_config")
147
+
148
async def extract_streaming(
    self,
    extract_config: Dict[str, Any],
    params: Dict[str, Any],
    headers: Dict[str, Any],
    contract_dir: Optional[Any] = None,
    batch_size: int = 1000,
    max_records: Optional[int] = None,
    config_context: Optional[Dict[str, Any]] = None,
) -> AsyncIterator[List[Dict[str, Any]]]:
    """
    Extract data from cloud storage.

    Resolves the bucket and path from the ``storage`` section of
    ``extract_config``, determines the file format, and delegates to the
    provider-specific extractor (S3, GCS or Azure).

    Args:
        extract_config: Config with a ``storage`` section (provider, bucket,
            path, credentials) and an optional ``format``.
        params: Not read by this method.
        headers: Not read by this method.
        contract_dir: Optional directory (path-like or string) used to build
            the ``extract.yaml`` source-file path passed to value resolution.
        batch_size: Forwarded to the provider-specific extractor.
        max_records: Forwarded to the provider-specific extractor.
        config_context: Context passed to value resolution and forwarded on.

    Yields:
        Batches of records.

    Raises:
        ValueError: If the provider is not s3, gcs or azure.
    """
    # Tolerate a null "storage" section or a null provider so the caller gets
    # the "Unsupported cloud storage provider" error, not an AttributeError.
    storage_config = extract_config.get('storage') or {}
    provider = str(storage_config.get('provider') or '').lower()

    # Resolve variables. Path() accepts both str and Path, so a plain-string
    # contract_dir no longer fails on the "/" operator.
    source_file = str(Path(contract_dir) / "extract.yaml") if contract_dir else None
    bucket = resolve_values(storage_config.get('bucket'), context=config_context, source_file=source_file)
    path = resolve_values(storage_config.get('path'), context=config_context, source_file=source_file)
    credentials = storage_config.get('credentials')

    # Use the explicit format when given; otherwise detect it from the path.
    file_format = extract_config.get('format')
    if not file_format:
        file_format = self._detect_format_from_path(Path(path))

    logger.info("Extracting from %s: %s/%s", provider.upper(), bucket, path)

    # Pick the provider-specific extractor; all three share one signature.
    if provider == 's3':
        extract_from_provider = self._extract_from_s3
    elif provider == 'gcs':
        extract_from_provider = self._extract_from_gcs
    elif provider == 'azure':
        extract_from_provider = self._extract_from_azure
    else:
        raise ValueError(f"Unsupported cloud storage provider: {provider}")

    async for batch in extract_from_provider(
        bucket, path, credentials, file_format, batch_size, max_records, config_context, source_file
    ):
        yield batch
200
+
201
async def _extract_from_s3(
    self,
    bucket: str,
    path: str,
    credentials: Optional[Dict[str, Any]],
    file_format: Optional[str],
    batch_size: int,
    max_records: Optional[int],
    config_context: Optional[Dict[str, Any]],
    source_file: Optional[str],
) -> AsyncIterator[List[Dict[str, Any]]]:
    """Extract data from AWS S3.

    Each selected object is downloaded to a temporary file, handed to
    ``FileExtractor.extract_streaming`` and the temporary file is removed
    afterwards, whether or not extraction succeeded.

    Args:
        bucket: Name of the S3 bucket.
        path: Object key, a key prefix ending in ``/``, or a pattern
            containing ``*``. A prefix or pattern selects several objects.
        credentials: Optional dict. When it holds both
            ``aws_access_key_id`` and ``aws_secret_access_key`` they are
            passed to ``boto3.client`` together with ``region``
            (default ``us-east-1``); otherwise boto3's default credential
            resolution is used.
        file_format: Format forwarded to ``FileExtractor`` (may be None).
        batch_size: Batch size forwarded to ``FileExtractor``.
        max_records: Overall cap on yielded records; a falsy value means
            no cap is enforced here.
        config_context: Forwarded to ``FileExtractor`` unchanged.
        source_file: Accepted for signature parity; not used here.

    Yields:
        Batches produced by ``FileExtractor.extract_streaming``.

    Raises:
        ImportError: If boto3 is not available.
    """
    import fnmatch

    if not S3_AVAILABLE:
        raise ImportError(
            "boto3 is required for S3 extraction. "
            "Install with: pip install boto3 or pip install pycharter[etl]"
        )

    # Build the client once; explicit keys are only used when both are present.
    client_kwargs: Dict[str, Any] = {}
    if credentials and isinstance(credentials, dict):
        aws_access_key_id = credentials.get('aws_access_key_id')
        aws_secret_access_key = credentials.get('aws_secret_access_key')
        if aws_access_key_id and aws_secret_access_key:
            client_kwargs = {
                'aws_access_key_id': aws_access_key_id,
                'aws_secret_access_key': aws_secret_access_key,
                'region_name': credentials.get('region', 'us-east-1'),
            }
    s3_client = boto3.client('s3', **client_kwargs)

    def _iter_keys():
        """Lazily yield the object keys selected by ``path``."""
        if not (path.endswith('/') or '*' in path):
            # Single object.
            yield path
            return

        pattern = path.rstrip('/')
        is_glob = '*' in pattern
        # S3 can only filter by prefix, so list by the literal part of the
        # pattern and apply the glob itself client-side.
        prefix = pattern.split('*')[0] if is_glob else pattern

        paginator = s3_client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            for obj in page.get('Contents', []):
                key = obj['Key']
                if key.endswith('/'):
                    # Zero-byte "folder" placeholder, not a data file.
                    continue
                if is_glob and not fnmatch.fnmatchcase(key, pattern):
                    continue
                logger.info(f"Processing S3 object: {bucket}/{key}")
                yield key

    total_extracted = 0
    for key in _iter_keys():
        if max_records and total_extracted >= max_records:
            # Leaving this loop also stops pagination, since keys are lazy.
            break

        # Only ask the file extractor for what is still missing so the
        # overall cap holds across several objects.
        remaining = (max_records - total_extracted) if max_records else max_records

        # Bound before anything can fail so ``finally`` never sees an
        # unbound (or stale) name.
        tmp_path: Optional[Path] = None
        try:
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=Path(key).suffix
            ) as tmp_file:
                tmp_path = Path(tmp_file.name)
                # NOTE(review): synchronous call inside an async generator;
                # it is not awaited and presumably blocks the event loop.
                s3_client.download_fileobj(bucket, key, tmp_file)
            # The handle is closed (and therefore flushed) before the file
            # is re-opened by path below.

            file_extractor = FileExtractor()
            file_config = {
                'source_type': 'file',
                'file_path': str(tmp_path),
                'format': file_format,
            }

            async for batch in file_extractor.extract_streaming(
                file_config, {}, {}, None, batch_size, remaining, config_context
            ):
                total_extracted += len(batch)
                yield batch
                if max_records and total_extracted >= max_records:
                    break
        finally:
            # Cleanup temp file
            if tmp_path is not None and tmp_path.exists():
                tmp_path.unlink()
307
+
308
async def _extract_from_gcs(
    self,
    bucket: str,
    path: str,
    credentials: Optional[Dict[str, Any]],
    file_format: Optional[str],
    batch_size: int,
    max_records: Optional[int],
    config_context: Optional[Dict[str, Any]],
    source_file: Optional[str],
) -> AsyncIterator[List[Dict[str, Any]]]:
    """Extract data from Google Cloud Storage.

    Each selected blob is downloaded to a temporary file, handed to
    ``FileExtractor.extract_streaming`` and the temporary file is removed
    afterwards, whether or not extraction succeeded.

    Args:
        bucket: Name of the GCS bucket.
        path: Blob name, a name prefix ending in ``/``, or a pattern
            containing ``*``. A prefix or pattern selects several blobs.
        credentials: Either a path to a service-account JSON key file
            (str) or the parsed service-account info (dict). Any other
            value, or a falsy one, falls back to ``Client()`` defaults.
        file_format: Format forwarded to ``FileExtractor`` (may be None).
        batch_size: Batch size forwarded to ``FileExtractor``.
        max_records: Overall cap on yielded records; a falsy value means
            no cap is enforced here.
        config_context: Forwarded to ``FileExtractor`` unchanged.
        source_file: Accepted for signature parity; not used here.

    Yields:
        Batches produced by ``FileExtractor.extract_streaming``.

    Raises:
        ImportError: If google-cloud-storage is not available.
    """
    import fnmatch

    if not GCS_AVAILABLE:
        raise ImportError(
            "google-cloud-storage is required for GCS extraction. "
            "Install with: pip install google-cloud-storage"
        )

    # Initialize GCS client
    if credentials and isinstance(credentials, str):
        client = gcs_storage.Client.from_service_account_json(credentials)
    elif credentials and isinstance(credentials, dict):
        # Build the client straight from the dict so the service-account
        # secret is never written to a temporary file on disk.
        client = gcs_storage.Client.from_service_account_info(credentials)
    else:
        client = gcs_storage.Client()

    bucket_obj = client.bucket(bucket)

    def _iter_blobs():
        """Lazily yield the blobs selected by ``path``."""
        if not (path.endswith('/') or '*' in path):
            # Single file
            yield bucket_obj.blob(path)
            return

        pattern = path.rstrip('/')
        is_glob = '*' in pattern
        # Listing can only filter by prefix, so list by the literal part
        # of the pattern and apply the glob itself client-side.
        prefix = pattern.split('*')[0] if is_glob else pattern

        for blob in bucket_obj.list_blobs(prefix=prefix):
            if blob.name.endswith('/'):
                # Zero-byte "folder" placeholder, not a data file.
                continue
            if is_glob and not fnmatch.fnmatchcase(blob.name, pattern):
                continue
            logger.info(f"Processing GCS blob: {bucket}/{blob.name}")
            yield blob

    total_extracted = 0
    for blob in _iter_blobs():
        if max_records and total_extracted >= max_records:
            break

        # Only ask the file extractor for what is still missing so the
        # overall cap holds across several blobs.
        remaining = (max_records - total_extracted) if max_records else max_records

        # Bound before anything can fail so ``finally`` never sees an
        # unbound (or stale) name.
        tmp_path: Optional[Path] = None
        try:
            # Reserve a unique file name, then close the handle before the
            # client library writes to that path itself.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=Path(blob.name).suffix
            ) as tmp_file:
                tmp_path = Path(tmp_file.name)
            # NOTE(review): synchronous call inside an async generator;
            # it is not awaited and presumably blocks the event loop.
            blob.download_to_filename(str(tmp_path))

            # Use FileExtractor
            file_extractor = FileExtractor()
            file_config = {
                'source_type': 'file',
                'file_path': str(tmp_path),
                'format': file_format,
            }

            async for batch in file_extractor.extract_streaming(
                file_config, {}, {}, None, batch_size, remaining, config_context
            ):
                total_extracted += len(batch)
                yield batch
                if max_records and total_extracted >= max_records:
                    break
        finally:
            if tmp_path is not None and tmp_path.exists():
                tmp_path.unlink()
410
+
411
async def _extract_from_azure(
    self,
    container: str,
    path: str,
    credentials: Optional[Dict[str, Any]],
    file_format: Optional[str],
    batch_size: int,
    max_records: Optional[int],
    config_context: Optional[Dict[str, Any]],
    source_file: Optional[str],
) -> AsyncIterator[List[Dict[str, Any]]]:
    """Extract data from Azure Blob Storage.

    Each selected blob is downloaded to a temporary file, handed to
    ``FileExtractor.extract_streaming`` and the temporary file is removed
    afterwards, whether or not extraction succeeded.

    Args:
        container: Name of the blob container.
        path: Blob name, a name prefix ending in ``/``, or a pattern
            containing ``*``. A prefix or pattern selects several blobs.
        credentials: Optional dict with either ``connection_string`` or
            both ``account_name`` and ``account_key``. When falsy, the
            ``AZURE_STORAGE_CONNECTION_STRING`` environment variable is
            used instead.
        file_format: Format forwarded to ``FileExtractor`` (may be None).
        batch_size: Batch size forwarded to ``FileExtractor``.
        max_records: Overall cap on yielded records; a falsy value means
            no cap is enforced here.
        config_context: Forwarded to ``FileExtractor`` unchanged.
        source_file: Accepted for signature parity; not used here.

    Yields:
        Batches produced by ``FileExtractor.extract_streaming``.

    Raises:
        ImportError: If azure-storage-blob is not available.
        ValueError: If the credentials dict lacks the required keys, or
            no credentials are given and the environment variable is unset.
    """
    import fnmatch

    if not AZURE_AVAILABLE:
        raise ImportError(
            "azure-storage-blob is required for Azure extraction. "
            "Install with: pip install azure-storage-blob"
        )

    # Initialize Azure client
    if credentials:
        connection_string = credentials.get('connection_string')
        account_name = credentials.get('account_name')
        account_key = credentials.get('account_key')

        if connection_string:
            blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        elif account_name and account_key:
            account_url = f"https://{account_name}.blob.core.windows.net"
            blob_service_client = BlobServiceClient(account_url, credential=account_key)
        else:
            raise ValueError("Azure credentials must include 'connection_string' or ('account_name', 'account_key')")
    else:
        # Use default credentials (environment variables). Fail with a clear
        # message instead of handing an empty connection string to the SDK.
        env_connection_string = os.environ.get('AZURE_STORAGE_CONNECTION_STRING', '')
        if not env_connection_string:
            raise ValueError(
                "No Azure credentials provided and the "
                "AZURE_STORAGE_CONNECTION_STRING environment variable is not set"
            )
        blob_service_client = BlobServiceClient.from_connection_string(env_connection_string)

    container_client = blob_service_client.get_container_client(container)

    def _iter_blob_names():
        """Lazily yield the blob names selected by ``path``."""
        if not (path.endswith('/') or '*' in path):
            # Single file
            yield path
            return

        pattern = path.rstrip('/')
        is_glob = '*' in pattern
        # Listing can only filter by prefix, so list by the literal part
        # of the pattern and apply the glob itself client-side.
        prefix = pattern.split('*')[0] if is_glob else pattern

        for blob in container_client.list_blobs(name_starts_with=prefix):
            if blob.name.endswith('/'):
                # "Folder" placeholder entry, not a data file.
                continue
            if is_glob and not fnmatch.fnmatchcase(blob.name, pattern):
                continue
            logger.info(f"Processing Azure blob: {container}/{blob.name}")
            yield blob.name

    total_extracted = 0
    for blob_name in _iter_blob_names():
        if max_records and total_extracted >= max_records:
            break

        # Only ask the file extractor for what is still missing so the
        # overall cap holds across several blobs.
        remaining = (max_records - total_extracted) if max_records else max_records

        blob_client = container_client.get_blob_client(blob_name)

        # Bound before anything can fail so ``finally`` never sees an
        # unbound (or stale) name.
        tmp_path: Optional[Path] = None
        try:
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=Path(blob_name).suffix
            ) as tmp_file:
                tmp_path = Path(tmp_file.name)
                # NOTE(review): synchronous calls inside an async generator;
                # they are not awaited and presumably block the event loop.
                blob_data = blob_client.download_blob()
                blob_data.download_to_stream(tmp_file)
            # The handle is closed (and therefore flushed) before the file
            # is re-opened by path below.

            # Use FileExtractor
            file_extractor = FileExtractor()
            file_config = {
                'source_type': 'file',
                'file_path': str(tmp_path),
                'format': file_format,
            }

            async for batch in file_extractor.extract_streaming(
                file_config, {}, {}, None, batch_size, remaining, config_context
            ):
                total_extracted += len(batch)
                yield batch
                if max_records and total_extracted >= max_records:
                    break
        finally:
            if tmp_path is not None and tmp_path.exists():
                tmp_path.unlink()
515
+
516
+ def _detect_format_from_path(self, path: Path) -> Optional[str]:
517
+ """Detect file format from path extension."""
518
+ suffix = path.suffix.lower()
519
+ format_map = {
520
+ '.csv': 'csv',
521
+ '.tsv': 'tsv',
522
+ '.json': 'json',
523
+ '.jsonl': 'jsonl',
524
+ '.ndjson': 'jsonl',
525
+ '.parquet': 'parquet',
526
+ '.xlsx': 'excel',
527
+ '.xls': 'excel',
528
+ '.xml': 'xml',
529
+ }
530
+ return format_map.get(suffix)