pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404) hide show
  1. api/main.py +27 -1
  2. api/models/docs.py +68 -0
  3. api/models/evolution.py +117 -0
  4. api/models/tracking.py +111 -0
  5. api/models/validation.py +46 -6
  6. api/routes/v1/__init__.py +14 -1
  7. api/routes/v1/docs.py +187 -0
  8. api/routes/v1/evolution.py +337 -0
  9. api/routes/v1/templates.py +211 -27
  10. api/routes/v1/tracking.py +301 -0
  11. api/routes/v1/validation.py +68 -31
  12. pycharter/__init__.py +268 -58
  13. pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
  14. pycharter/data/templates/contract/template_contract.yaml +122 -0
  15. pycharter/data/templates/contract/template_metadata.yaml +68 -0
  16. pycharter/data/templates/contract/template_schema.yaml +100 -0
  17. pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
  18. pycharter/data/templates/etl/README.md +224 -0
  19. pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
  20. pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
  21. pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
  22. pycharter/data/templates/etl/extract_database.yaml +34 -0
  23. pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
  24. pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
  25. pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
  26. pycharter/data/templates/etl/extract_file_json.yaml +24 -0
  27. pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
  28. pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
  29. pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
  30. pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
  31. pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
  32. pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
  33. pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
  34. pycharter/data/templates/etl/load_file.yaml +34 -0
  35. pycharter/data/templates/etl/load_insert.yaml +18 -0
  36. pycharter/data/templates/etl/load_postgresql.yaml +39 -0
  37. pycharter/data/templates/etl/load_sqlite.yaml +21 -0
  38. pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
  39. pycharter/data/templates/etl/load_upsert.yaml +25 -0
  40. pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
  41. pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
  42. pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
  43. pycharter/data/templates/etl/transform_combined.yaml +48 -0
  44. pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
  45. pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
  46. pycharter/data/templates/etl/transform_simple.yaml +59 -0
  47. pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
  48. pycharter/docs_generator/__init__.py +43 -0
  49. pycharter/docs_generator/generator.py +465 -0
  50. pycharter/docs_generator/renderers.py +247 -0
  51. pycharter/etl_generator/__init__.py +168 -80
  52. pycharter/etl_generator/builder.py +121 -0
  53. pycharter/etl_generator/config_loader.py +394 -0
  54. pycharter/etl_generator/config_validator.py +418 -0
  55. pycharter/etl_generator/context.py +132 -0
  56. pycharter/etl_generator/expression.py +499 -0
  57. pycharter/etl_generator/extractors/__init__.py +30 -0
  58. pycharter/etl_generator/extractors/base.py +70 -0
  59. pycharter/etl_generator/extractors/cloud_storage.py +530 -0
  60. pycharter/etl_generator/extractors/database.py +221 -0
  61. pycharter/etl_generator/extractors/factory.py +185 -0
  62. pycharter/etl_generator/extractors/file.py +475 -0
  63. pycharter/etl_generator/extractors/http.py +895 -0
  64. pycharter/etl_generator/extractors/streaming.py +57 -0
  65. pycharter/etl_generator/loaders/__init__.py +41 -0
  66. pycharter/etl_generator/loaders/base.py +35 -0
  67. pycharter/etl_generator/loaders/cloud.py +87 -0
  68. pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
  69. pycharter/etl_generator/loaders/database.py +274 -0
  70. pycharter/etl_generator/loaders/factory.py +180 -0
  71. pycharter/etl_generator/loaders/file.py +72 -0
  72. pycharter/etl_generator/loaders/file_loader.py +130 -0
  73. pycharter/etl_generator/pipeline.py +743 -0
  74. pycharter/etl_generator/protocols.py +54 -0
  75. pycharter/etl_generator/result.py +63 -0
  76. pycharter/etl_generator/schemas/__init__.py +49 -0
  77. pycharter/etl_generator/transformers/__init__.py +49 -0
  78. pycharter/etl_generator/transformers/base.py +63 -0
  79. pycharter/etl_generator/transformers/config.py +45 -0
  80. pycharter/etl_generator/transformers/custom_function.py +101 -0
  81. pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
  82. pycharter/etl_generator/transformers/operations.py +218 -0
  83. pycharter/etl_generator/transformers/pipeline.py +54 -0
  84. pycharter/etl_generator/transformers/simple_operations.py +131 -0
  85. pycharter/quality/__init__.py +25 -0
  86. pycharter/quality/tracking/__init__.py +64 -0
  87. pycharter/quality/tracking/collector.py +318 -0
  88. pycharter/quality/tracking/exporters.py +238 -0
  89. pycharter/quality/tracking/models.py +194 -0
  90. pycharter/quality/tracking/store.py +385 -0
  91. pycharter/runtime_validator/__init__.py +20 -7
  92. pycharter/runtime_validator/builder.py +328 -0
  93. pycharter/runtime_validator/validator.py +311 -7
  94. pycharter/runtime_validator/validator_core.py +61 -0
  95. pycharter/schema_evolution/__init__.py +61 -0
  96. pycharter/schema_evolution/compatibility.py +270 -0
  97. pycharter/schema_evolution/diff.py +496 -0
  98. pycharter/schema_evolution/models.py +201 -0
  99. pycharter/shared/__init__.py +56 -0
  100. pycharter/shared/errors.py +296 -0
  101. pycharter/shared/protocols.py +234 -0
  102. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
  103. pycharter-0.0.24.dist-info/RECORD +543 -0
  104. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
  105. ui/static/404/index.html +1 -1
  106. ui/static/404.html +1 -1
  107. ui/static/__next.__PAGE__.txt +1 -1
  108. ui/static/__next._full.txt +1 -1
  109. ui/static/__next._head.txt +1 -1
  110. ui/static/__next._index.txt +1 -1
  111. ui/static/__next._tree.txt +1 -1
  112. ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
  113. ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
  114. ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
  115. ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
  116. ui/static/_not-found/__next._full.txt +1 -1
  117. ui/static/_not-found/__next._head.txt +1 -1
  118. ui/static/_not-found/__next._index.txt +1 -1
  119. ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  120. ui/static/_not-found/__next._not-found.txt +1 -1
  121. ui/static/_not-found/__next._tree.txt +1 -1
  122. ui/static/_not-found/index.html +1 -1
  123. ui/static/_not-found/index.txt +1 -1
  124. ui/static/contracts/__next._full.txt +2 -2
  125. ui/static/contracts/__next._head.txt +1 -1
  126. ui/static/contracts/__next._index.txt +1 -1
  127. ui/static/contracts/__next._tree.txt +1 -1
  128. ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  129. ui/static/contracts/__next.contracts.txt +1 -1
  130. ui/static/contracts/index.html +1 -1
  131. ui/static/contracts/index.txt +2 -2
  132. ui/static/documentation/__next._full.txt +1 -1
  133. ui/static/documentation/__next._head.txt +1 -1
  134. ui/static/documentation/__next._index.txt +1 -1
  135. ui/static/documentation/__next._tree.txt +1 -1
  136. ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
  137. ui/static/documentation/__next.documentation.txt +1 -1
  138. ui/static/documentation/index.html +2 -2
  139. ui/static/documentation/index.txt +1 -1
  140. ui/static/index.html +1 -1
  141. ui/static/index.txt +1 -1
  142. ui/static/metadata/__next._full.txt +1 -1
  143. ui/static/metadata/__next._head.txt +1 -1
  144. ui/static/metadata/__next._index.txt +1 -1
  145. ui/static/metadata/__next._tree.txt +1 -1
  146. ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  147. ui/static/metadata/__next.metadata.txt +1 -1
  148. ui/static/metadata/index.html +1 -1
  149. ui/static/metadata/index.txt +1 -1
  150. ui/static/quality/__next._full.txt +2 -2
  151. ui/static/quality/__next._head.txt +1 -1
  152. ui/static/quality/__next._index.txt +1 -1
  153. ui/static/quality/__next._tree.txt +1 -1
  154. ui/static/quality/__next.quality.__PAGE__.txt +2 -2
  155. ui/static/quality/__next.quality.txt +1 -1
  156. ui/static/quality/index.html +2 -2
  157. ui/static/quality/index.txt +2 -2
  158. ui/static/rules/__next._full.txt +1 -1
  159. ui/static/rules/__next._head.txt +1 -1
  160. ui/static/rules/__next._index.txt +1 -1
  161. ui/static/rules/__next._tree.txt +1 -1
  162. ui/static/rules/__next.rules.__PAGE__.txt +1 -1
  163. ui/static/rules/__next.rules.txt +1 -1
  164. ui/static/rules/index.html +1 -1
  165. ui/static/rules/index.txt +1 -1
  166. ui/static/schemas/__next._full.txt +1 -1
  167. ui/static/schemas/__next._head.txt +1 -1
  168. ui/static/schemas/__next._index.txt +1 -1
  169. ui/static/schemas/__next._tree.txt +1 -1
  170. ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  171. ui/static/schemas/__next.schemas.txt +1 -1
  172. ui/static/schemas/index.html +1 -1
  173. ui/static/schemas/index.txt +1 -1
  174. ui/static/settings/__next._full.txt +1 -1
  175. ui/static/settings/__next._head.txt +1 -1
  176. ui/static/settings/__next._index.txt +1 -1
  177. ui/static/settings/__next._tree.txt +1 -1
  178. ui/static/settings/__next.settings.__PAGE__.txt +1 -1
  179. ui/static/settings/__next.settings.txt +1 -1
  180. ui/static/settings/index.html +1 -1
  181. ui/static/settings/index.txt +1 -1
  182. ui/static/static/404/index.html +1 -1
  183. ui/static/static/404.html +1 -1
  184. ui/static/static/__next.__PAGE__.txt +1 -1
  185. ui/static/static/__next._full.txt +2 -2
  186. ui/static/static/__next._head.txt +1 -1
  187. ui/static/static/__next._index.txt +2 -2
  188. ui/static/static/__next._tree.txt +2 -2
  189. ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
  190. ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
  191. ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
  192. ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
  193. ui/static/static/_not-found/__next._full.txt +2 -2
  194. ui/static/static/_not-found/__next._head.txt +1 -1
  195. ui/static/static/_not-found/__next._index.txt +2 -2
  196. ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  197. ui/static/static/_not-found/__next._not-found.txt +1 -1
  198. ui/static/static/_not-found/__next._tree.txt +2 -2
  199. ui/static/static/_not-found/index.html +1 -1
  200. ui/static/static/_not-found/index.txt +2 -2
  201. ui/static/static/contracts/__next._full.txt +3 -3
  202. ui/static/static/contracts/__next._head.txt +1 -1
  203. ui/static/static/contracts/__next._index.txt +2 -2
  204. ui/static/static/contracts/__next._tree.txt +2 -2
  205. ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  206. ui/static/static/contracts/__next.contracts.txt +1 -1
  207. ui/static/static/contracts/index.html +1 -1
  208. ui/static/static/contracts/index.txt +3 -3
  209. ui/static/static/documentation/__next._full.txt +3 -3
  210. ui/static/static/documentation/__next._head.txt +1 -1
  211. ui/static/static/documentation/__next._index.txt +2 -2
  212. ui/static/static/documentation/__next._tree.txt +2 -2
  213. ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
  214. ui/static/static/documentation/__next.documentation.txt +1 -1
  215. ui/static/static/documentation/index.html +2 -2
  216. ui/static/static/documentation/index.txt +3 -3
  217. ui/static/static/index.html +1 -1
  218. ui/static/static/index.txt +2 -2
  219. ui/static/static/metadata/__next._full.txt +2 -2
  220. ui/static/static/metadata/__next._head.txt +1 -1
  221. ui/static/static/metadata/__next._index.txt +2 -2
  222. ui/static/static/metadata/__next._tree.txt +2 -2
  223. ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  224. ui/static/static/metadata/__next.metadata.txt +1 -1
  225. ui/static/static/metadata/index.html +1 -1
  226. ui/static/static/metadata/index.txt +2 -2
  227. ui/static/static/quality/__next._full.txt +2 -2
  228. ui/static/static/quality/__next._head.txt +1 -1
  229. ui/static/static/quality/__next._index.txt +2 -2
  230. ui/static/static/quality/__next._tree.txt +2 -2
  231. ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
  232. ui/static/static/quality/__next.quality.txt +1 -1
  233. ui/static/static/quality/index.html +2 -2
  234. ui/static/static/quality/index.txt +2 -2
  235. ui/static/static/rules/__next._full.txt +2 -2
  236. ui/static/static/rules/__next._head.txt +1 -1
  237. ui/static/static/rules/__next._index.txt +2 -2
  238. ui/static/static/rules/__next._tree.txt +2 -2
  239. ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
  240. ui/static/static/rules/__next.rules.txt +1 -1
  241. ui/static/static/rules/index.html +1 -1
  242. ui/static/static/rules/index.txt +2 -2
  243. ui/static/static/schemas/__next._full.txt +2 -2
  244. ui/static/static/schemas/__next._head.txt +1 -1
  245. ui/static/static/schemas/__next._index.txt +2 -2
  246. ui/static/static/schemas/__next._tree.txt +2 -2
  247. ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  248. ui/static/static/schemas/__next.schemas.txt +1 -1
  249. ui/static/static/schemas/index.html +1 -1
  250. ui/static/static/schemas/index.txt +2 -2
  251. ui/static/static/settings/__next._full.txt +2 -2
  252. ui/static/static/settings/__next._head.txt +1 -1
  253. ui/static/static/settings/__next._index.txt +2 -2
  254. ui/static/static/settings/__next._tree.txt +2 -2
  255. ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
  256. ui/static/static/settings/__next.settings.txt +1 -1
  257. ui/static/static/settings/index.html +1 -1
  258. ui/static/static/settings/index.txt +2 -2
  259. ui/static/static/static/.gitkeep +0 -0
  260. ui/static/static/static/404/index.html +1 -0
  261. ui/static/static/static/404.html +1 -0
  262. ui/static/static/static/__next.__PAGE__.txt +10 -0
  263. ui/static/static/static/__next._full.txt +30 -0
  264. ui/static/static/static/__next._head.txt +7 -0
  265. ui/static/static/static/__next._index.txt +9 -0
  266. ui/static/static/static/__next._tree.txt +2 -0
  267. ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
  268. ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
  269. ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
  270. ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
  271. ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
  272. ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
  273. ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
  274. ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
  275. ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
  276. ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
  277. ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
  278. ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
  279. ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
  280. ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
  281. ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
  282. ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
  283. ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
  284. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
  285. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
  286. ui/static/static/static/_not-found/__next._full.txt +17 -0
  287. ui/static/static/static/_not-found/__next._head.txt +7 -0
  288. ui/static/static/static/_not-found/__next._index.txt +9 -0
  289. ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
  290. ui/static/static/static/_not-found/__next._not-found.txt +4 -0
  291. ui/static/static/static/_not-found/__next._tree.txt +2 -0
  292. ui/static/static/static/_not-found/index.html +1 -0
  293. ui/static/static/static/_not-found/index.txt +17 -0
  294. ui/static/static/static/contracts/__next._full.txt +21 -0
  295. ui/static/static/static/contracts/__next._head.txt +7 -0
  296. ui/static/static/static/contracts/__next._index.txt +9 -0
  297. ui/static/static/static/contracts/__next._tree.txt +2 -0
  298. ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
  299. ui/static/static/static/contracts/__next.contracts.txt +4 -0
  300. ui/static/static/static/contracts/index.html +1 -0
  301. ui/static/static/static/contracts/index.txt +21 -0
  302. ui/static/static/static/documentation/__next._full.txt +21 -0
  303. ui/static/static/static/documentation/__next._head.txt +7 -0
  304. ui/static/static/static/documentation/__next._index.txt +9 -0
  305. ui/static/static/static/documentation/__next._tree.txt +2 -0
  306. ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
  307. ui/static/static/static/documentation/__next.documentation.txt +4 -0
  308. ui/static/static/static/documentation/index.html +93 -0
  309. ui/static/static/static/documentation/index.txt +21 -0
  310. ui/static/static/static/index.html +1 -0
  311. ui/static/static/static/index.txt +30 -0
  312. ui/static/static/static/metadata/__next._full.txt +21 -0
  313. ui/static/static/static/metadata/__next._head.txt +7 -0
  314. ui/static/static/static/metadata/__next._index.txt +9 -0
  315. ui/static/static/static/metadata/__next._tree.txt +2 -0
  316. ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
  317. ui/static/static/static/metadata/__next.metadata.txt +4 -0
  318. ui/static/static/static/metadata/index.html +1 -0
  319. ui/static/static/static/metadata/index.txt +21 -0
  320. ui/static/static/static/quality/__next._full.txt +21 -0
  321. ui/static/static/static/quality/__next._head.txt +7 -0
  322. ui/static/static/static/quality/__next._index.txt +9 -0
  323. ui/static/static/static/quality/__next._tree.txt +2 -0
  324. ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
  325. ui/static/static/static/quality/__next.quality.txt +4 -0
  326. ui/static/static/static/quality/index.html +2 -0
  327. ui/static/static/static/quality/index.txt +21 -0
  328. ui/static/static/static/rules/__next._full.txt +21 -0
  329. ui/static/static/static/rules/__next._head.txt +7 -0
  330. ui/static/static/static/rules/__next._index.txt +9 -0
  331. ui/static/static/static/rules/__next._tree.txt +2 -0
  332. ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
  333. ui/static/static/static/rules/__next.rules.txt +4 -0
  334. ui/static/static/static/rules/index.html +1 -0
  335. ui/static/static/static/rules/index.txt +21 -0
  336. ui/static/static/static/schemas/__next._full.txt +21 -0
  337. ui/static/static/static/schemas/__next._head.txt +7 -0
  338. ui/static/static/static/schemas/__next._index.txt +9 -0
  339. ui/static/static/static/schemas/__next._tree.txt +2 -0
  340. ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
  341. ui/static/static/static/schemas/__next.schemas.txt +4 -0
  342. ui/static/static/static/schemas/index.html +1 -0
  343. ui/static/static/static/schemas/index.txt +21 -0
  344. ui/static/static/static/settings/__next._full.txt +21 -0
  345. ui/static/static/static/settings/__next._head.txt +7 -0
  346. ui/static/static/static/settings/__next._index.txt +9 -0
  347. ui/static/static/static/settings/__next._tree.txt +2 -0
  348. ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
  349. ui/static/static/static/settings/__next.settings.txt +4 -0
  350. ui/static/static/static/settings/index.html +1 -0
  351. ui/static/static/static/settings/index.txt +21 -0
  352. ui/static/static/static/validation/__next._full.txt +21 -0
  353. ui/static/static/static/validation/__next._head.txt +7 -0
  354. ui/static/static/static/validation/__next._index.txt +9 -0
  355. ui/static/static/static/validation/__next._tree.txt +2 -0
  356. ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
  357. ui/static/static/static/validation/__next.validation.txt +4 -0
  358. ui/static/static/static/validation/index.html +1 -0
  359. ui/static/static/static/validation/index.txt +21 -0
  360. ui/static/static/validation/__next._full.txt +2 -2
  361. ui/static/static/validation/__next._head.txt +1 -1
  362. ui/static/static/validation/__next._index.txt +2 -2
  363. ui/static/static/validation/__next._tree.txt +2 -2
  364. ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
  365. ui/static/static/validation/__next.validation.txt +1 -1
  366. ui/static/static/validation/index.html +1 -1
  367. ui/static/static/validation/index.txt +2 -2
  368. ui/static/validation/__next._full.txt +2 -2
  369. ui/static/validation/__next._head.txt +1 -1
  370. ui/static/validation/__next._index.txt +1 -1
  371. ui/static/validation/__next._tree.txt +1 -1
  372. ui/static/validation/__next.validation.__PAGE__.txt +2 -2
  373. ui/static/validation/__next.validation.txt +1 -1
  374. ui/static/validation/index.html +1 -1
  375. ui/static/validation/index.txt +2 -2
  376. pycharter/data/templates/template_coercion_rules.yaml +0 -15
  377. pycharter/data/templates/template_contract.yaml +0 -587
  378. pycharter/data/templates/template_metadata.yaml +0 -38
  379. pycharter/data/templates/template_schema.yaml +0 -22
  380. pycharter/data/templates/template_transform_advanced.yaml +0 -50
  381. pycharter/data/templates/template_transform_simple.yaml +0 -59
  382. pycharter/data/templates/template_validation_rules.yaml +0 -29
  383. pycharter/etl_generator/extraction.py +0 -916
  384. pycharter/etl_generator/factory.py +0 -174
  385. pycharter/etl_generator/orchestrator.py +0 -1650
  386. pycharter/integrations/__init__.py +0 -19
  387. pycharter/integrations/kafka.py +0 -178
  388. pycharter/integrations/streaming.py +0 -100
  389. pycharter-0.0.22.dist-info/RECORD +0 -358
  390. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
  391. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
  392. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
  393. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
  394. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
  395. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
  396. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
  397. /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
  398. /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
  399. /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
  400. /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
  401. /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
  402. /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
  403. /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
  404. /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
@@ -0,0 +1,743 @@
1
+ """
2
+ Pipeline class with | operator for chaining.
3
+
4
+ Supports both config-driven and programmatic pipeline construction.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ import re
10
+ import uuid
11
+ from datetime import datetime, timezone
12
+ from pathlib import Path
13
+ from typing import Any, Callable, Dict, List, Optional, Union
14
+
15
+ import yaml
16
+
17
+ from pycharter.etl_generator.context import PipelineContext
18
+ from pycharter.etl_generator.protocols import Extractor, Transformer, Loader
19
+ from pycharter.etl_generator.result import PipelineResult, BatchResult
20
+ from pycharter.shared.errors import ErrorContext, ErrorMode, get_error_context
21
+
22
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Variable pattern: ${VAR} or ${VAR:-default} or ${VAR:?error}
# Capture groups:
#   1 - the variable name (one or more characters other than "}" and ":")
#   2 - the optional operator following the colon: "-" or "?"
#   3 - the text after the operator, up to the closing "}" (may be empty)
# Groups 2 and 3 are None when the plain ${VAR} form is matched.
VARIABLE_PATTERN = re.compile(r'\$\{([^}:]+)(?::([?-])([^}]*))?\}')
26
+
27
+
28
+ class Pipeline:
29
+ """
30
+ ETL Pipeline with | operator for chaining transformers.
31
+
32
+ Programmatic usage:
33
+ >>> pipeline = (
34
+ ... Pipeline(HTTPExtractor(url="..."))
35
+ ... | Rename({"old": "new"})
36
+ ... | PostgresLoader(...)
37
+ ... )
38
+ >>> result = await pipeline.run()
39
+
40
+ Config-driven usage:
41
+ >>> # From explicit files (most flexible)
42
+ >>> pipeline = Pipeline.from_config_files(
43
+ ... extract="configs/extract.yaml",
44
+ ... load="configs/load.yaml",
45
+ ... variables={"API_KEY": "secret"}
46
+ ... )
47
+ >>>
48
+ >>> # From directory (expects extract.yaml, transform.yaml, load.yaml)
49
+ >>> pipeline = Pipeline.from_config_dir("pipelines/users/")
50
+ >>>
51
+ >>> # From single file (pipeline.yaml with all sections)
52
+ >>> pipeline = Pipeline.from_config_file("pipelines/users/pipeline.yaml")
53
+ >>>
54
+ >>> result = await pipeline.run()
55
+
56
+ Async execution:
57
+ run() is async. From a script use asyncio.run():
58
+ asyncio.run(pipeline.run())
59
+ From an async context (FastAPI, Jupyter) await directly:
60
+ result = await pipeline.run()
61
+ See pycharter/etl_generator/ASYNC_AND_EXECUTION.md for details.
62
+ """
63
+
64
def __init__(
    self,
    extractor: Optional[Extractor] = None,
    transformers: Optional[List[Transformer]] = None,
    loader: Optional[Loader] = None,
    context: Optional[PipelineContext] = None,
    name: Optional[str] = None,
):
    """
    Store the pipeline stages and the context they run with.

    Args:
        extractor: Source stage; may be None.
        transformers: Transform stages, applied in the given order.
            The sequence is copied into a new list.
        loader: Sink stage; may be None.
        context: Pipeline context. A new PipelineContext is created
            when the argument is missing or falsy.
        name: Optional pipeline name.
    """
    self.extractor = extractor
    # list(...) takes a private copy; a missing/empty argument yields [].
    self._transformers: List[Transformer] = list(transformers or [])
    self.loader = loader
    self.context = context if context else PipelineContext()
    self.name = name
77
+
78
def __or__(self, other: Union[Transformer, Loader]) -> "Pipeline":
    """
    Return a new Pipeline extended with ``other`` via the | operator.

    If ``other`` is a Loader it becomes the new pipeline's loader and the
    transformer chain is carried over unchanged; anything else is appended
    to a copy of the transformer chain and the current loader is kept.
    This pipeline itself is not modified.
    """
    chain = self._transformers.copy()
    sink = self.loader

    if isinstance(other, Loader):
        sink = other
    else:
        chain.append(other)

    return Pipeline(
        extractor=self.extractor,
        transformers=chain,
        loader=sink,
        context=self.context,
        name=self.name,
    )
98
+
99
async def run(
    self,
    dry_run: bool = False,
    error_context: Optional[ErrorContext] = None,
    **params,
) -> PipelineResult:
    """
    Run the ETL pipeline.

    Each batch yielded by the extractor is passed through the transformer
    chain and then, unless this is a dry run, handed to the loader.

    Args:
        dry_run: If True, extract and transform but do not load. The
            transformed row count is still added to ``rows_loaded``.
        error_context: Optional error context for handling failures.
            If not set, uses the default from get_error_context().
            Every failure is passed to its ``handle_error``; whether that
            call raises is decided by the context.
        **params: Passed to extractor.extract() and loader.load().

    Returns:
        PipelineResult with counts and any errors. Load failures are
        recorded on the affected BatchResult and in ``result.errors``;
        ``result.success`` is False whenever ``result.errors`` is
        non-empty.
    """
    run_id = str(uuid.uuid4())[:8]
    start_time = datetime.now(timezone.utc)
    ctx = error_context or get_error_context()

    result = PipelineResult(
        pipeline_name=self.name,
        run_id=run_id,
        start_time=start_time,
    )

    if not self.extractor:
        result.success = False
        result.errors.append("No extractor configured")
        return result

    logger.info(f"[{run_id}] Starting pipeline: {self.name or 'unnamed'}")

    try:
        batch_index = 0
        async for batch in self.extractor.extract(**params):
            batch_result = BatchResult(batch_index=batch_index, rows_in=len(batch))

            # Transform
            transformed = self._apply_transforms(batch)
            batch_result.rows_out = len(transformed)

            # Load
            if not dry_run and self.loader and transformed:
                load_error: Optional[str] = None
                load_exception: Optional[Exception] = None
                try:
                    load_result = await self.loader.load(transformed, **params)
                    if load_result.success:
                        result.rows_loaded += load_result.rows_loaded
                    else:
                        load_error = load_result.error or "Load failed"
                except Exception as e:
                    load_error, load_exception = str(e), e

                # "is not None": str(e) may legitimately be an empty string.
                if load_error is not None:
                    batch_result.errors.append(load_error)
                    batch_result.rows_failed += len(transformed)
                    # Surface the failure on the run as a whole so that
                    # result.success reflects it.
                    result.errors.append(load_error)
                    # Called outside the try above so that an exception
                    # raised by the handler is not caught and reported as
                    # a second load failure.
                    if load_exception is not None:
                        ctx.handle_error(load_error, load_exception, category="load")
                    else:
                        ctx.handle_error(load_error, category="load")
            elif dry_run:
                # Nothing is written; count what would have been loaded.
                result.rows_loaded += len(transformed)

            result.rows_extracted += len(batch)
            result.rows_transformed += len(transformed)
            result.batches_processed += 1
            result.batch_results.append(batch_result)
            batch_index += 1

    except Exception as e:
        result.success = False
        result.errors.append(str(e))
        # Log before delegating: handle_error may raise, which would
        # otherwise skip this log line.
        logger.error(f"[{run_id}] Pipeline error: {e}")
        ctx.handle_error(str(e), e, category="pipeline")

    result.end_time = datetime.now(timezone.utc)
    result.duration_seconds = (result.end_time - start_time).total_seconds()
    result.rows_failed = sum(br.rows_failed for br in result.batch_results)

    if result.errors:
        result.success = False

    logger.info(f"[{run_id}] Complete: extracted={result.rows_extracted}, loaded={result.rows_loaded}")
    return result
184
+
185
+ def _apply_transforms(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
186
+ """Apply all transformers to data."""
187
+ result = data
188
+ for transformer in self._transformers:
189
+ result = transformer.transform(result)
190
+ return result
191
+
192
+ # =========================================================================
193
+ # CONFIG-DRIVEN FACTORY METHODS
194
+ # =========================================================================
195
+
196
@classmethod
def from_config_files(
    cls,
    extract: Union[str, Path, Dict[str, Any]],
    load: Union[str, Path, Dict[str, Any]],
    transform: Optional[Union[str, Path, Dict[str, Any], List[Dict[str, Any]]]] = None,
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
    name: Optional[str] = None,
) -> "Pipeline":
    """
    Build a pipeline from explicitly named config files or in-memory configs.

    No directory layout or file naming is assumed: each stage is given
    either as a path to its config file or as the config itself.

    Args:
        extract: Path to the extract config file, or the config as a dict.
        load: Path to the load config file, or the config as a dict.
        transform: Optional path to the transform config, or the config
            as a dict/list. When omitted, an empty dict is used.
        variables: Variables for ${VAR} substitution in config values.
        validate: If True, validate configs against schemas.
        name: Optional pipeline name.

    Returns:
        Configured Pipeline instance.

    Example:
        pipeline = Pipeline.from_config_files(
            extract="configs/my_http_source.yaml",
            transform="configs/my_transforms.yaml",
            load="configs/my_postgres_sink.yaml",
            variables={"API_KEY": "secret", "DB_URL": "postgresql://..."}
        )
    """
    substitutions = variables or {}

    # Resolve each stage in the order extract, load, transform.
    extract_config = _load_config_input(extract, substitutions)
    load_config = _load_config_input(load, substitutions)
    transform_config = (
        {} if transform is None else _load_config_input(transform, substitutions)
    )

    return cls._build_from_configs(
        extract_config=extract_config,
        transform_config=transform_config,
        load_config=load_config,
        variables=substitutions,
        validate=validate,
        name=name,
    )
250
+
251
@classmethod
def from_config_dir(
    cls,
    directory: Union[str, Path],
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
    name: Optional[str] = None,
) -> "Pipeline":
    """
    Build a pipeline from a directory of conventionally named configs.

    The directory is expected to contain:
        - extract.yaml (required)
        - transform.yaml (optional)
        - load.yaml (required)

    Args:
        directory: Path to directory containing config files
        variables: Variables for ${VAR} substitution
        validate: If True, validate configs against schemas
        name: Optional pipeline name (defaults to directory name)

    Returns:
        Configured Pipeline instance

    Raises:
        NotADirectoryError: If ``directory`` is not an existing directory
        FileNotFoundError: If extract.yaml or load.yaml is missing

    Example:
        pipeline = Pipeline.from_config_dir(
            "pipelines/users/",
            variables={"DATA_DIR": "./data", "OUTPUT_DIR": "./output"}
        )
    """
    directory = Path(directory)
    if not directory.is_dir():
        raise NotADirectoryError(f"Not a directory: {directory}")

    substitutions = variables or {}

    extract_file = directory / "extract.yaml"
    if not extract_file.exists():
        raise FileNotFoundError(f"Required file not found: {extract_file}")

    load_file = directory / "load.yaml"
    if not load_file.exists():
        raise FileNotFoundError(f"Required file not found: {load_file}")

    extract_config = _load_config_input(extract_file, substitutions)
    load_config = _load_config_input(load_file, substitutions)

    # The transform section is optional; a missing file means "no transforms".
    transform_file = directory / "transform.yaml"
    if transform_file.exists():
        transform_config = _load_config_input(transform_file, substitutions)
    else:
        transform_config = {}

    return cls._build_from_configs(
        extract_config=extract_config,
        transform_config=transform_config,
        load_config=load_config,
        variables=substitutions,
        validate=validate,
        name=name or directory.name,
    )
311
+
312
@classmethod
def from_config_file(
    cls,
    path: Union[str, Path],
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
) -> "Pipeline":
    """
    Build a pipeline from one config file that holds every section.

    The file should have extract, transform (optional), and load sections:

        name: my_pipeline
        extract:
          type: http
          url: https://api.example.com
        transform:
          - rename: {old: new}
        load:
          type: file
          path: output.json

    Args:
        path: Path to pipeline config file (YAML)
        variables: Variables for ${VAR} substitution
        validate: If True, validate config against schema

    Returns:
        Configured Pipeline instance

    Raises:
        FileNotFoundError: If ``path`` does not exist
        ValueError: If ``path`` is not a regular file, or the loaded config
            lacks an 'extract' or 'load' section

    Example:
        pipeline = Pipeline.from_config_file(
            "pipelines/users/pipeline.yaml",
            variables={"API_KEY": "secret"}
        )
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")
    if not path.is_file():
        raise ValueError(f"Not a file: {path}. Use from_config_dir() for directories.")

    substitutions = variables or {}
    config = _load_config_input(path, substitutions)

    # Both mandatory sections are checked, extract first.
    required_sections = (
        ("extract", f"Config file missing 'extract' section: {path}"),
        ("load", f"Config file missing 'load' section: {path}"),
    )
    for section, message in required_sections:
        if section not in config:
            raise ValueError(message)

    return cls._build_from_configs(
        extract_config=config["extract"],
        transform_config=config.get("transform", {}),
        load_config=config["load"],
        variables=substitutions,
        validate=validate,
        name=config.get("name"),
    )
372
+
373
@classmethod
def from_dict(
    cls,
    config: Dict[str, Any],
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
) -> "Pipeline":
    """
    Build a pipeline from an in-memory configuration dictionary.

    Each section is passed through ``PipelineContext.resolve_dict`` before
    the pipeline is assembled. For a list-style transform section only the
    dict entries are resolved; other entries are kept unchanged.

    Args:
        config: Dict with 'extract', 'transform' (optional), 'load' sections
        variables: Variables for ${VAR} substitution
        validate: If True, validate config against schema

    Returns:
        Configured Pipeline instance

    Raises:
        ValueError: If the 'extract' or 'load' section is missing

    Example:
        pipeline = Pipeline.from_dict({
            "name": "my_pipeline",
            "extract": {"type": "http", "url": "https://api.example.com"},
            "transform": [{"rename": {"userId": "user_id"}}],
            "load": {"type": "file", "path": "${OUTPUT_DIR}/result.json"}
        }, variables={"OUTPUT_DIR": "./output"})
    """
    required_sections = (
        ("extract", "Config dict missing 'extract' section"),
        ("load", "Config dict missing 'load' section"),
    )
    for section, message in required_sections:
        if section not in config:
            raise ValueError(message)

    substitutions = variables or {}
    resolver = PipelineContext(variables=substitutions)

    # Sections are resolved in extract -> transform -> load order.
    extract_config = resolver.resolve_dict(config["extract"])

    transform_section = config.get("transform", {})
    if not isinstance(transform_section, list):
        transform_config = resolver.resolve_dict(transform_section)
    else:
        transform_config = []
        for entry in transform_section:
            if isinstance(entry, dict):
                entry = resolver.resolve_dict(entry)
            transform_config.append(entry)

    load_config = resolver.resolve_dict(config["load"])

    return cls._build_from_configs(
        extract_config=extract_config,
        transform_config=transform_config,
        load_config=load_config,
        variables=substitutions,
        validate=validate,
        name=config.get("name"),
    )
427
+
428
@classmethod
def _build_from_configs(
    cls,
    extract_config: Dict[str, Any],
    transform_config: Union[Dict[str, Any], List[Dict[str, Any]]],
    load_config: Dict[str, Any],
    variables: Dict[str, str],
    validate: bool,
    name: Optional[str],
) -> "Pipeline":
    """Assemble a pipeline from already-loaded section configs.

    When ``validate`` is true, the extract, transform (if non-empty) and
    load configs are checked with a strict ``ConfigValidator`` before any
    component is created.
    """
    from pycharter.etl_generator.config_validator import ConfigValidator

    if validate:
        checker = ConfigValidator(strict=True)
        checker.validate_extract(extract_config)
        if transform_config:
            # A list-style transform config is wrapped in a dict for validation.
            if isinstance(transform_config, list):
                transform_payload = {"transform": transform_config}
            else:
                transform_payload = transform_config
            checker.validate_transform(transform_payload)
        checker.validate_load(load_config)

    # Context first, then the components in extract -> transform -> load order.
    pipeline_context = PipelineContext(variables=variables)
    source = _create_extractor(extract_config)
    steps = _create_transformers(transform_config)
    sink = _create_loader(load_config)

    return cls(
        extractor=source,
        transformers=steps,
        loader=sink,
        context=pipeline_context,
        name=name,
    )
468
+
469
+
470
+ # =============================================================================
471
+ # HELPER FUNCTIONS
472
+ # =============================================================================
473
+
474
+ def _load_config_input(
475
+ config_input: Union[str, Path, Dict[str, Any], List[Dict[str, Any]]],
476
+ variables: Dict[str, str],
477
+ ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
478
+ """Load config from file path or return dict/list directly."""
479
+ if isinstance(config_input, (dict, list)):
480
+ return config_input
481
+
482
+ path = Path(config_input)
483
+ if not path.exists():
484
+ raise FileNotFoundError(f"Config file not found: {path}")
485
+
486
+ with open(path) as f:
487
+ content = f.read()
488
+
489
+ # Resolve variables in content before parsing
490
+ content = _resolve_variables(content, variables)
491
+
492
+ return yaml.safe_load(content) or {}
493
+
494
+
495
def _resolve_variables(content: str, variables: Dict[str, str]) -> str:
    """Substitute every ``VARIABLE_PATTERN`` match in ``content``.

    For each match, group 1 is the variable name, group 2 an optional
    modifier and group 3 the modifier's argument. Lookup order is the
    ``variables`` mapping first, then ``os.environ``.

    A falsy value (missing, ``None``, empty string, ``0``) counts as "not
    set". In that case:
        - modifier ``-``: the modifier argument is used as a default
          (empty string when there is none);
        - modifier ``?``: ``ValueError`` is raised, using the modifier
          argument as the message when one is given;
        - no modifier: the placeholder text is left untouched.

    Args:
        content: Text containing placeholders.
        variables: Caller-supplied values; takes precedence over the
            environment.

    Returns:
        ``content`` with resolvable placeholders replaced.

    Raises:
        ValueError: If a placeholder with the ``?`` modifier is not set.
    """
    def replace_var(match):
        var_name = match.group(1)
        modifier = match.group(2)
        modifier_value = match.group(3)

        # Check provided variables first, then environment
        value = variables.get(var_name) or os.environ.get(var_name)

        if value:
            # re.sub requires the callback to return str; coerce so that
            # non-string values (e.g. an int port) do not raise TypeError.
            return str(value)

        # Handle modifiers
        if modifier == "-":
            return modifier_value if modifier_value is not None else ""
        if modifier == "?":
            error_msg = modifier_value or f"Required variable {var_name} is not set"
            raise ValueError(error_msg)

        # Unresolved and no modifier: keep the placeholder text as written.
        return match.group(0)

    return VARIABLE_PATTERN.sub(replace_var, content)
518
+
519
+
520
+ def _create_extractor(config: Dict[str, Any]) -> Optional[Extractor]:
521
+ """Create extractor from config using explicit type field."""
522
+ if not config:
523
+ return None
524
+
525
+ from pycharter.etl_generator.extractors import (
526
+ HTTPExtractor,
527
+ FileExtractor,
528
+ DatabaseExtractor,
529
+ CloudStorageExtractor,
530
+ )
531
+
532
+ EXTRACTOR_REGISTRY = {
533
+ "http": HTTPExtractor,
534
+ "file": FileExtractor,
535
+ "database": DatabaseExtractor,
536
+ "cloud_storage": CloudStorageExtractor,
537
+ }
538
+
539
+ # Get type field
540
+ extract_type = config.get("type")
541
+
542
+ if not extract_type:
543
+ raise ValueError(
544
+ "Extract config missing required 'type' field. "
545
+ f"Supported types: {list(EXTRACTOR_REGISTRY.keys())}"
546
+ )
547
+
548
+ extract_type = extract_type.lower()
549
+ extractor_class = EXTRACTOR_REGISTRY.get(extract_type)
550
+
551
+ if not extractor_class:
552
+ raise ValueError(
553
+ f"Unknown extractor type: '{extract_type}'. "
554
+ f"Supported types: {list(EXTRACTOR_REGISTRY.keys())}"
555
+ )
556
+
557
+ return extractor_class.from_config(config)
558
+
559
+
560
+ def _create_transformers(config: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[Transformer]:
561
+ """Create transformer chain from config."""
562
+ if not config:
563
+ return []
564
+
565
+ # Handle nested 'transform' key
566
+ if isinstance(config, dict) and "transform" in config:
567
+ config = config["transform"]
568
+
569
+ # List format - ordered transforms
570
+ if isinstance(config, list):
571
+ return _create_transformers_from_list(config)
572
+
573
+ # Dict format - fixed order
574
+ return _create_transformers_from_dict(config)
575
+
576
+
577
+ def _create_transformers_from_list(config: List[Dict[str, Any]]) -> List[Transformer]:
578
+ """Create transformers from list format (user-specified order)."""
579
+ transformers = []
580
+
581
+ for step in config:
582
+ if not isinstance(step, dict):
583
+ logger.warning(f"Invalid transform step (expected dict): {step}")
584
+ continue
585
+
586
+ for op_name, op_config in step.items():
587
+ transformer = _create_single_transformer(op_name, op_config)
588
+ if transformer:
589
+ if isinstance(transformer, list):
590
+ transformers.extend(transformer)
591
+ else:
592
+ transformers.append(transformer)
593
+
594
+ return transformers
595
+
596
+
597
+ def _create_transformers_from_dict(config: Dict[str, Any]) -> List[Transformer]:
598
+ """Create transformers from dict format (fixed order)."""
599
+ transformers = []
600
+ ordered_ops = ["rename", "convert", "defaults", "add", "select", "drop", "filter", "custom_function"]
601
+
602
+ for op_name in ordered_ops:
603
+ if op_name in config:
604
+ transformer = _create_single_transformer(op_name, config[op_name])
605
+ if transformer:
606
+ if isinstance(transformer, list):
607
+ transformers.extend(transformer)
608
+ else:
609
+ transformers.append(transformer)
610
+
611
+ return transformers
612
+
613
+
614
def _create_single_transformer(op_name: str, op_config: Any) -> Optional[Union[Transformer, List[Transformer]]]:
    """Create the transformer(s) for one named operation.

    Supported operations and the config shape each expects:
        - ``rename``, ``defaults``, ``convert``, ``filter``,
          ``custom_function``: dict
        - ``add``: dict (yields one ``AddField`` per entry, as a list)
        - ``drop``, ``select``: list

    Args:
        op_name: Operation name, matched case-insensitively.
        op_config: Operation-specific configuration.

    Returns:
        A transformer, a list of transformers (``add``), or ``None`` when
        the operation is unknown or its config cannot be used. Every
        ``None`` case is logged as a warning so that a mis-typed config does
        not silently drop a step.
    """
    from pycharter.etl_generator.transformers import (
        Rename, AddField, Drop, Select, Filter, Convert, Default, CustomFunction,
    )
    from pycharter.etl_generator.transformers.simple_operations import convert_type

    # str() guards against non-string keys (YAML allows e.g. integer keys).
    op_name = str(op_name).lower()

    list_ops = ("drop", "select")
    dict_ops = ("rename", "add", "convert", "defaults", "filter", "custom_function")

    if op_name not in list_ops and op_name not in dict_ops:
        logger.warning(f"Unknown transform operation: {op_name}")
        return None

    expected_type = list if op_name in list_ops else dict
    if not isinstance(op_config, expected_type):
        logger.warning(
            f"Ignoring transform '{op_name}': expected {expected_type.__name__} "
            f"config, got {type(op_config).__name__}"
        )
        return None

    if op_name == "rename":
        return Rename(op_config)

    if op_name == "add":
        return [AddField(field, value) for field, value in op_config.items()]

    if op_name == "drop":
        return Drop(op_config)

    if op_name == "select":
        return Select(op_config)

    if op_name == "defaults":
        return Default(op_config)

    if op_name == "convert":
        # NOTE(review): presumably Convert applies these callables to each
        # value; if so, "bool" maps any non-empty string (including "false")
        # to True - confirm against Convert's implementation.
        type_map = {
            "int": int, "integer": int,
            "float": float, "number": float, "numeric": float,
            "str": str, "string": str,
            "bool": bool, "boolean": bool,
        }
        conversions = {}
        for field, target_type in op_config.items():
            target_lower = target_type.lower() if isinstance(target_type, str) else str(target_type)
            if target_lower in type_map:
                conversions[field] = type_map[target_lower]
            elif target_lower in ("datetime", "date"):
                # Bind the target name as a default to avoid late binding.
                conversions[field] = lambda v, t=target_lower: convert_type(v, t)
            else:
                logger.warning(
                    f"Unknown convert target type '{target_lower}' for field "
                    f"'{field}'; falling back to str"
                )
                conversions[field] = str
        return Convert(conversions)

    if op_name == "filter":
        field = op_config.get("field")
        operator = op_config.get("operator", "eq")
        value = op_config.get("value")
        if not field or not operator:
            logger.warning("Ignoring filter transform: 'field' and 'operator' are required")
            return None
        predicate = _create_filter_predicate(field, operator, value)
        if not predicate:
            logger.warning(f"Ignoring filter transform: unknown operator '{operator}'")
            return None
        return Filter(predicate)

    # Only "custom_function" remains at this point.
    return CustomFunction(
        module=op_config.get("module"),
        function=op_config.get("function"),
        kwargs=op_config.get("kwargs", {}),
    )
684
+
685
+
686
+ def _create_filter_predicate(field: str, operator: str, value: Any) -> Optional[Callable]:
687
+ """Create a filter predicate function from operator and value."""
688
+ operators = {
689
+ "eq": lambda r: r.get(field) == value,
690
+ "ne": lambda r: r.get(field) != value,
691
+ "gt": lambda r: r.get(field) is not None and r.get(field) > value,
692
+ "gte": lambda r: r.get(field) is not None and r.get(field) >= value,
693
+ "lt": lambda r: r.get(field) is not None and r.get(field) < value,
694
+ "lte": lambda r: r.get(field) is not None and r.get(field) <= value,
695
+ "in": lambda r: r.get(field) in (value if isinstance(value, (list, tuple, set)) else [value]),
696
+ "not_in": lambda r: r.get(field) not in (value if isinstance(value, (list, tuple, set)) else [value]),
697
+ "contains": lambda r: value in str(r.get(field, "")),
698
+ "not_contains": lambda r: value not in str(r.get(field, "")),
699
+ "is_null": lambda r: r.get(field) is None,
700
+ "is_not_null": lambda r: r.get(field) is not None,
701
+ }
702
+ return operators.get(operator)
703
+
704
+
705
+ def _create_loader(config: Dict[str, Any]) -> Optional[Loader]:
706
+ """Create loader from config using explicit type field."""
707
+ if not config:
708
+ return None
709
+
710
+ from pycharter.etl_generator.loaders import (
711
+ PostgresLoader,
712
+ FileLoader,
713
+ CloudStorageLoader,
714
+ )
715
+
716
+ LOADER_REGISTRY = {
717
+ "postgres": PostgresLoader,
718
+ "postgresql": PostgresLoader,
719
+ "database": PostgresLoader,
720
+ "sqlite": PostgresLoader,
721
+ "file": FileLoader,
722
+ "cloud_storage": CloudStorageLoader,
723
+ }
724
+
725
+ # Get type field
726
+ load_type = config.get("type")
727
+
728
+ if not load_type:
729
+ raise ValueError(
730
+ "Load config missing required 'type' field. "
731
+ f"Supported types: postgres, sqlite, file, cloud_storage"
732
+ )
733
+
734
+ load_type = load_type.lower()
735
+ loader_class = LOADER_REGISTRY.get(load_type)
736
+
737
+ if not loader_class:
738
+ raise ValueError(
739
+ f"Unknown loader type: '{load_type}'. "
740
+ f"Supported types: postgres, sqlite, file, cloud_storage"
741
+ )
742
+
743
+ return loader_class.from_config(config)