pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404) hide show
  1. api/main.py +27 -1
  2. api/models/docs.py +68 -0
  3. api/models/evolution.py +117 -0
  4. api/models/tracking.py +111 -0
  5. api/models/validation.py +46 -6
  6. api/routes/v1/__init__.py +14 -1
  7. api/routes/v1/docs.py +187 -0
  8. api/routes/v1/evolution.py +337 -0
  9. api/routes/v1/templates.py +211 -27
  10. api/routes/v1/tracking.py +301 -0
  11. api/routes/v1/validation.py +68 -31
  12. pycharter/__init__.py +268 -58
  13. pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
  14. pycharter/data/templates/contract/template_contract.yaml +122 -0
  15. pycharter/data/templates/contract/template_metadata.yaml +68 -0
  16. pycharter/data/templates/contract/template_schema.yaml +100 -0
  17. pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
  18. pycharter/data/templates/etl/README.md +224 -0
  19. pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
  20. pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
  21. pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
  22. pycharter/data/templates/etl/extract_database.yaml +34 -0
  23. pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
  24. pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
  25. pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
  26. pycharter/data/templates/etl/extract_file_json.yaml +24 -0
  27. pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
  28. pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
  29. pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
  30. pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
  31. pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
  32. pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
  33. pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
  34. pycharter/data/templates/etl/load_file.yaml +34 -0
  35. pycharter/data/templates/etl/load_insert.yaml +18 -0
  36. pycharter/data/templates/etl/load_postgresql.yaml +39 -0
  37. pycharter/data/templates/etl/load_sqlite.yaml +21 -0
  38. pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
  39. pycharter/data/templates/etl/load_upsert.yaml +25 -0
  40. pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
  41. pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
  42. pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
  43. pycharter/data/templates/etl/transform_combined.yaml +48 -0
  44. pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
  45. pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
  46. pycharter/data/templates/etl/transform_simple.yaml +59 -0
  47. pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
  48. pycharter/docs_generator/__init__.py +43 -0
  49. pycharter/docs_generator/generator.py +465 -0
  50. pycharter/docs_generator/renderers.py +247 -0
  51. pycharter/etl_generator/__init__.py +168 -80
  52. pycharter/etl_generator/builder.py +121 -0
  53. pycharter/etl_generator/config_loader.py +394 -0
  54. pycharter/etl_generator/config_validator.py +418 -0
  55. pycharter/etl_generator/context.py +132 -0
  56. pycharter/etl_generator/expression.py +499 -0
  57. pycharter/etl_generator/extractors/__init__.py +30 -0
  58. pycharter/etl_generator/extractors/base.py +70 -0
  59. pycharter/etl_generator/extractors/cloud_storage.py +530 -0
  60. pycharter/etl_generator/extractors/database.py +221 -0
  61. pycharter/etl_generator/extractors/factory.py +185 -0
  62. pycharter/etl_generator/extractors/file.py +475 -0
  63. pycharter/etl_generator/extractors/http.py +895 -0
  64. pycharter/etl_generator/extractors/streaming.py +57 -0
  65. pycharter/etl_generator/loaders/__init__.py +41 -0
  66. pycharter/etl_generator/loaders/base.py +35 -0
  67. pycharter/etl_generator/loaders/cloud.py +87 -0
  68. pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
  69. pycharter/etl_generator/loaders/database.py +274 -0
  70. pycharter/etl_generator/loaders/factory.py +180 -0
  71. pycharter/etl_generator/loaders/file.py +72 -0
  72. pycharter/etl_generator/loaders/file_loader.py +130 -0
  73. pycharter/etl_generator/pipeline.py +743 -0
  74. pycharter/etl_generator/protocols.py +54 -0
  75. pycharter/etl_generator/result.py +63 -0
  76. pycharter/etl_generator/schemas/__init__.py +49 -0
  77. pycharter/etl_generator/transformers/__init__.py +49 -0
  78. pycharter/etl_generator/transformers/base.py +63 -0
  79. pycharter/etl_generator/transformers/config.py +45 -0
  80. pycharter/etl_generator/transformers/custom_function.py +101 -0
  81. pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
  82. pycharter/etl_generator/transformers/operations.py +218 -0
  83. pycharter/etl_generator/transformers/pipeline.py +54 -0
  84. pycharter/etl_generator/transformers/simple_operations.py +131 -0
  85. pycharter/quality/__init__.py +25 -0
  86. pycharter/quality/tracking/__init__.py +64 -0
  87. pycharter/quality/tracking/collector.py +318 -0
  88. pycharter/quality/tracking/exporters.py +238 -0
  89. pycharter/quality/tracking/models.py +194 -0
  90. pycharter/quality/tracking/store.py +385 -0
  91. pycharter/runtime_validator/__init__.py +20 -7
  92. pycharter/runtime_validator/builder.py +328 -0
  93. pycharter/runtime_validator/validator.py +311 -7
  94. pycharter/runtime_validator/validator_core.py +61 -0
  95. pycharter/schema_evolution/__init__.py +61 -0
  96. pycharter/schema_evolution/compatibility.py +270 -0
  97. pycharter/schema_evolution/diff.py +496 -0
  98. pycharter/schema_evolution/models.py +201 -0
  99. pycharter/shared/__init__.py +56 -0
  100. pycharter/shared/errors.py +296 -0
  101. pycharter/shared/protocols.py +234 -0
  102. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
  103. pycharter-0.0.24.dist-info/RECORD +543 -0
  104. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
  105. ui/static/404/index.html +1 -1
  106. ui/static/404.html +1 -1
  107. ui/static/__next.__PAGE__.txt +1 -1
  108. ui/static/__next._full.txt +1 -1
  109. ui/static/__next._head.txt +1 -1
  110. ui/static/__next._index.txt +1 -1
  111. ui/static/__next._tree.txt +1 -1
  112. ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
  113. ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
  114. ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
  115. ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
  116. ui/static/_not-found/__next._full.txt +1 -1
  117. ui/static/_not-found/__next._head.txt +1 -1
  118. ui/static/_not-found/__next._index.txt +1 -1
  119. ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  120. ui/static/_not-found/__next._not-found.txt +1 -1
  121. ui/static/_not-found/__next._tree.txt +1 -1
  122. ui/static/_not-found/index.html +1 -1
  123. ui/static/_not-found/index.txt +1 -1
  124. ui/static/contracts/__next._full.txt +2 -2
  125. ui/static/contracts/__next._head.txt +1 -1
  126. ui/static/contracts/__next._index.txt +1 -1
  127. ui/static/contracts/__next._tree.txt +1 -1
  128. ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  129. ui/static/contracts/__next.contracts.txt +1 -1
  130. ui/static/contracts/index.html +1 -1
  131. ui/static/contracts/index.txt +2 -2
  132. ui/static/documentation/__next._full.txt +1 -1
  133. ui/static/documentation/__next._head.txt +1 -1
  134. ui/static/documentation/__next._index.txt +1 -1
  135. ui/static/documentation/__next._tree.txt +1 -1
  136. ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
  137. ui/static/documentation/__next.documentation.txt +1 -1
  138. ui/static/documentation/index.html +2 -2
  139. ui/static/documentation/index.txt +1 -1
  140. ui/static/index.html +1 -1
  141. ui/static/index.txt +1 -1
  142. ui/static/metadata/__next._full.txt +1 -1
  143. ui/static/metadata/__next._head.txt +1 -1
  144. ui/static/metadata/__next._index.txt +1 -1
  145. ui/static/metadata/__next._tree.txt +1 -1
  146. ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  147. ui/static/metadata/__next.metadata.txt +1 -1
  148. ui/static/metadata/index.html +1 -1
  149. ui/static/metadata/index.txt +1 -1
  150. ui/static/quality/__next._full.txt +2 -2
  151. ui/static/quality/__next._head.txt +1 -1
  152. ui/static/quality/__next._index.txt +1 -1
  153. ui/static/quality/__next._tree.txt +1 -1
  154. ui/static/quality/__next.quality.__PAGE__.txt +2 -2
  155. ui/static/quality/__next.quality.txt +1 -1
  156. ui/static/quality/index.html +2 -2
  157. ui/static/quality/index.txt +2 -2
  158. ui/static/rules/__next._full.txt +1 -1
  159. ui/static/rules/__next._head.txt +1 -1
  160. ui/static/rules/__next._index.txt +1 -1
  161. ui/static/rules/__next._tree.txt +1 -1
  162. ui/static/rules/__next.rules.__PAGE__.txt +1 -1
  163. ui/static/rules/__next.rules.txt +1 -1
  164. ui/static/rules/index.html +1 -1
  165. ui/static/rules/index.txt +1 -1
  166. ui/static/schemas/__next._full.txt +1 -1
  167. ui/static/schemas/__next._head.txt +1 -1
  168. ui/static/schemas/__next._index.txt +1 -1
  169. ui/static/schemas/__next._tree.txt +1 -1
  170. ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  171. ui/static/schemas/__next.schemas.txt +1 -1
  172. ui/static/schemas/index.html +1 -1
  173. ui/static/schemas/index.txt +1 -1
  174. ui/static/settings/__next._full.txt +1 -1
  175. ui/static/settings/__next._head.txt +1 -1
  176. ui/static/settings/__next._index.txt +1 -1
  177. ui/static/settings/__next._tree.txt +1 -1
  178. ui/static/settings/__next.settings.__PAGE__.txt +1 -1
  179. ui/static/settings/__next.settings.txt +1 -1
  180. ui/static/settings/index.html +1 -1
  181. ui/static/settings/index.txt +1 -1
  182. ui/static/static/404/index.html +1 -1
  183. ui/static/static/404.html +1 -1
  184. ui/static/static/__next.__PAGE__.txt +1 -1
  185. ui/static/static/__next._full.txt +2 -2
  186. ui/static/static/__next._head.txt +1 -1
  187. ui/static/static/__next._index.txt +2 -2
  188. ui/static/static/__next._tree.txt +2 -2
  189. ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
  190. ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
  191. ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
  192. ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
  193. ui/static/static/_not-found/__next._full.txt +2 -2
  194. ui/static/static/_not-found/__next._head.txt +1 -1
  195. ui/static/static/_not-found/__next._index.txt +2 -2
  196. ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  197. ui/static/static/_not-found/__next._not-found.txt +1 -1
  198. ui/static/static/_not-found/__next._tree.txt +2 -2
  199. ui/static/static/_not-found/index.html +1 -1
  200. ui/static/static/_not-found/index.txt +2 -2
  201. ui/static/static/contracts/__next._full.txt +3 -3
  202. ui/static/static/contracts/__next._head.txt +1 -1
  203. ui/static/static/contracts/__next._index.txt +2 -2
  204. ui/static/static/contracts/__next._tree.txt +2 -2
  205. ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  206. ui/static/static/contracts/__next.contracts.txt +1 -1
  207. ui/static/static/contracts/index.html +1 -1
  208. ui/static/static/contracts/index.txt +3 -3
  209. ui/static/static/documentation/__next._full.txt +3 -3
  210. ui/static/static/documentation/__next._head.txt +1 -1
  211. ui/static/static/documentation/__next._index.txt +2 -2
  212. ui/static/static/documentation/__next._tree.txt +2 -2
  213. ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
  214. ui/static/static/documentation/__next.documentation.txt +1 -1
  215. ui/static/static/documentation/index.html +2 -2
  216. ui/static/static/documentation/index.txt +3 -3
  217. ui/static/static/index.html +1 -1
  218. ui/static/static/index.txt +2 -2
  219. ui/static/static/metadata/__next._full.txt +2 -2
  220. ui/static/static/metadata/__next._head.txt +1 -1
  221. ui/static/static/metadata/__next._index.txt +2 -2
  222. ui/static/static/metadata/__next._tree.txt +2 -2
  223. ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  224. ui/static/static/metadata/__next.metadata.txt +1 -1
  225. ui/static/static/metadata/index.html +1 -1
  226. ui/static/static/metadata/index.txt +2 -2
  227. ui/static/static/quality/__next._full.txt +2 -2
  228. ui/static/static/quality/__next._head.txt +1 -1
  229. ui/static/static/quality/__next._index.txt +2 -2
  230. ui/static/static/quality/__next._tree.txt +2 -2
  231. ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
  232. ui/static/static/quality/__next.quality.txt +1 -1
  233. ui/static/static/quality/index.html +2 -2
  234. ui/static/static/quality/index.txt +2 -2
  235. ui/static/static/rules/__next._full.txt +2 -2
  236. ui/static/static/rules/__next._head.txt +1 -1
  237. ui/static/static/rules/__next._index.txt +2 -2
  238. ui/static/static/rules/__next._tree.txt +2 -2
  239. ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
  240. ui/static/static/rules/__next.rules.txt +1 -1
  241. ui/static/static/rules/index.html +1 -1
  242. ui/static/static/rules/index.txt +2 -2
  243. ui/static/static/schemas/__next._full.txt +2 -2
  244. ui/static/static/schemas/__next._head.txt +1 -1
  245. ui/static/static/schemas/__next._index.txt +2 -2
  246. ui/static/static/schemas/__next._tree.txt +2 -2
  247. ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  248. ui/static/static/schemas/__next.schemas.txt +1 -1
  249. ui/static/static/schemas/index.html +1 -1
  250. ui/static/static/schemas/index.txt +2 -2
  251. ui/static/static/settings/__next._full.txt +2 -2
  252. ui/static/static/settings/__next._head.txt +1 -1
  253. ui/static/static/settings/__next._index.txt +2 -2
  254. ui/static/static/settings/__next._tree.txt +2 -2
  255. ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
  256. ui/static/static/settings/__next.settings.txt +1 -1
  257. ui/static/static/settings/index.html +1 -1
  258. ui/static/static/settings/index.txt +2 -2
  259. ui/static/static/static/.gitkeep +0 -0
  260. ui/static/static/static/404/index.html +1 -0
  261. ui/static/static/static/404.html +1 -0
  262. ui/static/static/static/__next.__PAGE__.txt +10 -0
  263. ui/static/static/static/__next._full.txt +30 -0
  264. ui/static/static/static/__next._head.txt +7 -0
  265. ui/static/static/static/__next._index.txt +9 -0
  266. ui/static/static/static/__next._tree.txt +2 -0
  267. ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
  268. ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
  269. ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
  270. ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
  271. ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
  272. ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
  273. ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
  274. ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
  275. ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
  276. ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
  277. ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
  278. ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
  279. ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
  280. ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
  281. ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
  282. ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
  283. ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
  284. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
  285. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
  286. ui/static/static/static/_not-found/__next._full.txt +17 -0
  287. ui/static/static/static/_not-found/__next._head.txt +7 -0
  288. ui/static/static/static/_not-found/__next._index.txt +9 -0
  289. ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
  290. ui/static/static/static/_not-found/__next._not-found.txt +4 -0
  291. ui/static/static/static/_not-found/__next._tree.txt +2 -0
  292. ui/static/static/static/_not-found/index.html +1 -0
  293. ui/static/static/static/_not-found/index.txt +17 -0
  294. ui/static/static/static/contracts/__next._full.txt +21 -0
  295. ui/static/static/static/contracts/__next._head.txt +7 -0
  296. ui/static/static/static/contracts/__next._index.txt +9 -0
  297. ui/static/static/static/contracts/__next._tree.txt +2 -0
  298. ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
  299. ui/static/static/static/contracts/__next.contracts.txt +4 -0
  300. ui/static/static/static/contracts/index.html +1 -0
  301. ui/static/static/static/contracts/index.txt +21 -0
  302. ui/static/static/static/documentation/__next._full.txt +21 -0
  303. ui/static/static/static/documentation/__next._head.txt +7 -0
  304. ui/static/static/static/documentation/__next._index.txt +9 -0
  305. ui/static/static/static/documentation/__next._tree.txt +2 -0
  306. ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
  307. ui/static/static/static/documentation/__next.documentation.txt +4 -0
  308. ui/static/static/static/documentation/index.html +93 -0
  309. ui/static/static/static/documentation/index.txt +21 -0
  310. ui/static/static/static/index.html +1 -0
  311. ui/static/static/static/index.txt +30 -0
  312. ui/static/static/static/metadata/__next._full.txt +21 -0
  313. ui/static/static/static/metadata/__next._head.txt +7 -0
  314. ui/static/static/static/metadata/__next._index.txt +9 -0
  315. ui/static/static/static/metadata/__next._tree.txt +2 -0
  316. ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
  317. ui/static/static/static/metadata/__next.metadata.txt +4 -0
  318. ui/static/static/static/metadata/index.html +1 -0
  319. ui/static/static/static/metadata/index.txt +21 -0
  320. ui/static/static/static/quality/__next._full.txt +21 -0
  321. ui/static/static/static/quality/__next._head.txt +7 -0
  322. ui/static/static/static/quality/__next._index.txt +9 -0
  323. ui/static/static/static/quality/__next._tree.txt +2 -0
  324. ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
  325. ui/static/static/static/quality/__next.quality.txt +4 -0
  326. ui/static/static/static/quality/index.html +2 -0
  327. ui/static/static/static/quality/index.txt +21 -0
  328. ui/static/static/static/rules/__next._full.txt +21 -0
  329. ui/static/static/static/rules/__next._head.txt +7 -0
  330. ui/static/static/static/rules/__next._index.txt +9 -0
  331. ui/static/static/static/rules/__next._tree.txt +2 -0
  332. ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
  333. ui/static/static/static/rules/__next.rules.txt +4 -0
  334. ui/static/static/static/rules/index.html +1 -0
  335. ui/static/static/static/rules/index.txt +21 -0
  336. ui/static/static/static/schemas/__next._full.txt +21 -0
  337. ui/static/static/static/schemas/__next._head.txt +7 -0
  338. ui/static/static/static/schemas/__next._index.txt +9 -0
  339. ui/static/static/static/schemas/__next._tree.txt +2 -0
  340. ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
  341. ui/static/static/static/schemas/__next.schemas.txt +4 -0
  342. ui/static/static/static/schemas/index.html +1 -0
  343. ui/static/static/static/schemas/index.txt +21 -0
  344. ui/static/static/static/settings/__next._full.txt +21 -0
  345. ui/static/static/static/settings/__next._head.txt +7 -0
  346. ui/static/static/static/settings/__next._index.txt +9 -0
  347. ui/static/static/static/settings/__next._tree.txt +2 -0
  348. ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
  349. ui/static/static/static/settings/__next.settings.txt +4 -0
  350. ui/static/static/static/settings/index.html +1 -0
  351. ui/static/static/static/settings/index.txt +21 -0
  352. ui/static/static/static/validation/__next._full.txt +21 -0
  353. ui/static/static/static/validation/__next._head.txt +7 -0
  354. ui/static/static/static/validation/__next._index.txt +9 -0
  355. ui/static/static/static/validation/__next._tree.txt +2 -0
  356. ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
  357. ui/static/static/static/validation/__next.validation.txt +4 -0
  358. ui/static/static/static/validation/index.html +1 -0
  359. ui/static/static/static/validation/index.txt +21 -0
  360. ui/static/static/validation/__next._full.txt +2 -2
  361. ui/static/static/validation/__next._head.txt +1 -1
  362. ui/static/static/validation/__next._index.txt +2 -2
  363. ui/static/static/validation/__next._tree.txt +2 -2
  364. ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
  365. ui/static/static/validation/__next.validation.txt +1 -1
  366. ui/static/static/validation/index.html +1 -1
  367. ui/static/static/validation/index.txt +2 -2
  368. ui/static/validation/__next._full.txt +2 -2
  369. ui/static/validation/__next._head.txt +1 -1
  370. ui/static/validation/__next._index.txt +1 -1
  371. ui/static/validation/__next._tree.txt +1 -1
  372. ui/static/validation/__next.validation.__PAGE__.txt +2 -2
  373. ui/static/validation/__next.validation.txt +1 -1
  374. ui/static/validation/index.html +1 -1
  375. ui/static/validation/index.txt +2 -2
  376. pycharter/data/templates/template_coercion_rules.yaml +0 -15
  377. pycharter/data/templates/template_contract.yaml +0 -587
  378. pycharter/data/templates/template_metadata.yaml +0 -38
  379. pycharter/data/templates/template_schema.yaml +0 -22
  380. pycharter/data/templates/template_transform_advanced.yaml +0 -50
  381. pycharter/data/templates/template_transform_simple.yaml +0 -59
  382. pycharter/data/templates/template_validation_rules.yaml +0 -29
  383. pycharter/etl_generator/extraction.py +0 -916
  384. pycharter/etl_generator/factory.py +0 -174
  385. pycharter/etl_generator/orchestrator.py +0 -1650
  386. pycharter/integrations/__init__.py +0 -19
  387. pycharter/integrations/kafka.py +0 -178
  388. pycharter/integrations/streaming.py +0 -100
  389. pycharter-0.0.22.dist-info/RECORD +0 -358
  390. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
  391. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
  392. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
  393. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
  394. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
  395. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
  396. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
  397. /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
  398. /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
  399. /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
  400. /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
  401. /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
  402. /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
  403. /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
  404. /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
@@ -0,0 +1,54 @@
1
+ """
2
+ Protocol definitions for ETL components.
3
+
4
+ Uses Python's Protocol for structural subtyping (duck typing with type hints).
5
+ """
6
+
7
+ from typing import Any, AsyncIterator, Dict, List, Protocol, runtime_checkable
8
+
9
+ from pycharter.etl_generator.result import LoadResult
10
+
11
+
@runtime_checkable
class Extractor(Protocol):
    """
    Protocol for data extractors.

    Extractors read data from sources (HTTP, files, databases, cloud storage)
    and yield batches of records.

    The method is meant to be consumed with
    ``async for batch in extractor.extract(...)``, i.e. implementations are
    expected to be async generators (or otherwise return an async iterator).
    """

    # Declared with a plain ``def`` on purpose: to a type checker, an
    # ``async def`` stub that contains no ``yield`` is a coroutine that
    # *returns* an iterator, which an async-generator implementation does not
    # satisfy. Runtime ``isinstance`` checks only look for the attribute, so
    # they behave the same either way.
    def extract(self, **params: Any) -> AsyncIterator[List[Dict[str, Any]]]:
        """
        Extract data from the source.

        Args:
            **params: Source-specific extraction parameters.

        Yields:
            Batches of records (list of dicts)
        """
        ...
29
+
30
+
@runtime_checkable
class Transformer(Protocol):
    """
    Structural interface for data transformers.

    Any object exposing a compatible ``transform`` method satisfies this
    protocol. Transformers process batches of records and can be chained
    with |.
    """

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the transformed version of a batch of records."""
        ...
42
+
43
+
@runtime_checkable
class Loader(Protocol):
    """
    Structural interface for data loaders.

    Loaders write data to destinations (databases, files, cloud storage).
    Any object exposing a compatible ``load`` coroutine satisfies this
    protocol.
    """

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """Write a batch of records to the destination and report the outcome."""
        ...
@@ -0,0 +1,63 @@
1
+ """
2
+ Result classes for ETL operations.
3
+ """
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timezone
7
+ from typing import Any, Dict, List, Optional
8
+
9
+
@dataclass
class LoadResult:
    """Outcome reported by a single load operation."""

    # Overall status flag; defaults to a successful load.
    success: bool = True
    # Row counters for the load; both start at zero.
    rows_loaded: int = 0
    rows_failed: int = 0
    # Optional error text; None when nothing was recorded.
    error: Optional[str] = None
    # Optional elapsed time in seconds; None when not measured.
    duration_seconds: Optional[float] = None
18
+
19
+
@dataclass
class BatchResult:
    """Outcome of processing one batch."""

    # Required identifier of the batch; the only field without a default.
    batch_index: int
    # Row counters for this batch; all start at zero.
    rows_in: int = 0
    rows_out: int = 0
    rows_failed: int = 0
    # Error messages collected for this batch; a fresh list per instance.
    errors: List[str] = field(default_factory=list)

    @property
    def success(self) -> bool:
        """True only when no errors were recorded and no rows failed."""
        if self.errors:
            return False
        return self.rows_failed == 0
32
+
33
+
@dataclass
class PipelineResult:
    """Aggregated outcome of a full ETL pipeline run."""

    # Overall status flag; defaults to success.
    success: bool = True
    # Row counters for the run; all start at zero.
    rows_extracted: int = 0
    rows_transformed: int = 0
    rows_loaded: int = 0
    rows_failed: int = 0
    # Optional timing information; None until set by the caller.
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: Optional[float] = None
    # Per-batch bookkeeping.
    batches_processed: int = 0
    batch_results: List[BatchResult] = field(default_factory=list)
    # Error messages collected during the run.
    errors: List[str] = field(default_factory=list)
    # Optional identifiers for the pipeline and this particular run.
    pipeline_name: Optional[str] = None
    run_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Return a plain-dict summary of the run.

        Only the fields listed below are exported; ``start_time``,
        ``end_time`` and ``batch_results`` are not part of the summary.
        """
        exported_fields = (
            "success",
            "rows_extracted",
            "rows_transformed",
            "rows_loaded",
            "rows_failed",
            "duration_seconds",
            "batches_processed",
            "errors",
            "pipeline_name",
            "run_id",
        )
        return {name: getattr(self, name) for name in exported_fields}
@@ -0,0 +1,49 @@
1
+ """
2
+ JSON Schemas for ETL Pipeline Configuration.
3
+
4
+ Provides validation schemas for extract, transform, load, and pipeline configs.
5
+ """
6
+
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Dict, Any
10
+
11
+ SCHEMA_DIR = Path(__file__).parent
12
+
13
+
def load_schema(name: str) -> Dict[str, Any]:
    """
    Load a JSON schema by name.

    Args:
        name: Base name of the schema file, without the ``.json`` suffix.

    Returns:
        The parsed schema document.

    Raises:
        FileNotFoundError: If ``<name>.json`` does not exist in ``SCHEMA_DIR``.
    """
    schema_path = SCHEMA_DIR / f"{name}.json"
    if not schema_path.exists():
        raise FileNotFoundError(f"Schema not found: {schema_path}")
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so non-ASCII schema text loads the same way on every OS.
    with open(schema_path, encoding="utf-8") as f:
        return json.load(f)
21
+
22
+
def get_extract_schema() -> Dict[str, Any]:
    """Return the schema for extract configs (loaded under the name "extract")."""
    extract_schema = load_schema("extract")
    return extract_schema
26
+
27
+
def get_transform_schema() -> Dict[str, Any]:
    """Return the schema for transform configs (loaded under the name "transform")."""
    transform_schema = load_schema("transform")
    return transform_schema
31
+
32
+
def get_load_schema() -> Dict[str, Any]:
    """Return the schema for load configs (loaded under the name "load")."""
    load_config_schema = load_schema("load")
    return load_config_schema
36
+
37
+
def get_pipeline_schema() -> Dict[str, Any]:
    """Return the combined pipeline config schema (loaded under the name "pipeline")."""
    pipeline_schema = load_schema("pipeline")
    return pipeline_schema
41
+
42
+
43
+ __all__ = [
44
+ "load_schema",
45
+ "get_extract_schema",
46
+ "get_transform_schema",
47
+ "get_load_schema",
48
+ "get_pipeline_schema",
49
+ ]
@@ -0,0 +1,49 @@
1
+ """
2
+ Transform stage for ETL pipelines.
3
+
4
+ Two APIs:
5
+ 1. Config-driven: apply_transforms(data, config) - uses YAML config
6
+ 2. Programmatic: Rename(...) | AddField(...) | Filter(...) - chainable
7
+
8
+ Pipeline order for config: Simple operations → JSONata → Custom function.
9
+ """
10
+
11
+ # Config-driven API
12
+ from pycharter.etl_generator.transformers.pipeline import apply_transforms
13
+
14
+ # Chainable transformers
15
+ from pycharter.etl_generator.transformers.base import (
16
+ BaseTransformer,
17
+ TransformerChain,
18
+ )
19
+ from pycharter.etl_generator.transformers.operations import (
20
+ Rename,
21
+ AddField,
22
+ Drop,
23
+ Select,
24
+ Filter,
25
+ Convert,
26
+ Default,
27
+ Map,
28
+ FlatMap,
29
+ CustomFunction,
30
+ )
31
+
32
+ __all__ = [
33
+ # Config-driven
34
+ "apply_transforms",
35
+ # Base classes
36
+ "BaseTransformer",
37
+ "TransformerChain",
38
+ # Operations
39
+ "Rename",
40
+ "AddField",
41
+ "Drop",
42
+ "Select",
43
+ "Filter",
44
+ "Convert",
45
+ "Default",
46
+ "Map",
47
+ "FlatMap",
48
+ "CustomFunction",
49
+ ]
@@ -0,0 +1,63 @@
1
+ """
2
+ Base transformer class with | operator support.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any, Dict, List, Optional
7
+
8
+
class BaseTransformer(ABC):
    """
    Abstract foundation for transformers that can be composed with ``|``.

    Example:
        >>> chain = Rename({"a": "b"}) | AddField("c", "value") | Drop(["d"])
        >>> result = chain.transform(data)
    """

    @abstractmethod
    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Transform a batch of records."""
        ...

    def __or__(self, other: "BaseTransformer") -> "TransformerChain":
        """Build a chain that runs ``self`` first, then ``other``."""
        # Splice an existing chain's members in rather than nesting the chain.
        if isinstance(other, TransformerChain):
            following = other.transformers
        else:
            following = [other]
        return TransformerChain([self] + following)

    def __ror__(self, other: "BaseTransformer") -> "TransformerChain":
        """Build a chain that runs ``other`` first, then ``self``."""
        if isinstance(other, TransformerChain):
            preceding = other.transformers
        else:
            preceding = [other]
        return TransformerChain(preceding + [self])
34
+
35
+
class TransformerChain(BaseTransformer):
    """
    Ordered sequence of transformers applied one after another.

    Instances are created automatically when transformers are combined
    with the | operator.
    """

    def __init__(self, transformers: Optional[List[BaseTransformer]] = None):
        # Always store a fresh list; a missing or empty argument gives [].
        self.transformers: List[BaseTransformer] = list(transformers or [])

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Pass the batch through every transformer, in order."""
        current = data
        for step in self.transformers:
            current = step.transform(current)
        return current

    def __or__(self, other: BaseTransformer) -> "TransformerChain":
        """Return a new chain with ``other`` (or its members) appended."""
        if isinstance(other, TransformerChain):
            appended = other.transformers
        else:
            appended = [other]
        return TransformerChain(self.transformers + appended)

    def __len__(self) -> int:
        """Number of transformers in the chain."""
        return len(self.transformers)

    def __iter__(self):
        """Iterate over the chained transformers in execution order."""
        return iter(self.transformers)
@@ -0,0 +1,45 @@
1
+ """
2
+ Normalize transform configuration for use by transformer modules.
3
+
4
+ Supports both the canonical shape (transform: { rename, convert, ... }) and
5
+ legacy top-level keys; outputs a single normalized dict for each step.
6
+ """
7
+
8
+ from typing import Any, Dict
9
+
10
+
11
def normalize_transform_config(raw: Dict[str, Any]) -> Dict[str, Any]:
    """
    Collapse the accepted transform config layouts into one shape.

    Canonical layout: ``transform: { rename, convert, defaults, add, select, drop }``
    Legacy layout: the same keys at the top level. Legacy keys are only read
    when ``raw`` has no ``transform`` key at all.

    Returns:
        A dict with exactly the keys ``simple_ops``, ``jsonata`` and
        ``custom_function``. Each value is a shallow dict copy of the
        configured section, or ``None`` when that section is absent or empty.
    """
    legacy_keys = ("rename", "convert", "defaults", "add", "select", "drop")

    if "transform" in raw:
        # Canonical layout wins outright; top-level legacy keys are ignored.
        ops: Dict[str, Any] = dict(raw.get("transform") or {})
    else:
        ops = {
            name: raw[name]
            for name in legacy_keys
            if name in raw and raw[name] is not None
        }

    jsonata_cfg = dict(raw["jsonata"]) if raw.get("jsonata") else None
    custom_cfg = dict(raw["custom_function"]) if raw.get("custom_function") else None

    return {
        "simple_ops": ops or None,
        "jsonata": jsonata_cfg,
        "custom_function": custom_cfg,
    }
@@ -0,0 +1,101 @@
1
+ """
2
+ Custom Python function transformation.
3
+
4
+ Dynamically imports and invokes a module/function or callable path,
5
+ with optional class instantiation (optimize/run/__call__).
6
+ """
7
+
8
+ import importlib
9
+ import logging
10
+ from typing import Any, Dict, List
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def apply_custom_function(
    data: List[Dict[str, Any]], config: Dict[str, Any], **kwargs: Any
) -> List[Dict[str, Any]]:
    """
    Run a dynamically imported Python callable as a transformation step.

    Args:
        data: Input records.
        config: Either 'callable' ("module.func") or 'module' + 'function'.
            When 'callable' is set it takes precedence over 'module'/'function'.
            Optional 'mode': "batch" (default) calls the function once with the
            whole list; any other value calls it once per record.
            Optional 'kwargs': dict of keyword arguments; a missing or null
            value is treated as no arguments.
        **kwargs: Runtime keyword arguments; they override config['kwargs']
            on key collisions.

    Returns:
        Batch mode: the function's result, wrapped in a list if it is not
        already one, or ``[]`` if it returned ``None``.
        Record mode: the per-record results, with ``None`` results skipped and
        list results flattened into the output.

    Raises:
        ValueError: If the config is incomplete or malformed, the module or
            attribute cannot be resolved, the target is a class without
            'optimize', 'run' or '__call__', or the function itself raises.

    Example config:
        custom_function:
          module: "pyoptima"
          function: "optimize_from_etl_inputs"
          mode: "batch"
          kwargs:
            method: "min_volatility"
    """
    callable_path = config.get("callable")
    module_path = config.get("module")
    func_name = config.get("function")

    if callable_path:
        parts = callable_path.rsplit(".", 1)
        if len(parts) != 2:
            raise ValueError(
                f"Invalid callable path: {callable_path}. "
                "Use 'module.function' format."
            )
        module_path, func_name = parts

    if not module_path or not func_name:
        raise ValueError(
            "custom_function requires either 'callable' or 'module' + 'function'"
        )

    try:
        module = importlib.import_module(module_path)
        func = getattr(module, func_name)
    except ImportError as e:
        raise ValueError(f"Cannot import module '{module_path}': {e}") from e
    except AttributeError as e:
        raise ValueError(
            f"Function '{func_name}' not found in module '{module_path}'"
        ) from e

    if isinstance(func, type):
        # The target is a class: instantiate it with no arguments and pick
        # the first available entry point.
        instance = func()
        if hasattr(instance, "optimize"):
            func = instance.optimize
        elif hasattr(instance, "run"):
            func = instance.run
        elif callable(instance):
            # callable() checks the type's __call__, which is what invoking
            # the instance actually uses.
            func = instance
        else:
            raise ValueError(
                f"Class '{func_name}' has no 'optimize', 'run', or '__call__'"
            )

    mode = config.get("mode", "batch")
    # A YAML `kwargs:` key with no value parses to None; unpacking None would
    # raise TypeError, so treat it the same as an absent key.
    func_kwargs = config.get("kwargs") or {}
    merged_kwargs = {**func_kwargs, **kwargs}

    try:
        if mode == "batch":
            result = func(data, **merged_kwargs)
            if result is None:
                return []
            return result if isinstance(result, list) else [result]

        results = []
        for record in data:
            record_result = func(record, **merged_kwargs)
            if record_result is None:
                continue
            if isinstance(record_result, list):
                results.extend(record_result)
            else:
                results.append(record_result)
        return results
    except Exception as e:
        logger.error("Custom function %r failed: %s", func_name, e)
        raise ValueError(f"Custom function error: {e}") from e
@@ -0,0 +1,56 @@
1
+ """
2
+ JSONata-based transformation.
3
+
4
+ Applies a JSONata expression to data in batch or record mode.
5
+ """
6
+
7
+ import logging
8
+ from typing import Any, Dict, List
9
+
10
+ import jsonata
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def apply_jsonata(
    data: List[Dict[str, Any]], config: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """
    Apply a JSONata expression to transform data.

    Args:
        data: Input data (list of records).
        config: Must have 'expression'. Optional 'mode': "batch" (default)
            evaluates the expression once against the whole list; any other
            value evaluates it once per record.

    Returns:
        ``data`` unchanged when no expression is configured.
        Batch mode: the evaluation result, wrapped in a list if it is not
        already one, or ``[]`` if it evaluated to ``None``.
        Record mode: one result per record, skipping ``None`` results.

    Raises:
        ValueError: If compiling or evaluating the expression fails.

    Example config:
        jsonata:
          expression: |
            $.{"ticker": symbol, "avg_price": $average(prices)}
          mode: "batch"
    """
    expression_str = config.get("expression")
    if not expression_str:
        return data

    mode = config.get("mode", "batch")

    try:
        expr = jsonata.Jsonata(expression_str)

        if mode == "batch":
            result = expr.evaluate(data)
            if result is None:
                return []
            return result if isinstance(result, list) else [result]

        # Record mode: evaluate each record exactly once, so the filter and
        # the emitted value come from the same evaluation.
        results = []
        for record in data:
            record_result = expr.evaluate(record)
            if record_result is not None:
                results.append(record_result)
        return results
    except Exception as e:
        logger.error("JSONata transformation failed: %s", e)
        raise ValueError(f"JSONata transformation error: {e}") from e
@@ -0,0 +1,218 @@
1
+ """
2
+ Built-in transformer operations.
3
+ """
4
+
5
+ from typing import Any, Callable, Dict, List, Optional, Union
6
+
7
+ from pycharter.etl_generator.transformers.base import BaseTransformer
8
+ from pycharter.etl_generator.expression import evaluate_expression, is_expression
9
+
10
+
11
class Rename(BaseTransformer):
    """Rename record keys using an old-name to new-name mapping."""

    def __init__(self, mapping: Dict[str, str]):
        self.mapping = mapping

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return new records; keys absent from the mapping keep their name."""
        renamed = []
        for record in data:
            row = {}
            for key, value in record.items():
                row[self.mapping.get(key, key)] = value
            renamed.append(row)
        return renamed
22
+
23
+
24
class AddField(BaseTransformer):
    """
    Add (or overwrite) one field on every record.

    The value may be:
    - a static value: AddField("status", "active")
    - a callable taking the record: AddField("full_name", lambda r: f"{r['first']} {r['last']}")
    - an expression string: AddField("full_name", "${first_name} ${last_name}")
    - a function-style expression: AddField("id", "uuid()")

    Expression syntax (handled by the expression module):
    - ${field_name} - Reference field value
    - ${field_name:-default} - Field with default
    - now() - Current timestamp
    - uuid() - Generate UUID
    - concat(${a}, " ", ${b}) - Concatenate values
    """

    def __init__(
        self,
        field: str,
        value: Union[Any, Callable[[Dict[str, Any]], Any]],
        evaluate_expressions: bool = True,
    ):
        """
        Initialize AddField transformer.

        Args:
            field: Name of the field to add
            value: Value, callable, or expression string
            evaluate_expressions: If True, evaluate string expressions.
                Set to False to use literal string values.
        """
        self.field = field
        self.value = value
        self.evaluate_expressions = evaluate_expressions

        # Decide once, up front, whether the value must be evaluated per record.
        self._is_expression = (
            evaluate_expressions and isinstance(value, str) and is_expression(value)
        )

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of the records with ``self.field`` set on each."""
        enriched_rows = []
        for record in data:
            enriched = dict(record)

            if callable(self.value):
                # Callables receive the original record.
                computed = self.value(record)
            elif self._is_expression:
                # Expressions are evaluated in the context of the record.
                computed = evaluate_expression(self.value, record)
            else:
                # Anything else is stored as-is.
                computed = self.value

            enriched[self.field] = computed
            enriched_rows.append(enriched)
        return enriched_rows
85
+
86
+
87
class Drop(BaseTransformer):
    """Remove the named fields from every record."""

    def __init__(self, fields: List[str]):
        # Stored as a set for membership tests.
        self.fields = set(fields)

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return new records without the dropped fields."""
        excluded = self.fields
        trimmed = []
        for record in data:
            trimmed.append(
                {name: value for name, value in record.items() if name not in excluded}
            )
        return trimmed
98
+
99
+
100
class Select(BaseTransformer):
    """Keep only the named fields on every record."""

    def __init__(self, fields: List[str]):
        # Stored as a set for membership tests.
        self.fields = set(fields)

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return new records restricted to the selected fields that are present."""
        wanted = self.fields
        projected = []
        for record in data:
            projected.append(
                {name: value for name, value in record.items() if name in wanted}
            )
        return projected
111
+
112
+
113
class Filter(BaseTransformer):
    """Keep only the records for which a predicate is truthy."""

    def __init__(self, predicate: Callable[[Dict[str, Any]], bool]):
        self.predicate = predicate

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the records accepted by the predicate, in input order."""
        accepted = []
        for record in data:
            if self.predicate(record):
                accepted.append(record)
        return accepted
121
+
122
+
123
class Convert(BaseTransformer):
    """
    Convert field values with per-field converter callables.

    ``errors`` controls what happens when a converter raises ``ValueError`` or
    ``TypeError``: "raise" re-raises, "null" stores ``None``, and any other
    value (including the default "ignore") leaves the original value in place.
    """

    def __init__(self, conversions: Dict[str, Callable[[Any], Any]], errors: str = "ignore"):
        self.conversions = conversions
        self.errors = errors

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of the records with the configured fields converted."""
        converted_rows = []
        for record in data:
            row = dict(record)
            for name, convert in self.conversions.items():
                if name not in row:
                    # Fields missing from the record are skipped.
                    continue
                try:
                    row[name] = convert(row[name])
                except (ValueError, TypeError):
                    if self.errors == "raise":
                        raise
                    if self.errors == "null":
                        row[name] = None
            converted_rows.append(row)
        return converted_rows
145
+
146
+
147
class Default(BaseTransformer):
    """
    Fill in default values for missing fields.

    When ``replace_null`` is true, fields present with a ``None`` value are
    filled as well.
    """

    def __init__(self, defaults: Dict[str, Any], replace_null: bool = True):
        self.defaults = defaults
        self.replace_null = replace_null

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of the records with defaults applied."""
        filled_rows = []
        for record in data:
            row = dict(record)
            for name, fallback in self.defaults.items():
                absent = name not in row
                if absent or (self.replace_null and row[name] is None):
                    row[name] = fallback
            filled_rows.append(row)
        return filled_rows
165
+
166
+
167
class Map(BaseTransformer):
    """Replace each record with the result of calling a function on it."""

    def __init__(self, func: Callable[[Dict[str, Any]], Dict[str, Any]]):
        self.func = func

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return one output per input record, in input order."""
        mapped = []
        for record in data:
            mapped.append(self.func(record))
        return mapped
175
+
176
+
177
class FlatMap(BaseTransformer):
    """Expand each record into zero or more records via a function."""

    def __init__(self, func: Callable[[Dict[str, Any]], List[Dict[str, Any]]]):
        self.func = func

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the concatenation of the function's outputs, in input order."""
        return [produced for record in data for produced in self.func(record)]
188
+
189
+
190
class CustomFunction(BaseTransformer):
    """
    Run a custom Python function on the whole batch.

    The function is either passed directly as ``func`` or located by
    ``module`` + ``function`` name and imported at construction time.
    """

    def __init__(
        self,
        module: Optional[str] = None,
        function: Optional[str] = None,
        func: Optional[Callable[[List[Dict[str, Any]]], List[Dict[str, Any]]]] = None,
        kwargs: Optional[Dict[str, Any]] = None,
    ):
        """
        Args:
            module: Importable module path holding the function.
            function: Attribute name of the function inside ``module``.
            func: Ready-made callable; takes precedence over module/function.
            kwargs: Keyword arguments passed on every ``transform`` call.

        Raises:
            ValueError: If neither ``func`` nor both ``module`` and
                ``function`` are given.
        """
        self.module = module
        self.function = function
        self._func = func
        self.kwargs = kwargs or {}

        if func is None and not (module and function):
            raise ValueError("Must provide either 'func' or both 'module' and 'function'")

        if func is not None:
            self._resolved_func = func
        else:
            self._resolved_func = self._import_function(module, function)

    def _import_function(self, module: str, function: str) -> Callable:
        """Import ``module`` and return its attribute named ``function``."""
        from importlib import import_module

        return getattr(import_module(module), function)

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Call the resolved function with the batch and the stored kwargs."""
        handler = self._resolved_func
        return handler(data, **self.kwargs)