pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404) hide show
  1. api/main.py +27 -1
  2. api/models/docs.py +68 -0
  3. api/models/evolution.py +117 -0
  4. api/models/tracking.py +111 -0
  5. api/models/validation.py +46 -6
  6. api/routes/v1/__init__.py +14 -1
  7. api/routes/v1/docs.py +187 -0
  8. api/routes/v1/evolution.py +337 -0
  9. api/routes/v1/templates.py +211 -27
  10. api/routes/v1/tracking.py +301 -0
  11. api/routes/v1/validation.py +68 -31
  12. pycharter/__init__.py +268 -58
  13. pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
  14. pycharter/data/templates/contract/template_contract.yaml +122 -0
  15. pycharter/data/templates/contract/template_metadata.yaml +68 -0
  16. pycharter/data/templates/contract/template_schema.yaml +100 -0
  17. pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
  18. pycharter/data/templates/etl/README.md +224 -0
  19. pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
  20. pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
  21. pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
  22. pycharter/data/templates/etl/extract_database.yaml +34 -0
  23. pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
  24. pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
  25. pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
  26. pycharter/data/templates/etl/extract_file_json.yaml +24 -0
  27. pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
  28. pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
  29. pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
  30. pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
  31. pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
  32. pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
  33. pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
  34. pycharter/data/templates/etl/load_file.yaml +34 -0
  35. pycharter/data/templates/etl/load_insert.yaml +18 -0
  36. pycharter/data/templates/etl/load_postgresql.yaml +39 -0
  37. pycharter/data/templates/etl/load_sqlite.yaml +21 -0
  38. pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
  39. pycharter/data/templates/etl/load_upsert.yaml +25 -0
  40. pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
  41. pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
  42. pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
  43. pycharter/data/templates/etl/transform_combined.yaml +48 -0
  44. pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
  45. pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
  46. pycharter/data/templates/etl/transform_simple.yaml +59 -0
  47. pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
  48. pycharter/docs_generator/__init__.py +43 -0
  49. pycharter/docs_generator/generator.py +465 -0
  50. pycharter/docs_generator/renderers.py +247 -0
  51. pycharter/etl_generator/__init__.py +168 -80
  52. pycharter/etl_generator/builder.py +121 -0
  53. pycharter/etl_generator/config_loader.py +394 -0
  54. pycharter/etl_generator/config_validator.py +418 -0
  55. pycharter/etl_generator/context.py +132 -0
  56. pycharter/etl_generator/expression.py +499 -0
  57. pycharter/etl_generator/extractors/__init__.py +30 -0
  58. pycharter/etl_generator/extractors/base.py +70 -0
  59. pycharter/etl_generator/extractors/cloud_storage.py +530 -0
  60. pycharter/etl_generator/extractors/database.py +221 -0
  61. pycharter/etl_generator/extractors/factory.py +185 -0
  62. pycharter/etl_generator/extractors/file.py +475 -0
  63. pycharter/etl_generator/extractors/http.py +895 -0
  64. pycharter/etl_generator/extractors/streaming.py +57 -0
  65. pycharter/etl_generator/loaders/__init__.py +41 -0
  66. pycharter/etl_generator/loaders/base.py +35 -0
  67. pycharter/etl_generator/loaders/cloud.py +87 -0
  68. pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
  69. pycharter/etl_generator/loaders/database.py +274 -0
  70. pycharter/etl_generator/loaders/factory.py +180 -0
  71. pycharter/etl_generator/loaders/file.py +72 -0
  72. pycharter/etl_generator/loaders/file_loader.py +130 -0
  73. pycharter/etl_generator/pipeline.py +743 -0
  74. pycharter/etl_generator/protocols.py +54 -0
  75. pycharter/etl_generator/result.py +63 -0
  76. pycharter/etl_generator/schemas/__init__.py +49 -0
  77. pycharter/etl_generator/transformers/__init__.py +49 -0
  78. pycharter/etl_generator/transformers/base.py +63 -0
  79. pycharter/etl_generator/transformers/config.py +45 -0
  80. pycharter/etl_generator/transformers/custom_function.py +101 -0
  81. pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
  82. pycharter/etl_generator/transformers/operations.py +218 -0
  83. pycharter/etl_generator/transformers/pipeline.py +54 -0
  84. pycharter/etl_generator/transformers/simple_operations.py +131 -0
  85. pycharter/quality/__init__.py +25 -0
  86. pycharter/quality/tracking/__init__.py +64 -0
  87. pycharter/quality/tracking/collector.py +318 -0
  88. pycharter/quality/tracking/exporters.py +238 -0
  89. pycharter/quality/tracking/models.py +194 -0
  90. pycharter/quality/tracking/store.py +385 -0
  91. pycharter/runtime_validator/__init__.py +20 -7
  92. pycharter/runtime_validator/builder.py +328 -0
  93. pycharter/runtime_validator/validator.py +311 -7
  94. pycharter/runtime_validator/validator_core.py +61 -0
  95. pycharter/schema_evolution/__init__.py +61 -0
  96. pycharter/schema_evolution/compatibility.py +270 -0
  97. pycharter/schema_evolution/diff.py +496 -0
  98. pycharter/schema_evolution/models.py +201 -0
  99. pycharter/shared/__init__.py +56 -0
  100. pycharter/shared/errors.py +296 -0
  101. pycharter/shared/protocols.py +234 -0
  102. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
  103. pycharter-0.0.24.dist-info/RECORD +543 -0
  104. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
  105. ui/static/404/index.html +1 -1
  106. ui/static/404.html +1 -1
  107. ui/static/__next.__PAGE__.txt +1 -1
  108. ui/static/__next._full.txt +1 -1
  109. ui/static/__next._head.txt +1 -1
  110. ui/static/__next._index.txt +1 -1
  111. ui/static/__next._tree.txt +1 -1
  112. ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
  113. ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
  114. ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
  115. ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
  116. ui/static/_not-found/__next._full.txt +1 -1
  117. ui/static/_not-found/__next._head.txt +1 -1
  118. ui/static/_not-found/__next._index.txt +1 -1
  119. ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  120. ui/static/_not-found/__next._not-found.txt +1 -1
  121. ui/static/_not-found/__next._tree.txt +1 -1
  122. ui/static/_not-found/index.html +1 -1
  123. ui/static/_not-found/index.txt +1 -1
  124. ui/static/contracts/__next._full.txt +2 -2
  125. ui/static/contracts/__next._head.txt +1 -1
  126. ui/static/contracts/__next._index.txt +1 -1
  127. ui/static/contracts/__next._tree.txt +1 -1
  128. ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  129. ui/static/contracts/__next.contracts.txt +1 -1
  130. ui/static/contracts/index.html +1 -1
  131. ui/static/contracts/index.txt +2 -2
  132. ui/static/documentation/__next._full.txt +1 -1
  133. ui/static/documentation/__next._head.txt +1 -1
  134. ui/static/documentation/__next._index.txt +1 -1
  135. ui/static/documentation/__next._tree.txt +1 -1
  136. ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
  137. ui/static/documentation/__next.documentation.txt +1 -1
  138. ui/static/documentation/index.html +2 -2
  139. ui/static/documentation/index.txt +1 -1
  140. ui/static/index.html +1 -1
  141. ui/static/index.txt +1 -1
  142. ui/static/metadata/__next._full.txt +1 -1
  143. ui/static/metadata/__next._head.txt +1 -1
  144. ui/static/metadata/__next._index.txt +1 -1
  145. ui/static/metadata/__next._tree.txt +1 -1
  146. ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  147. ui/static/metadata/__next.metadata.txt +1 -1
  148. ui/static/metadata/index.html +1 -1
  149. ui/static/metadata/index.txt +1 -1
  150. ui/static/quality/__next._full.txt +2 -2
  151. ui/static/quality/__next._head.txt +1 -1
  152. ui/static/quality/__next._index.txt +1 -1
  153. ui/static/quality/__next._tree.txt +1 -1
  154. ui/static/quality/__next.quality.__PAGE__.txt +2 -2
  155. ui/static/quality/__next.quality.txt +1 -1
  156. ui/static/quality/index.html +2 -2
  157. ui/static/quality/index.txt +2 -2
  158. ui/static/rules/__next._full.txt +1 -1
  159. ui/static/rules/__next._head.txt +1 -1
  160. ui/static/rules/__next._index.txt +1 -1
  161. ui/static/rules/__next._tree.txt +1 -1
  162. ui/static/rules/__next.rules.__PAGE__.txt +1 -1
  163. ui/static/rules/__next.rules.txt +1 -1
  164. ui/static/rules/index.html +1 -1
  165. ui/static/rules/index.txt +1 -1
  166. ui/static/schemas/__next._full.txt +1 -1
  167. ui/static/schemas/__next._head.txt +1 -1
  168. ui/static/schemas/__next._index.txt +1 -1
  169. ui/static/schemas/__next._tree.txt +1 -1
  170. ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  171. ui/static/schemas/__next.schemas.txt +1 -1
  172. ui/static/schemas/index.html +1 -1
  173. ui/static/schemas/index.txt +1 -1
  174. ui/static/settings/__next._full.txt +1 -1
  175. ui/static/settings/__next._head.txt +1 -1
  176. ui/static/settings/__next._index.txt +1 -1
  177. ui/static/settings/__next._tree.txt +1 -1
  178. ui/static/settings/__next.settings.__PAGE__.txt +1 -1
  179. ui/static/settings/__next.settings.txt +1 -1
  180. ui/static/settings/index.html +1 -1
  181. ui/static/settings/index.txt +1 -1
  182. ui/static/static/404/index.html +1 -1
  183. ui/static/static/404.html +1 -1
  184. ui/static/static/__next.__PAGE__.txt +1 -1
  185. ui/static/static/__next._full.txt +2 -2
  186. ui/static/static/__next._head.txt +1 -1
  187. ui/static/static/__next._index.txt +2 -2
  188. ui/static/static/__next._tree.txt +2 -2
  189. ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
  190. ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
  191. ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
  192. ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
  193. ui/static/static/_not-found/__next._full.txt +2 -2
  194. ui/static/static/_not-found/__next._head.txt +1 -1
  195. ui/static/static/_not-found/__next._index.txt +2 -2
  196. ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  197. ui/static/static/_not-found/__next._not-found.txt +1 -1
  198. ui/static/static/_not-found/__next._tree.txt +2 -2
  199. ui/static/static/_not-found/index.html +1 -1
  200. ui/static/static/_not-found/index.txt +2 -2
  201. ui/static/static/contracts/__next._full.txt +3 -3
  202. ui/static/static/contracts/__next._head.txt +1 -1
  203. ui/static/static/contracts/__next._index.txt +2 -2
  204. ui/static/static/contracts/__next._tree.txt +2 -2
  205. ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  206. ui/static/static/contracts/__next.contracts.txt +1 -1
  207. ui/static/static/contracts/index.html +1 -1
  208. ui/static/static/contracts/index.txt +3 -3
  209. ui/static/static/documentation/__next._full.txt +3 -3
  210. ui/static/static/documentation/__next._head.txt +1 -1
  211. ui/static/static/documentation/__next._index.txt +2 -2
  212. ui/static/static/documentation/__next._tree.txt +2 -2
  213. ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
  214. ui/static/static/documentation/__next.documentation.txt +1 -1
  215. ui/static/static/documentation/index.html +2 -2
  216. ui/static/static/documentation/index.txt +3 -3
  217. ui/static/static/index.html +1 -1
  218. ui/static/static/index.txt +2 -2
  219. ui/static/static/metadata/__next._full.txt +2 -2
  220. ui/static/static/metadata/__next._head.txt +1 -1
  221. ui/static/static/metadata/__next._index.txt +2 -2
  222. ui/static/static/metadata/__next._tree.txt +2 -2
  223. ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  224. ui/static/static/metadata/__next.metadata.txt +1 -1
  225. ui/static/static/metadata/index.html +1 -1
  226. ui/static/static/metadata/index.txt +2 -2
  227. ui/static/static/quality/__next._full.txt +2 -2
  228. ui/static/static/quality/__next._head.txt +1 -1
  229. ui/static/static/quality/__next._index.txt +2 -2
  230. ui/static/static/quality/__next._tree.txt +2 -2
  231. ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
  232. ui/static/static/quality/__next.quality.txt +1 -1
  233. ui/static/static/quality/index.html +2 -2
  234. ui/static/static/quality/index.txt +2 -2
  235. ui/static/static/rules/__next._full.txt +2 -2
  236. ui/static/static/rules/__next._head.txt +1 -1
  237. ui/static/static/rules/__next._index.txt +2 -2
  238. ui/static/static/rules/__next._tree.txt +2 -2
  239. ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
  240. ui/static/static/rules/__next.rules.txt +1 -1
  241. ui/static/static/rules/index.html +1 -1
  242. ui/static/static/rules/index.txt +2 -2
  243. ui/static/static/schemas/__next._full.txt +2 -2
  244. ui/static/static/schemas/__next._head.txt +1 -1
  245. ui/static/static/schemas/__next._index.txt +2 -2
  246. ui/static/static/schemas/__next._tree.txt +2 -2
  247. ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  248. ui/static/static/schemas/__next.schemas.txt +1 -1
  249. ui/static/static/schemas/index.html +1 -1
  250. ui/static/static/schemas/index.txt +2 -2
  251. ui/static/static/settings/__next._full.txt +2 -2
  252. ui/static/static/settings/__next._head.txt +1 -1
  253. ui/static/static/settings/__next._index.txt +2 -2
  254. ui/static/static/settings/__next._tree.txt +2 -2
  255. ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
  256. ui/static/static/settings/__next.settings.txt +1 -1
  257. ui/static/static/settings/index.html +1 -1
  258. ui/static/static/settings/index.txt +2 -2
  259. ui/static/static/static/.gitkeep +0 -0
  260. ui/static/static/static/404/index.html +1 -0
  261. ui/static/static/static/404.html +1 -0
  262. ui/static/static/static/__next.__PAGE__.txt +10 -0
  263. ui/static/static/static/__next._full.txt +30 -0
  264. ui/static/static/static/__next._head.txt +7 -0
  265. ui/static/static/static/__next._index.txt +9 -0
  266. ui/static/static/static/__next._tree.txt +2 -0
  267. ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
  268. ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
  269. ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
  270. ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
  271. ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
  272. ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
  273. ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
  274. ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
  275. ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
  276. ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
  277. ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
  278. ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
  279. ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
  280. ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
  281. ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
  282. ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
  283. ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
  284. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
  285. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
  286. ui/static/static/static/_not-found/__next._full.txt +17 -0
  287. ui/static/static/static/_not-found/__next._head.txt +7 -0
  288. ui/static/static/static/_not-found/__next._index.txt +9 -0
  289. ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
  290. ui/static/static/static/_not-found/__next._not-found.txt +4 -0
  291. ui/static/static/static/_not-found/__next._tree.txt +2 -0
  292. ui/static/static/static/_not-found/index.html +1 -0
  293. ui/static/static/static/_not-found/index.txt +17 -0
  294. ui/static/static/static/contracts/__next._full.txt +21 -0
  295. ui/static/static/static/contracts/__next._head.txt +7 -0
  296. ui/static/static/static/contracts/__next._index.txt +9 -0
  297. ui/static/static/static/contracts/__next._tree.txt +2 -0
  298. ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
  299. ui/static/static/static/contracts/__next.contracts.txt +4 -0
  300. ui/static/static/static/contracts/index.html +1 -0
  301. ui/static/static/static/contracts/index.txt +21 -0
  302. ui/static/static/static/documentation/__next._full.txt +21 -0
  303. ui/static/static/static/documentation/__next._head.txt +7 -0
  304. ui/static/static/static/documentation/__next._index.txt +9 -0
  305. ui/static/static/static/documentation/__next._tree.txt +2 -0
  306. ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
  307. ui/static/static/static/documentation/__next.documentation.txt +4 -0
  308. ui/static/static/static/documentation/index.html +93 -0
  309. ui/static/static/static/documentation/index.txt +21 -0
  310. ui/static/static/static/index.html +1 -0
  311. ui/static/static/static/index.txt +30 -0
  312. ui/static/static/static/metadata/__next._full.txt +21 -0
  313. ui/static/static/static/metadata/__next._head.txt +7 -0
  314. ui/static/static/static/metadata/__next._index.txt +9 -0
  315. ui/static/static/static/metadata/__next._tree.txt +2 -0
  316. ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
  317. ui/static/static/static/metadata/__next.metadata.txt +4 -0
  318. ui/static/static/static/metadata/index.html +1 -0
  319. ui/static/static/static/metadata/index.txt +21 -0
  320. ui/static/static/static/quality/__next._full.txt +21 -0
  321. ui/static/static/static/quality/__next._head.txt +7 -0
  322. ui/static/static/static/quality/__next._index.txt +9 -0
  323. ui/static/static/static/quality/__next._tree.txt +2 -0
  324. ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
  325. ui/static/static/static/quality/__next.quality.txt +4 -0
  326. ui/static/static/static/quality/index.html +2 -0
  327. ui/static/static/static/quality/index.txt +21 -0
  328. ui/static/static/static/rules/__next._full.txt +21 -0
  329. ui/static/static/static/rules/__next._head.txt +7 -0
  330. ui/static/static/static/rules/__next._index.txt +9 -0
  331. ui/static/static/static/rules/__next._tree.txt +2 -0
  332. ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
  333. ui/static/static/static/rules/__next.rules.txt +4 -0
  334. ui/static/static/static/rules/index.html +1 -0
  335. ui/static/static/static/rules/index.txt +21 -0
  336. ui/static/static/static/schemas/__next._full.txt +21 -0
  337. ui/static/static/static/schemas/__next._head.txt +7 -0
  338. ui/static/static/static/schemas/__next._index.txt +9 -0
  339. ui/static/static/static/schemas/__next._tree.txt +2 -0
  340. ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
  341. ui/static/static/static/schemas/__next.schemas.txt +4 -0
  342. ui/static/static/static/schemas/index.html +1 -0
  343. ui/static/static/static/schemas/index.txt +21 -0
  344. ui/static/static/static/settings/__next._full.txt +21 -0
  345. ui/static/static/static/settings/__next._head.txt +7 -0
  346. ui/static/static/static/settings/__next._index.txt +9 -0
  347. ui/static/static/static/settings/__next._tree.txt +2 -0
  348. ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
  349. ui/static/static/static/settings/__next.settings.txt +4 -0
  350. ui/static/static/static/settings/index.html +1 -0
  351. ui/static/static/static/settings/index.txt +21 -0
  352. ui/static/static/static/validation/__next._full.txt +21 -0
  353. ui/static/static/static/validation/__next._head.txt +7 -0
  354. ui/static/static/static/validation/__next._index.txt +9 -0
  355. ui/static/static/static/validation/__next._tree.txt +2 -0
  356. ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
  357. ui/static/static/static/validation/__next.validation.txt +4 -0
  358. ui/static/static/static/validation/index.html +1 -0
  359. ui/static/static/static/validation/index.txt +21 -0
  360. ui/static/static/validation/__next._full.txt +2 -2
  361. ui/static/static/validation/__next._head.txt +1 -1
  362. ui/static/static/validation/__next._index.txt +2 -2
  363. ui/static/static/validation/__next._tree.txt +2 -2
  364. ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
  365. ui/static/static/validation/__next.validation.txt +1 -1
  366. ui/static/static/validation/index.html +1 -1
  367. ui/static/static/validation/index.txt +2 -2
  368. ui/static/validation/__next._full.txt +2 -2
  369. ui/static/validation/__next._head.txt +1 -1
  370. ui/static/validation/__next._index.txt +1 -1
  371. ui/static/validation/__next._tree.txt +1 -1
  372. ui/static/validation/__next.validation.__PAGE__.txt +2 -2
  373. ui/static/validation/__next.validation.txt +1 -1
  374. ui/static/validation/index.html +1 -1
  375. ui/static/validation/index.txt +2 -2
  376. pycharter/data/templates/template_coercion_rules.yaml +0 -15
  377. pycharter/data/templates/template_contract.yaml +0 -587
  378. pycharter/data/templates/template_metadata.yaml +0 -38
  379. pycharter/data/templates/template_schema.yaml +0 -22
  380. pycharter/data/templates/template_transform_advanced.yaml +0 -50
  381. pycharter/data/templates/template_transform_simple.yaml +0 -59
  382. pycharter/data/templates/template_validation_rules.yaml +0 -29
  383. pycharter/etl_generator/extraction.py +0 -916
  384. pycharter/etl_generator/factory.py +0 -174
  385. pycharter/etl_generator/orchestrator.py +0 -1650
  386. pycharter/integrations/__init__.py +0 -19
  387. pycharter/integrations/kafka.py +0 -178
  388. pycharter/integrations/streaming.py +0 -100
  389. pycharter-0.0.22.dist-info/RECORD +0 -358
  390. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
  391. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
  392. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
  393. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
  394. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
  395. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
  396. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
  397. /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
  398. /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
  399. /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
  400. /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
  401. /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
  402. /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
  403. /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
  404. /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
@@ -0,0 +1,274 @@
1
+ """
2
+ Database loaders for ETL pipelines.
3
+ """
4
+
5
+ import logging
6
+ import time
7
+ from typing import Any, Dict, List, Optional, Union
8
+
9
+ from sqlalchemy import create_engine, text
10
+ from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
11
+ from sqlalchemy.orm import sessionmaker
12
+
13
+ from pycharter.etl_generator.database import (
14
+ detect_database_type,
15
+ create_ssh_tunnel,
16
+ modify_url_for_tunnel,
17
+ load_data_postgresql,
18
+ load_data_mysql,
19
+ load_data_sqlite,
20
+ load_data_mssql,
21
+ DEFAULT_TUNNEL_LOCAL_PORT,
22
+ DB_POSTGRESQL,
23
+ DB_MYSQL,
24
+ DB_SQLITE,
25
+ DB_MSSQL,
26
+ )
27
+ from pycharter.etl_generator.loaders.base import BaseLoader
28
+ from pycharter.etl_generator.result import LoadResult
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class PostgresLoader(BaseLoader):
    """
    Loader for PostgreSQL databases.

    Supports:
    - Insert, upsert, replace, update, delete, truncate_and_load
    - Bulk operations for efficiency
    - SSH tunneling

    Example:
        >>> loader = PostgresLoader(
        ...     connection_string="postgresql://user:pass@localhost/db",
        ...     table="users",
        ...     write_method="upsert",
        ...     primary_key="id",
        ... )
        >>> result = await loader.load(data)
    """

    def __init__(
        self,
        connection_string: str,
        table: str,
        schema: str = "public",
        write_method: str = "upsert",
        primary_key: Optional[Union[str, List[str]]] = None,
        batch_size: int = 1000,
        ssh_tunnel: Optional[Dict[str, Any]] = None,
    ):
        """
        Store the load target and write options; no connection is opened here.

        Args:
            connection_string: Database URL. A plain ``postgresql://`` URL is
                rewritten to use the ``asyncpg`` driver at load time.
            table: Target table name.
            schema: Target schema name.
            write_method: Write strategy forwarded to ``load_data_postgresql``.
            primary_key: Key column(s) forwarded to ``load_data_postgresql``.
            batch_size: Batch size forwarded to ``load_data_postgresql``.
            ssh_tunnel: Optional tunnel settings; only used when its
                ``"enabled"`` entry is truthy.
        """
        self.connection_string = connection_string
        self.table = table
        self.schema = schema
        self.write_method = write_method
        self.primary_key = primary_key
        self.batch_size = batch_size
        self.ssh_tunnel = ssh_tunnel

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "PostgresLoader":
        """
        Create loader from configuration dict.

        Values under the nested ``"database"`` mapping take precedence over
        the equivalent top-level keys. ``batch_size`` is read from the top
        level only and ``ssh_tunnel`` from the ``"database"`` mapping only.
        """
        # ``or {}`` also covers a "database" key that is present but null
        # (e.g. an empty YAML mapping), which would otherwise break .get().
        db_config = config.get("database") or {}
        return cls(
            connection_string=db_config.get("url") or config.get("connection_string"),
            table=db_config.get("table") or config.get("table"),
            schema=db_config.get("schema", config.get("schema", "public")),
            write_method=db_config.get("write_method", config.get("write_method", "upsert")),
            primary_key=db_config.get("primary_key") or config.get("primary_key"),
            batch_size=config.get("batch_size", 1000),
            ssh_tunnel=db_config.get("ssh_tunnel"),
        )

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Load data to PostgreSQL.

        Args:
            data: Records to write. An empty list short-circuits to a
                successful result with ``rows_loaded=0``.
            **params: Accepted for interface compatibility; not used here.

        Returns:
            A ``LoadResult``. Failures while connecting or writing are logged
            and reported as ``success=False`` rather than raised. Errors while
            opening the SSH tunnel or rewriting the URL propagate.
        """
        start_time = time.time()

        if not data:
            return LoadResult(success=True, rows_loaded=0)

        # Handle SSH tunnel if configured
        tunnel = None
        if self.ssh_tunnel and self.ssh_tunnel.get("enabled"):
            tunnel = create_ssh_tunnel(self.ssh_tunnel)

        # From here on the tunnel (if any) is open, so everything else runs
        # under the finally that stops it -- including the URL rewrite, which
        # can raise on a malformed local_port.
        try:
            connection_string = self.connection_string
            if tunnel:
                local_port = int(self.ssh_tunnel.get("local_port", DEFAULT_TUNNEL_LOCAL_PORT))
                connection_string = modify_url_for_tunnel(
                    connection_string, local_port, DB_POSTGRESQL
                )
            return await self._run_load(data, connection_string, start_time)
        finally:
            if tunnel:
                tunnel.stop()

    async def _run_load(
        self,
        data: List[Dict[str, Any]],
        connection_string: str,
        start_time: float,
    ) -> LoadResult:
        """Write ``data`` through a short-lived async engine and build the result."""
        try:
            # Use async engine for PostgreSQL
            # Convert sync URL to async if needed
            if "+asyncpg" not in connection_string:
                async_url = connection_string.replace("postgresql://", "postgresql+asyncpg://")
            else:
                async_url = connection_string

            engine = create_async_engine(async_url, echo=False)
            try:
                async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

                async with async_session() as session:
                    result = await load_data_postgresql(
                        data=data,
                        session=session,
                        schema_name=self.schema,
                        table_name=self.table,
                        write_method=self.write_method,
                        primary_key=self.primary_key,
                        batch_size=self.batch_size,
                    )
            finally:
                # Dispose on failure too, so a failed load does not leak the
                # engine's connection pool.
                await engine.dispose()

            duration = time.time() - start_time
            logger.info(f"Loaded {result['total']} records to {self.schema}.{self.table} in {duration:.2f}s")

            return LoadResult(
                success=True,
                rows_loaded=result.get("inserted", 0) + result.get("updated", 0),
                duration_seconds=duration,
            )

        except Exception as e:
            logger.error(f"PostgreSQL load failed: {e}", exc_info=True)
            return LoadResult(
                success=False,
                error=str(e),
                duration_seconds=time.time() - start_time,
            )
146
+
147
+
148
class DatabaseLoader(BaseLoader):
    """
    Generic database loader that auto-detects database type.

    Supports PostgreSQL, MySQL, SQLite, and MSSQL.

    Example:
        >>> loader = DatabaseLoader(
        ...     connection_string="mysql://user:pass@localhost/db",
        ...     table="users",
        ... )
        >>> result = await loader.load(data)
    """

    def __init__(
        self,
        connection_string: str,
        table: str,
        schema: Optional[str] = None,
        write_method: str = "upsert",
        primary_key: Optional[Union[str, List[str]]] = None,
        batch_size: int = 1000,
        ssh_tunnel: Optional[Dict[str, Any]] = None,
    ):
        """
        Store the load target and detect the database type from the URL.

        Args:
            connection_string: Database URL; passed to
                ``detect_database_type`` to pick the load path.
            table: Target table name.
            schema: Optional schema name. When omitted, a per-database
                default is substituted at load time.
            write_method: Write strategy forwarded to the load function.
            primary_key: Key column(s) forwarded to the load function.
            batch_size: Batch size forwarded to the load function.
            ssh_tunnel: Optional tunnel settings; only used when its
                ``"enabled"`` entry is truthy.
        """
        self.connection_string = connection_string
        self.table = table
        self.schema = schema
        self.write_method = write_method
        self.primary_key = primary_key
        self.batch_size = batch_size
        self.ssh_tunnel = ssh_tunnel
        self.db_type = detect_database_type(connection_string)

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "DatabaseLoader":
        """
        Create loader from configuration dict.

        Values under the nested ``"database"`` mapping take precedence over
        the equivalent top-level keys. ``batch_size`` is read from the top
        level only and ``ssh_tunnel`` from the ``"database"`` mapping only.
        """
        # ``or {}`` also covers a "database" key that is present but null
        # (e.g. an empty YAML mapping), which would otherwise break .get().
        db_config = config.get("database") or {}
        return cls(
            connection_string=db_config.get("url") or config.get("connection_string"),
            table=db_config.get("table") or config.get("table"),
            schema=db_config.get("schema") or config.get("schema"),
            write_method=db_config.get("write_method", config.get("write_method", "upsert")),
            primary_key=db_config.get("primary_key") or config.get("primary_key"),
            batch_size=config.get("batch_size", 1000),
            ssh_tunnel=db_config.get("ssh_tunnel"),
        )

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Load data using appropriate database loader.

        PostgreSQL targets are delegated to ``PostgresLoader`` (schema
        defaults to ``"public"``); every other type goes through
        ``_load_sync``.
        """
        if self.db_type == DB_POSTGRESQL:
            loader = PostgresLoader(
                connection_string=self.connection_string,
                table=self.table,
                schema=self.schema or "public",
                write_method=self.write_method,
                primary_key=self.primary_key,
                batch_size=self.batch_size,
                ssh_tunnel=self.ssh_tunnel,
            )
            return await loader.load(data, **params)

        # For non-PostgreSQL databases, use sync loading
        return await self._load_sync(data, **params)

    async def _load_sync(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Load data using sync database operations.

        NOTE: the database work below is synchronous, so it runs inline in
        this coroutine rather than being awaited.

        Returns:
            A ``LoadResult``. Failures while connecting or writing (including
            an unsupported database type) are logged and reported as
            ``success=False``. Errors while opening the SSH tunnel or
            rewriting the URL propagate.
        """
        start_time = time.time()

        if not data:
            return LoadResult(success=True, rows_loaded=0)

        # Handle SSH tunnel if configured
        tunnel = None
        if self.ssh_tunnel and self.ssh_tunnel.get("enabled"):
            tunnel = create_ssh_tunnel(self.ssh_tunnel)

        # From here on the tunnel (if any) is open, so everything else runs
        # under the finally that stops it -- including the URL rewrite, which
        # can raise on a malformed local_port.
        try:
            connection_string = self.connection_string
            if tunnel:
                local_port = int(self.ssh_tunnel.get("local_port", DEFAULT_TUNNEL_LOCAL_PORT))
                connection_string = modify_url_for_tunnel(
                    connection_string, local_port, self.db_type
                )
            return self._run_sync_load(data, connection_string, start_time)
        finally:
            if tunnel:
                tunnel.stop()

    def _run_sync_load(
        self,
        data: List[Dict[str, Any]],
        connection_string: str,
        start_time: float,
    ) -> LoadResult:
        """Write ``data`` through a short-lived sync engine and build the result."""
        try:
            engine = create_engine(connection_string, echo=False)
            try:
                Session = sessionmaker(bind=engine)
                # The session context manager closes the session on exit,
                # on the error path as well as on success.
                with Session() as session:
                    # Select appropriate load function
                    if self.db_type == DB_MYSQL:
                        result = load_data_mysql(
                            data, session, self.schema or "", self.table,
                            self.write_method, self.primary_key, self.batch_size
                        )
                    elif self.db_type == DB_SQLITE:
                        result = load_data_sqlite(
                            data, session, "", self.table,
                            self.write_method, self.primary_key, self.batch_size
                        )
                    elif self.db_type == DB_MSSQL:
                        result = load_data_mssql(
                            data, session, self.schema or "dbo", self.table,
                            self.write_method, self.primary_key, self.batch_size
                        )
                    else:
                        raise ValueError(f"Unsupported database type: {self.db_type}")
            finally:
                # Dispose on failure too, so a failed load does not leak the
                # engine's connection pool.
                engine.dispose()

            duration = time.time() - start_time
            return LoadResult(
                success=True,
                rows_loaded=result.get("inserted", 0) + result.get("updated", 0),
                duration_seconds=duration,
            )

        except Exception as e:
            logger.error(f"Database load failed: {e}", exc_info=True)
            return LoadResult(
                success=False,
                error=str(e),
                duration_seconds=time.time() - start_time,
            )
@@ -0,0 +1,180 @@
1
+ """
2
+ Loader factory for ETL pipelines.
3
+
4
+ Provides a registry pattern to select and instantiate the appropriate loader
5
+ based on the target type specified in load configuration.
6
+
7
+ Usage:
8
+ from pycharter.etl_generator.loaders.factory import LoaderFactory
9
+
10
+ # Create loader from config
11
+ loader = LoaderFactory.create(load_config)
12
+
13
+ # Register custom loader
14
+ LoaderFactory.register("bigquery", BigQueryLoader)
15
+ """
16
+
17
+ import logging
18
+ from typing import Any, Dict, List, Optional, Type
19
+
20
+ from pycharter.etl_generator.loaders.base import BaseLoader
21
+ from pycharter.etl_generator.loaders.database import PostgresLoader, DatabaseLoader
22
+ from pycharter.etl_generator.loaders.file import FileLoader
23
+ from pycharter.etl_generator.loaders.cloud import CloudStorageLoader
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class LoaderFactory:
    """
    Factory for creating loader instances based on target type.

    The loader type is resolved in this order:
    - Explicit 'type' field (recommended)
    - Legacy 'target_type' field
    - Auto-detection from config keys (for backward compatibility)

    Example:
        # With explicit type (recommended)
        config = {"type": "postgres", "table": "users", "database": {"url": "..."}}
        loader = LoaderFactory.create(config)

        # Auto-detected (legacy)
        config = {"table": "users", "connection_string": "postgresql://..."}
        loader = LoaderFactory.create(config)  # Detected as postgres
    """

    # Registry of loaders by target type. Keys are lowercase. The dict is
    # shared class-wide and mutated by register() / unregister().
    _registry: Dict[str, Type[BaseLoader]] = {
        "postgres": PostgresLoader,
        "postgresql": PostgresLoader,
        "database": DatabaseLoader,
        "sqlite": DatabaseLoader,
        "file": FileLoader,
        "cloud_storage": CloudStorageLoader,
    }

    @classmethod
    def register(cls, type_name: str, loader_class: Type[BaseLoader]) -> None:
        """
        Register a custom loader class.

        Args:
            type_name: Type identifier (e.g., 'bigquery', 'snowflake').
                Stored lowercase; an existing entry with the same name is
                replaced.
            loader_class: Loader class that inherits from BaseLoader

        Raises:
            TypeError: If loader_class does not inherit from BaseLoader

        Example:
            class BigQueryLoader(BaseLoader):
                ...

            LoaderFactory.register("bigquery", BigQueryLoader)
        """
        if not issubclass(loader_class, BaseLoader):
            raise TypeError(f"Loader class must inherit from BaseLoader: {loader_class}")
        cls._registry[type_name.lower()] = loader_class
        logger.info(f"Registered loader: {type_name} -> {loader_class.__name__}")

    @classmethod
    def unregister(cls, type_name: str) -> None:
        """Remove a loader from the registry (no error if it is absent)."""
        cls._registry.pop(type_name.lower(), None)

    @classmethod
    def list_types(cls) -> List[str]:
        """List all registered loader types."""
        return list(cls._registry.keys())

    @classmethod
    def create(cls, config: Dict[str, Any]) -> BaseLoader:
        """
        Create a loader instance from configuration.

        Args:
            config: Load configuration dictionary

        Returns:
            Configured loader instance

        Raises:
            ValueError: If type cannot be determined, is not a string,
                or is not registered
        """
        # Get type from config (check 'type' first, then 'target_type' for legacy)
        load_type = config.get("type") or config.get("target_type")

        # Auto-detect if not specified
        if not load_type:
            load_type = cls._detect_type(config)
            if load_type:
                logger.debug(f"Auto-detected loader type: {load_type}")
            else:
                raise ValueError(
                    "Cannot determine loader type. "
                    f"Add 'type' field with one of: {', '.join(sorted(cls._registry))}"
                )

        # A non-string type (e.g. a YAML number) used to fail with an
        # AttributeError on .lower(); report it as the documented ValueError.
        if not isinstance(load_type, str):
            raise ValueError(
                f"Loader type must be a string, got {type(load_type).__name__}: "
                f"{load_type!r}"
            )
        load_type = load_type.lower()

        # Get loader class from registry
        loader_class = cls._registry.get(load_type)
        if not loader_class:
            # sorted() keeps the message stable between runs.
            raise ValueError(
                f"Unknown loader type: '{load_type}'. "
                f"Available types: {', '.join(sorted(cls._registry))}. "
                f"Register custom loaders with LoaderFactory.register()"
            )

        # Create loader using from_config if available
        if hasattr(loader_class, "from_config"):
            loader = loader_class.from_config(config)
        else:
            loader = loader_class()

        logger.debug(f"Created {loader_class.__name__} for type: {load_type}")
        return loader

    @classmethod
    def _detect_type(cls, config: Dict[str, Any]) -> Optional[str]:
        """
        Auto-detect loader type from configuration keys.

        This is for backward compatibility. New configs should use explicit 'type'.

        Rules, checked in order:
        - 'table' plus 'connection_string' or 'database': "sqlite" when the
          connection URL contains "sqlite", otherwise "postgres"
        - 'path' or 'file_path' without 'storage': "file"
        - 'storage', 'bucket' or 'container': "cloud_storage"

        Returns:
            The detected type name, or None when no rule matches.
        """
        # Database indicators
        if "table" in config:
            if "connection_string" in config or "database" in config:
                # Check if it's SQLite. Tolerate a None connection string and
                # a 'database' entry that is a bare URL string, not a mapping.
                conn_str = config.get("connection_string") or ""
                if not conn_str:
                    database = config.get("database")
                    if isinstance(database, dict):
                        conn_str = database.get("url") or ""
                    elif isinstance(database, str):
                        conn_str = database
                if "sqlite" in str(conn_str).lower():
                    return "sqlite"
                # NOTE(review): every non-SQLite URL (including MySQL/MSSQL)
                # falls through to "postgres" - confirm this is intended.
                return "postgres"

        # File indicators
        if any(key in config for key in ("path", "file_path")) and "storage" not in config:
            return "file"

        # Cloud storage indicators
        if any(key in config for key in ("storage", "bucket", "container")):
            return "cloud_storage"

        return None

    # Legacy method name for consistency with ExtractorFactory
    @classmethod
    def get_loader(cls, load_config: Dict[str, Any]) -> BaseLoader:
        """Legacy method. Use create() instead."""
        return cls.create(load_config)
168
+
169
+
170
def get_loader(load_config: Dict[str, Any]) -> BaseLoader:
    """
    Build a loader for the given load configuration.

    Module-level shortcut that forwards to ``LoaderFactory.create``.

    Args:
        load_config: Load configuration dictionary

    Returns:
        Loader instance produced by the factory
    """
    loader = LoaderFactory.create(load_config)
    return loader
@@ -0,0 +1,72 @@
1
+ """
2
+ File loader for ETL pipelines.
3
+ """
4
+
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pycharter.etl_generator.loaders.base import BaseLoader
10
+ from pycharter.etl_generator.loaders.file_loader import load_to_file
11
+ from pycharter.etl_generator.result import LoadResult
12
+
13
+
14
class FileLoader(BaseLoader):
    """
    Loader that writes records to a file on the local filesystem.

    The output format may be JSON, CSV, Parquet, or JSONL. The actual
    writing is delegated to ``load_to_file``.

    Example:
        >>> loader = FileLoader(path="output/data.json", file_format="json")
        >>> result = await loader.load(data)
    """

    def __init__(
        self,
        path: str,
        file_format: str = "json",
        write_mode: str = "overwrite",
    ):
        # Stored as given; validation happens in load_to_file at load time.
        self.write_mode = write_mode
        self.file_format = file_format
        self.path = path

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "FileLoader":
        """Build a FileLoader from a load configuration dictionary."""
        # 'file_path' wins over the shorter 'path' key when both are present.
        target = config.get("file_path") or config.get("path")
        chosen_format = config.get("format", "json")
        chosen_mode = config.get("write_mode", "overwrite")
        return cls(path=target, file_format=chosen_format, write_mode=chosen_mode)

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Write ``data`` to the configured file.

        Returns a successful zero-row result for empty input without touching
        the file. Any exception raised while writing is captured into a
        failed LoadResult rather than propagated.
        """
        started = time.time()

        if not data:
            return LoadResult(success=True, rows_loaded=0)

        try:
            outcome = load_to_file(
                data,
                {
                    "file_path": self.path,
                    "format": self.file_format,
                    "write_mode": self.write_mode,
                },
            )
            elapsed = time.time() - started
            return LoadResult(
                success=True,
                rows_loaded=outcome.get("written", 0),
                duration_seconds=elapsed,
            )
        except Exception as exc:
            return LoadResult(
                success=False,
                error=str(exc),
                duration_seconds=time.time() - started,
            )
@@ -0,0 +1,130 @@
1
+ """
2
+ File-based loader for ETL orchestrator.
3
+
4
+ Writes transformed data to local files in JSON, CSV, Parquet, or JSONL format.
5
+ """
6
+
7
+ import io
8
+ import json
9
+ import logging
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from pycharter.utils.value_injector import resolve_values
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ SUPPORTED_FORMATS = ("json", "csv", "parquet", "jsonl")
18
+
19
+
20
def load_to_file(
    data: List[Dict[str, Any]],
    load_config: Dict[str, Any],
    contract_dir: Optional[Any] = None,
    config_context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Write transformed data to a local file.

    Load config (destination_type: file):
        file_path: Path to output file (required). Supports ${VAR} resolution.
        format: json | csv | parquet | jsonl (default: json)
        write_mode: overwrite | append (default: overwrite).
            append: for jsonl/csv, appends lines; for json, read-merge-write (array concat).

    Args:
        data: Records to write.
        load_config: Load configuration as described above.
        contract_dir: Optional contract directory (``Path`` or plain string).
            When given, ``<contract_dir>/load.yaml`` is passed to value
            resolution as the source file.
        config_context: Optional context passed to value resolution.

    Returns:
        Dict with keys: written, total, path, format

    Raises:
        ValueError: If 'file_path' is missing, or 'format' / 'write_mode'
            is not one of the supported values.
    """
    # Accept a plain-string directory too; "str / str" would raise TypeError.
    if isinstance(contract_dir, str):
        contract_dir = Path(contract_dir)
    source_file = str(contract_dir / "load.yaml") if contract_dir else None

    file_path = load_config.get("file_path")
    if not file_path:
        raise ValueError(
            "File loader requires 'file_path' in load configuration. "
            "Example: file_path: ./output/data.json"
        )
    file_path = resolve_values(
        file_path, context=config_context, source_file=source_file
    )
    path = Path(file_path)

    fmt = (load_config.get("format") or "json").lower()
    if fmt not in SUPPORTED_FORMATS:
        raise ValueError(
            f"File loader format must be one of {SUPPORTED_FORMATS}, got '{fmt}'"
        )
    write_mode = (load_config.get("write_mode") or "overwrite").lower()
    if write_mode not in ("overwrite", "append"):
        raise ValueError(
            "File loader write_mode must be 'overwrite' or 'append', "
            f"got '{write_mode}'"
        )

    # Create missing parent directories so a fresh output location just works.
    path.parent.mkdir(parents=True, exist_ok=True)

    # One writer per supported format; fmt was validated above.
    writers = {
        "json": _write_json,
        "jsonl": _write_jsonl,
        "csv": _write_csv,
        "parquet": _write_parquet,
    }
    writers[fmt](data, path, write_mode)

    logger.info(f"File loader wrote {len(data)} records to {path} ({fmt})")
    return {"written": len(data), "total": len(data), "path": str(path), "format": fmt}
75
+
76
+
77
+ def _write_json(
78
+ data: List[Dict[str, Any]], path: Path, write_mode: str
79
+ ) -> None:
80
+ if write_mode == "append" and path.exists():
81
+ with open(path, "r", encoding="utf-8") as f:
82
+ existing = json.load(f)
83
+ if isinstance(existing, list):
84
+ data = existing + data
85
+ else:
86
+ data = [existing] + data
87
+ with open(path, "w", encoding="utf-8") as f:
88
+ json.dump(data, f, indent=2, default=str)
89
+
90
+
91
+ def _write_jsonl(
92
+ data: List[Dict[str, Any]], path: Path, write_mode: str
93
+ ) -> None:
94
+ mode = "a" if write_mode == "append" and path.exists() else "w"
95
+ with open(path, mode, encoding="utf-8") as f:
96
+ for record in data:
97
+ f.write(json.dumps(record, default=str) + "\n")
98
+
99
+
100
+ def _write_csv(
101
+ data: List[Dict[str, Any]], path: Path, write_mode: str
102
+ ) -> None:
103
+ if not data:
104
+ return
105
+ import csv
106
+
107
+ mode = "a" if write_mode == "append" and path.exists() else "w"
108
+ newfile = mode == "w"
109
+ with open(path, mode, encoding="utf-8", newline="") as f:
110
+ writer = csv.DictWriter(f, fieldnames=data[0].keys())
111
+ if newfile:
112
+ writer.writeheader()
113
+ writer.writerows(data)
114
+
115
+
116
def _write_parquet(
    data: List[Dict[str, Any]], path: Path, write_mode: str
) -> None:
    """
    Write ``data`` to ``path`` as a Parquet file using pandas.

    In append mode with an existing file, the current contents are read back,
    the new rows are concatenated after them with a fresh index, and the
    whole file is rewritten.

    Raises:
        ImportError: If pandas is not installed.
    """
    try:
        import pandas as pd
    except ImportError as e:
        raise ImportError(
            "pandas is required for Parquet file load. "
            "Install with: pip install pandas pyarrow"
        ) from e

    frame = pd.DataFrame(data)
    should_merge = write_mode == "append" and path.exists()
    if should_merge:
        frame = pd.concat([pd.read_parquet(path), frame], ignore_index=True)
    frame.to_parquet(path, index=False)