pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404) hide show
  1. api/main.py +27 -1
  2. api/models/docs.py +68 -0
  3. api/models/evolution.py +117 -0
  4. api/models/tracking.py +111 -0
  5. api/models/validation.py +46 -6
  6. api/routes/v1/__init__.py +14 -1
  7. api/routes/v1/docs.py +187 -0
  8. api/routes/v1/evolution.py +337 -0
  9. api/routes/v1/templates.py +211 -27
  10. api/routes/v1/tracking.py +301 -0
  11. api/routes/v1/validation.py +68 -31
  12. pycharter/__init__.py +268 -58
  13. pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
  14. pycharter/data/templates/contract/template_contract.yaml +122 -0
  15. pycharter/data/templates/contract/template_metadata.yaml +68 -0
  16. pycharter/data/templates/contract/template_schema.yaml +100 -0
  17. pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
  18. pycharter/data/templates/etl/README.md +224 -0
  19. pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
  20. pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
  21. pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
  22. pycharter/data/templates/etl/extract_database.yaml +34 -0
  23. pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
  24. pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
  25. pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
  26. pycharter/data/templates/etl/extract_file_json.yaml +24 -0
  27. pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
  28. pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
  29. pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
  30. pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
  31. pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
  32. pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
  33. pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
  34. pycharter/data/templates/etl/load_file.yaml +34 -0
  35. pycharter/data/templates/etl/load_insert.yaml +18 -0
  36. pycharter/data/templates/etl/load_postgresql.yaml +39 -0
  37. pycharter/data/templates/etl/load_sqlite.yaml +21 -0
  38. pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
  39. pycharter/data/templates/etl/load_upsert.yaml +25 -0
  40. pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
  41. pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
  42. pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
  43. pycharter/data/templates/etl/transform_combined.yaml +48 -0
  44. pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
  45. pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
  46. pycharter/data/templates/etl/transform_simple.yaml +59 -0
  47. pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
  48. pycharter/docs_generator/__init__.py +43 -0
  49. pycharter/docs_generator/generator.py +465 -0
  50. pycharter/docs_generator/renderers.py +247 -0
  51. pycharter/etl_generator/__init__.py +168 -80
  52. pycharter/etl_generator/builder.py +121 -0
  53. pycharter/etl_generator/config_loader.py +394 -0
  54. pycharter/etl_generator/config_validator.py +418 -0
  55. pycharter/etl_generator/context.py +132 -0
  56. pycharter/etl_generator/expression.py +499 -0
  57. pycharter/etl_generator/extractors/__init__.py +30 -0
  58. pycharter/etl_generator/extractors/base.py +70 -0
  59. pycharter/etl_generator/extractors/cloud_storage.py +530 -0
  60. pycharter/etl_generator/extractors/database.py +221 -0
  61. pycharter/etl_generator/extractors/factory.py +185 -0
  62. pycharter/etl_generator/extractors/file.py +475 -0
  63. pycharter/etl_generator/extractors/http.py +895 -0
  64. pycharter/etl_generator/extractors/streaming.py +57 -0
  65. pycharter/etl_generator/loaders/__init__.py +41 -0
  66. pycharter/etl_generator/loaders/base.py +35 -0
  67. pycharter/etl_generator/loaders/cloud.py +87 -0
  68. pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
  69. pycharter/etl_generator/loaders/database.py +274 -0
  70. pycharter/etl_generator/loaders/factory.py +180 -0
  71. pycharter/etl_generator/loaders/file.py +72 -0
  72. pycharter/etl_generator/loaders/file_loader.py +130 -0
  73. pycharter/etl_generator/pipeline.py +743 -0
  74. pycharter/etl_generator/protocols.py +54 -0
  75. pycharter/etl_generator/result.py +63 -0
  76. pycharter/etl_generator/schemas/__init__.py +49 -0
  77. pycharter/etl_generator/transformers/__init__.py +49 -0
  78. pycharter/etl_generator/transformers/base.py +63 -0
  79. pycharter/etl_generator/transformers/config.py +45 -0
  80. pycharter/etl_generator/transformers/custom_function.py +101 -0
  81. pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
  82. pycharter/etl_generator/transformers/operations.py +218 -0
  83. pycharter/etl_generator/transformers/pipeline.py +54 -0
  84. pycharter/etl_generator/transformers/simple_operations.py +131 -0
  85. pycharter/quality/__init__.py +25 -0
  86. pycharter/quality/tracking/__init__.py +64 -0
  87. pycharter/quality/tracking/collector.py +318 -0
  88. pycharter/quality/tracking/exporters.py +238 -0
  89. pycharter/quality/tracking/models.py +194 -0
  90. pycharter/quality/tracking/store.py +385 -0
  91. pycharter/runtime_validator/__init__.py +20 -7
  92. pycharter/runtime_validator/builder.py +328 -0
  93. pycharter/runtime_validator/validator.py +311 -7
  94. pycharter/runtime_validator/validator_core.py +61 -0
  95. pycharter/schema_evolution/__init__.py +61 -0
  96. pycharter/schema_evolution/compatibility.py +270 -0
  97. pycharter/schema_evolution/diff.py +496 -0
  98. pycharter/schema_evolution/models.py +201 -0
  99. pycharter/shared/__init__.py +56 -0
  100. pycharter/shared/errors.py +296 -0
  101. pycharter/shared/protocols.py +234 -0
  102. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
  103. pycharter-0.0.24.dist-info/RECORD +543 -0
  104. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
  105. ui/static/404/index.html +1 -1
  106. ui/static/404.html +1 -1
  107. ui/static/__next.__PAGE__.txt +1 -1
  108. ui/static/__next._full.txt +1 -1
  109. ui/static/__next._head.txt +1 -1
  110. ui/static/__next._index.txt +1 -1
  111. ui/static/__next._tree.txt +1 -1
  112. ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
  113. ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
  114. ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
  115. ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
  116. ui/static/_not-found/__next._full.txt +1 -1
  117. ui/static/_not-found/__next._head.txt +1 -1
  118. ui/static/_not-found/__next._index.txt +1 -1
  119. ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  120. ui/static/_not-found/__next._not-found.txt +1 -1
  121. ui/static/_not-found/__next._tree.txt +1 -1
  122. ui/static/_not-found/index.html +1 -1
  123. ui/static/_not-found/index.txt +1 -1
  124. ui/static/contracts/__next._full.txt +2 -2
  125. ui/static/contracts/__next._head.txt +1 -1
  126. ui/static/contracts/__next._index.txt +1 -1
  127. ui/static/contracts/__next._tree.txt +1 -1
  128. ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  129. ui/static/contracts/__next.contracts.txt +1 -1
  130. ui/static/contracts/index.html +1 -1
  131. ui/static/contracts/index.txt +2 -2
  132. ui/static/documentation/__next._full.txt +1 -1
  133. ui/static/documentation/__next._head.txt +1 -1
  134. ui/static/documentation/__next._index.txt +1 -1
  135. ui/static/documentation/__next._tree.txt +1 -1
  136. ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
  137. ui/static/documentation/__next.documentation.txt +1 -1
  138. ui/static/documentation/index.html +2 -2
  139. ui/static/documentation/index.txt +1 -1
  140. ui/static/index.html +1 -1
  141. ui/static/index.txt +1 -1
  142. ui/static/metadata/__next._full.txt +1 -1
  143. ui/static/metadata/__next._head.txt +1 -1
  144. ui/static/metadata/__next._index.txt +1 -1
  145. ui/static/metadata/__next._tree.txt +1 -1
  146. ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  147. ui/static/metadata/__next.metadata.txt +1 -1
  148. ui/static/metadata/index.html +1 -1
  149. ui/static/metadata/index.txt +1 -1
  150. ui/static/quality/__next._full.txt +2 -2
  151. ui/static/quality/__next._head.txt +1 -1
  152. ui/static/quality/__next._index.txt +1 -1
  153. ui/static/quality/__next._tree.txt +1 -1
  154. ui/static/quality/__next.quality.__PAGE__.txt +2 -2
  155. ui/static/quality/__next.quality.txt +1 -1
  156. ui/static/quality/index.html +2 -2
  157. ui/static/quality/index.txt +2 -2
  158. ui/static/rules/__next._full.txt +1 -1
  159. ui/static/rules/__next._head.txt +1 -1
  160. ui/static/rules/__next._index.txt +1 -1
  161. ui/static/rules/__next._tree.txt +1 -1
  162. ui/static/rules/__next.rules.__PAGE__.txt +1 -1
  163. ui/static/rules/__next.rules.txt +1 -1
  164. ui/static/rules/index.html +1 -1
  165. ui/static/rules/index.txt +1 -1
  166. ui/static/schemas/__next._full.txt +1 -1
  167. ui/static/schemas/__next._head.txt +1 -1
  168. ui/static/schemas/__next._index.txt +1 -1
  169. ui/static/schemas/__next._tree.txt +1 -1
  170. ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  171. ui/static/schemas/__next.schemas.txt +1 -1
  172. ui/static/schemas/index.html +1 -1
  173. ui/static/schemas/index.txt +1 -1
  174. ui/static/settings/__next._full.txt +1 -1
  175. ui/static/settings/__next._head.txt +1 -1
  176. ui/static/settings/__next._index.txt +1 -1
  177. ui/static/settings/__next._tree.txt +1 -1
  178. ui/static/settings/__next.settings.__PAGE__.txt +1 -1
  179. ui/static/settings/__next.settings.txt +1 -1
  180. ui/static/settings/index.html +1 -1
  181. ui/static/settings/index.txt +1 -1
  182. ui/static/static/404/index.html +1 -1
  183. ui/static/static/404.html +1 -1
  184. ui/static/static/__next.__PAGE__.txt +1 -1
  185. ui/static/static/__next._full.txt +2 -2
  186. ui/static/static/__next._head.txt +1 -1
  187. ui/static/static/__next._index.txt +2 -2
  188. ui/static/static/__next._tree.txt +2 -2
  189. ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
  190. ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
  191. ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
  192. ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
  193. ui/static/static/_not-found/__next._full.txt +2 -2
  194. ui/static/static/_not-found/__next._head.txt +1 -1
  195. ui/static/static/_not-found/__next._index.txt +2 -2
  196. ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  197. ui/static/static/_not-found/__next._not-found.txt +1 -1
  198. ui/static/static/_not-found/__next._tree.txt +2 -2
  199. ui/static/static/_not-found/index.html +1 -1
  200. ui/static/static/_not-found/index.txt +2 -2
  201. ui/static/static/contracts/__next._full.txt +3 -3
  202. ui/static/static/contracts/__next._head.txt +1 -1
  203. ui/static/static/contracts/__next._index.txt +2 -2
  204. ui/static/static/contracts/__next._tree.txt +2 -2
  205. ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  206. ui/static/static/contracts/__next.contracts.txt +1 -1
  207. ui/static/static/contracts/index.html +1 -1
  208. ui/static/static/contracts/index.txt +3 -3
  209. ui/static/static/documentation/__next._full.txt +3 -3
  210. ui/static/static/documentation/__next._head.txt +1 -1
  211. ui/static/static/documentation/__next._index.txt +2 -2
  212. ui/static/static/documentation/__next._tree.txt +2 -2
  213. ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
  214. ui/static/static/documentation/__next.documentation.txt +1 -1
  215. ui/static/static/documentation/index.html +2 -2
  216. ui/static/static/documentation/index.txt +3 -3
  217. ui/static/static/index.html +1 -1
  218. ui/static/static/index.txt +2 -2
  219. ui/static/static/metadata/__next._full.txt +2 -2
  220. ui/static/static/metadata/__next._head.txt +1 -1
  221. ui/static/static/metadata/__next._index.txt +2 -2
  222. ui/static/static/metadata/__next._tree.txt +2 -2
  223. ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  224. ui/static/static/metadata/__next.metadata.txt +1 -1
  225. ui/static/static/metadata/index.html +1 -1
  226. ui/static/static/metadata/index.txt +2 -2
  227. ui/static/static/quality/__next._full.txt +2 -2
  228. ui/static/static/quality/__next._head.txt +1 -1
  229. ui/static/static/quality/__next._index.txt +2 -2
  230. ui/static/static/quality/__next._tree.txt +2 -2
  231. ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
  232. ui/static/static/quality/__next.quality.txt +1 -1
  233. ui/static/static/quality/index.html +2 -2
  234. ui/static/static/quality/index.txt +2 -2
  235. ui/static/static/rules/__next._full.txt +2 -2
  236. ui/static/static/rules/__next._head.txt +1 -1
  237. ui/static/static/rules/__next._index.txt +2 -2
  238. ui/static/static/rules/__next._tree.txt +2 -2
  239. ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
  240. ui/static/static/rules/__next.rules.txt +1 -1
  241. ui/static/static/rules/index.html +1 -1
  242. ui/static/static/rules/index.txt +2 -2
  243. ui/static/static/schemas/__next._full.txt +2 -2
  244. ui/static/static/schemas/__next._head.txt +1 -1
  245. ui/static/static/schemas/__next._index.txt +2 -2
  246. ui/static/static/schemas/__next._tree.txt +2 -2
  247. ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  248. ui/static/static/schemas/__next.schemas.txt +1 -1
  249. ui/static/static/schemas/index.html +1 -1
  250. ui/static/static/schemas/index.txt +2 -2
  251. ui/static/static/settings/__next._full.txt +2 -2
  252. ui/static/static/settings/__next._head.txt +1 -1
  253. ui/static/static/settings/__next._index.txt +2 -2
  254. ui/static/static/settings/__next._tree.txt +2 -2
  255. ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
  256. ui/static/static/settings/__next.settings.txt +1 -1
  257. ui/static/static/settings/index.html +1 -1
  258. ui/static/static/settings/index.txt +2 -2
  259. ui/static/static/static/.gitkeep +0 -0
  260. ui/static/static/static/404/index.html +1 -0
  261. ui/static/static/static/404.html +1 -0
  262. ui/static/static/static/__next.__PAGE__.txt +10 -0
  263. ui/static/static/static/__next._full.txt +30 -0
  264. ui/static/static/static/__next._head.txt +7 -0
  265. ui/static/static/static/__next._index.txt +9 -0
  266. ui/static/static/static/__next._tree.txt +2 -0
  267. ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
  268. ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
  269. ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
  270. ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
  271. ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
  272. ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
  273. ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
  274. ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
  275. ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
  276. ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
  277. ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
  278. ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
  279. ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
  280. ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
  281. ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
  282. ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
  283. ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
  284. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
  285. ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
  286. ui/static/static/static/_not-found/__next._full.txt +17 -0
  287. ui/static/static/static/_not-found/__next._head.txt +7 -0
  288. ui/static/static/static/_not-found/__next._index.txt +9 -0
  289. ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
  290. ui/static/static/static/_not-found/__next._not-found.txt +4 -0
  291. ui/static/static/static/_not-found/__next._tree.txt +2 -0
  292. ui/static/static/static/_not-found/index.html +1 -0
  293. ui/static/static/static/_not-found/index.txt +17 -0
  294. ui/static/static/static/contracts/__next._full.txt +21 -0
  295. ui/static/static/static/contracts/__next._head.txt +7 -0
  296. ui/static/static/static/contracts/__next._index.txt +9 -0
  297. ui/static/static/static/contracts/__next._tree.txt +2 -0
  298. ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
  299. ui/static/static/static/contracts/__next.contracts.txt +4 -0
  300. ui/static/static/static/contracts/index.html +1 -0
  301. ui/static/static/static/contracts/index.txt +21 -0
  302. ui/static/static/static/documentation/__next._full.txt +21 -0
  303. ui/static/static/static/documentation/__next._head.txt +7 -0
  304. ui/static/static/static/documentation/__next._index.txt +9 -0
  305. ui/static/static/static/documentation/__next._tree.txt +2 -0
  306. ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
  307. ui/static/static/static/documentation/__next.documentation.txt +4 -0
  308. ui/static/static/static/documentation/index.html +93 -0
  309. ui/static/static/static/documentation/index.txt +21 -0
  310. ui/static/static/static/index.html +1 -0
  311. ui/static/static/static/index.txt +30 -0
  312. ui/static/static/static/metadata/__next._full.txt +21 -0
  313. ui/static/static/static/metadata/__next._head.txt +7 -0
  314. ui/static/static/static/metadata/__next._index.txt +9 -0
  315. ui/static/static/static/metadata/__next._tree.txt +2 -0
  316. ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
  317. ui/static/static/static/metadata/__next.metadata.txt +4 -0
  318. ui/static/static/static/metadata/index.html +1 -0
  319. ui/static/static/static/metadata/index.txt +21 -0
  320. ui/static/static/static/quality/__next._full.txt +21 -0
  321. ui/static/static/static/quality/__next._head.txt +7 -0
  322. ui/static/static/static/quality/__next._index.txt +9 -0
  323. ui/static/static/static/quality/__next._tree.txt +2 -0
  324. ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
  325. ui/static/static/static/quality/__next.quality.txt +4 -0
  326. ui/static/static/static/quality/index.html +2 -0
  327. ui/static/static/static/quality/index.txt +21 -0
  328. ui/static/static/static/rules/__next._full.txt +21 -0
  329. ui/static/static/static/rules/__next._head.txt +7 -0
  330. ui/static/static/static/rules/__next._index.txt +9 -0
  331. ui/static/static/static/rules/__next._tree.txt +2 -0
  332. ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
  333. ui/static/static/static/rules/__next.rules.txt +4 -0
  334. ui/static/static/static/rules/index.html +1 -0
  335. ui/static/static/static/rules/index.txt +21 -0
  336. ui/static/static/static/schemas/__next._full.txt +21 -0
  337. ui/static/static/static/schemas/__next._head.txt +7 -0
  338. ui/static/static/static/schemas/__next._index.txt +9 -0
  339. ui/static/static/static/schemas/__next._tree.txt +2 -0
  340. ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
  341. ui/static/static/static/schemas/__next.schemas.txt +4 -0
  342. ui/static/static/static/schemas/index.html +1 -0
  343. ui/static/static/static/schemas/index.txt +21 -0
  344. ui/static/static/static/settings/__next._full.txt +21 -0
  345. ui/static/static/static/settings/__next._head.txt +7 -0
  346. ui/static/static/static/settings/__next._index.txt +9 -0
  347. ui/static/static/static/settings/__next._tree.txt +2 -0
  348. ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
  349. ui/static/static/static/settings/__next.settings.txt +4 -0
  350. ui/static/static/static/settings/index.html +1 -0
  351. ui/static/static/static/settings/index.txt +21 -0
  352. ui/static/static/static/validation/__next._full.txt +21 -0
  353. ui/static/static/static/validation/__next._head.txt +7 -0
  354. ui/static/static/static/validation/__next._index.txt +9 -0
  355. ui/static/static/static/validation/__next._tree.txt +2 -0
  356. ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
  357. ui/static/static/static/validation/__next.validation.txt +4 -0
  358. ui/static/static/static/validation/index.html +1 -0
  359. ui/static/static/static/validation/index.txt +21 -0
  360. ui/static/static/validation/__next._full.txt +2 -2
  361. ui/static/static/validation/__next._head.txt +1 -1
  362. ui/static/static/validation/__next._index.txt +2 -2
  363. ui/static/static/validation/__next._tree.txt +2 -2
  364. ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
  365. ui/static/static/validation/__next.validation.txt +1 -1
  366. ui/static/static/validation/index.html +1 -1
  367. ui/static/static/validation/index.txt +2 -2
  368. ui/static/validation/__next._full.txt +2 -2
  369. ui/static/validation/__next._head.txt +1 -1
  370. ui/static/validation/__next._index.txt +1 -1
  371. ui/static/validation/__next._tree.txt +1 -1
  372. ui/static/validation/__next.validation.__PAGE__.txt +2 -2
  373. ui/static/validation/__next.validation.txt +1 -1
  374. ui/static/validation/index.html +1 -1
  375. ui/static/validation/index.txt +2 -2
  376. pycharter/data/templates/template_coercion_rules.yaml +0 -15
  377. pycharter/data/templates/template_contract.yaml +0 -587
  378. pycharter/data/templates/template_metadata.yaml +0 -38
  379. pycharter/data/templates/template_schema.yaml +0 -22
  380. pycharter/data/templates/template_transform_advanced.yaml +0 -50
  381. pycharter/data/templates/template_transform_simple.yaml +0 -59
  382. pycharter/data/templates/template_validation_rules.yaml +0 -29
  383. pycharter/etl_generator/extraction.py +0 -916
  384. pycharter/etl_generator/factory.py +0 -174
  385. pycharter/etl_generator/orchestrator.py +0 -1650
  386. pycharter/integrations/__init__.py +0 -19
  387. pycharter/integrations/kafka.py +0 -178
  388. pycharter/integrations/streaming.py +0 -100
  389. pycharter-0.0.22.dist-info/RECORD +0 -358
  390. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
  391. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
  392. {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
  393. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
  394. /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
  395. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
  396. /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
  397. /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
  398. /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
  399. /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
  400. /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
  401. /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
  402. /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
  403. /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
  404. /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
@@ -0,0 +1,100 @@
1
+ # JSON Schema Template
2
+ # Defines the structure and constraints of your data
3
+ #
4
+ # Usage:
5
+ # from pycharter import from_dict
6
+ # Model = from_dict(schema, "MyModel")
7
+
8
+ type: object
9
+ title: my_entity
10
+ version: "1.0.0"
11
+
12
+ properties:
13
+ # Required string field
14
+ id:
15
+ type: string
16
+ description: Unique identifier
17
+ title: ID
18
+ minLength: 1
19
+
20
+ # Required string with max length
21
+ name:
22
+ type: string
23
+ description: Entity name
24
+ title: Name
25
+ minLength: 1
26
+ maxLength: 100
27
+
28
+ # Enum field
29
+ status:
30
+ type: string
31
+ description: Current status
32
+ enum: [active, inactive, pending]
33
+
34
+ # Integer with constraints
35
+ count:
36
+ type: integer
37
+ description: Item count
38
+ minimum: 0
39
+
40
+ # Number (float) with constraints
41
+ amount:
42
+ type: number
43
+ description: Monetary amount
44
+ minimum: 0
45
+
46
+ # Boolean field
47
+ is_active:
48
+ type: boolean
49
+ description: Active flag
50
+ default: true
51
+
52
+ # Date-time field
53
+ created_at:
54
+ type: string
55
+ format: date-time
56
+ description: Creation timestamp
57
+
58
+ # Email field
59
+ email:
60
+ type: string
61
+ format: email
62
+ description: Contact email
63
+
64
+ # UUID field
65
+ uuid:
66
+ type: string
67
+ format: uuid
68
+ description: Unique UUID
69
+
70
+ # Optional/nullable field (use anyOf for nullable)
71
+ notes:
72
+ anyOf:
73
+ - type: string
74
+ maxLength: 500
75
+ - type: "null"
76
+ default: null
77
+ description: Optional notes
78
+
79
+ # Nested object
80
+ metadata:
81
+ type: object
82
+ properties:
83
+ source:
84
+ type: string
85
+ version:
86
+ type: string
87
+ required: [source]
88
+
89
+ # Array of strings
90
+ tags:
91
+ type: array
92
+ items:
93
+ type: string
94
+ description: Tags list
95
+
96
+ required:
97
+ - id
98
+ - name
99
+ - status
100
+ - created_at
@@ -0,0 +1,75 @@
1
+ # Validation Rules Template
2
+ # Post-validation business rule checks
3
+ #
4
+ # Usage:
5
+ # Validation rules check data AFTER Pydantic validation.
6
+ # Useful for business rules that go beyond type checking.
7
+ #
8
+ # Available validations:
9
+ # - min_length: {threshold: N}
10
+ # - max_length: {threshold: N}
11
+ # - greater_than_or_equal_to: {threshold: N}
12
+ # - less_than_or_equal_to: {threshold: N}
13
+ # - only_allow: {allowed_values: [a, b, c]}
14
+ # - only_allow_if: {condition_field: x, condition_value: y, allowed_values: [...]}
15
+ # - non_empty_string: null (no params needed)
16
+ # - is_positive: null
17
+ # - is_email: null
18
+ # - is_url: null
19
+ # - is_alphanumeric: null
20
+ # - is_numeric_string: null
21
+ # - no_capital_characters: null
22
+ # - no_special_characters: null
23
+ # - matches_regex: {pattern: "^[A-Z]{2}[0-9]{4}$"}
24
+
25
+ title: my_validation_rules
26
+ description: Business validation rules
27
+ version: "1.0.0"
28
+
29
+ rules:
30
+ # String length validation
31
+ id:
32
+ min_length:
33
+ threshold: 1
34
+ max_length:
35
+ threshold: 50
36
+
37
+ name:
38
+ non_empty_string: null
39
+ max_length:
40
+ threshold: 100
41
+
42
+ # Enum-like validation
43
+ status:
44
+ only_allow:
45
+ allowed_values:
46
+ - active
47
+ - inactive
48
+ - pending
49
+
50
+ # Numeric range validation
51
+ count:
52
+ greater_than_or_equal_to:
53
+ threshold: 0
54
+
55
+ amount:
56
+ greater_than_or_equal_to:
57
+ threshold: 0
58
+ less_than_or_equal_to:
59
+ threshold: 1000000
60
+
61
+ # Email validation
62
+ email:
63
+ is_email: null
64
+
65
+ # Regex pattern validation
66
+ # code:
67
+ # matches_regex:
68
+ # pattern: "^[A-Z]{2}[0-9]{4}$"
69
+
70
+ # Conditional validation
71
+ # discount_rate:
72
+ # only_allow_if:
73
+ # condition_field: customer_type
74
+ # condition_value: premium
75
+ # allowed_values: [0.1, 0.15, 0.2]
@@ -0,0 +1,224 @@
1
+ # ETL Config Templates
2
+
3
+ Templates for ETL pipeline configuration. Supports two formats:
4
+
5
+ 1. **Single-file format**: `pipeline.yaml` with extract, transform, load sections
6
+ 2. **Multi-file format**: Separate `extract.yaml`, `transform.yaml`, `load.yaml` files
7
+
8
+ ## Quick Start
9
+
10
+ ### Single-File Format (Recommended)
11
+
12
+ ```yaml
13
+ # pipelines/users/pipeline.yaml
14
+ name: users_pipeline
15
+ version: "1.0.0"
16
+
17
+ extract:
18
+ type: http # Required: http | file | database | cloud_storage
19
+ url: https://api.example.com/users
20
+
21
+ transform:
22
+ - rename:
23
+ userId: user_id
24
+ - convert:
25
+ user_id: int
26
+ - add:
27
+ full_name: "${first_name} ${last_name}"
28
+ loaded_at: now()
29
+
30
+ load:
31
+ type: postgres # Required: postgres | sqlite | file | cloud_storage
32
+ table: users
33
+ database:
34
+ url: ${DATABASE_URL}
35
+ ```
36
+
37
+ ```python
38
+ from pycharter import Pipeline
39
+ import asyncio
40
+
41
+ async def main():
42
+ pipeline = Pipeline.from_config("pipelines/users/pipeline.yaml")
43
+ result = await pipeline.run()
44
+ print(f"Loaded {result.rows_loaded} rows")
45
+
46
+ asyncio.run(main())
47
+ ```
48
+
49
+ ### Multi-File Format
50
+
51
+ ```bash
52
+ mkdir -p pipelines/users
53
+ cp pycharter/data/templates/etl/extract_http_simple.yaml pipelines/users/extract.yaml
54
+ cp pycharter/data/templates/etl/transform_simple.yaml pipelines/users/transform.yaml
55
+ cp pycharter/data/templates/etl/load_postgresql.yaml pipelines/users/load.yaml
56
+ ```
57
+
58
+ ```python
59
+ pipeline = Pipeline.from_config("pipelines/users/")
60
+ ```
61
+
62
+ ## Type Field (Required)
63
+
64
+ All extract and load configs require an explicit `type` field:
65
+
66
+ **Extract types:**
67
+ - `http` - HTTP/API extraction
68
+ - `file` - Local file extraction (CSV, JSON, Parquet, etc.)
69
+ - `database` - SQL database extraction
70
+ - `cloud_storage` - Cloud storage (S3, GCS, Azure)
71
+
72
+ **Load types:**
73
+ - `postgres` / `postgresql` - PostgreSQL
74
+ - `sqlite` - SQLite
75
+ - `file` - Local file (JSON, CSV, Parquet)
76
+ - `cloud_storage` - Cloud storage (S3, GCS, Azure)
77
+
78
+ ## Extract Templates
79
+
80
+ | Template | Type | Description |
81
+ |----------|------|-------------|
82
+ | `extract_http_simple.yaml` | http | Single HTTP request (no pagination) |
83
+ | `extract_http_paginated.yaml` | http | HTTP with pagination (page/offset/cursor) |
84
+ | `extract_http_path_params.yaml` | http | HTTP with `{param}` path substitution |
85
+ | `extract_file_csv.yaml` | file | CSV file extraction |
86
+ | `extract_file_json.yaml` | file | JSON file extraction |
87
+ | `extract_file_parquet.yaml` | file | Parquet file extraction |
88
+ | `extract_file_glob.yaml` | file | Multiple files via glob pattern |
89
+ | `extract_database.yaml` | database | SQL database extraction |
90
+ | `extract_database_ssh.yaml` | database | Database via SSH tunnel |
91
+ | `extract_cloud_s3.yaml` | cloud_storage | AWS S3 extraction |
92
+ | `extract_cloud_gcs.yaml` | cloud_storage | Google Cloud Storage |
93
+ | `extract_cloud_azure.yaml` | cloud_storage | Azure Blob Storage |
94
+
95
+ ## Transform Templates
96
+
97
+ | Template | Description |
98
+ |----------|-------------|
99
+ | `transform_simple.yaml` | rename, convert, defaults, add, select, drop |
100
+ | `transform_custom_function.yaml` | Call Python function |
101
+ | `transform_jsonata.yaml` | JSONata expressions |
102
+ | `transform_combined.yaml` | Simple + JSONata + custom |
103
+
104
+ ### Transform Formats
105
+
106
+ **List format (ordered)** - Transforms applied in specified order:
107
+
108
+ ```yaml
109
+ transform:
110
+ - rename: {old_field: new_field}
111
+ - convert: {field: int}
112
+ - add:
113
+ full_name: "${first_name} ${last_name}"
114
+ timestamp: now()
115
+ - select: [id, name, email]
116
+ ```
117
+
118
+ **Dict format (legacy)** - Fixed order: rename → convert → defaults → add → select → drop
119
+
120
+ ```yaml
121
+ transform:
122
+ rename:
123
+ old_field: new_field
124
+ convert:
125
+ field: int
126
+ ```
127
+
128
+ ### Expression Syntax
129
+
130
+ In `add` fields, you can use expressions:
131
+
132
+ - `${field_name}` - Reference field value
133
+ - `${field_name:-default}` - Field with default if missing
134
+ - `now()` - Current timestamp (ISO format)
135
+ - `uuid()` - Generate UUID
136
+ - `concat(${a}, " ", ${b})` - Concatenate values
137
+ - `lower(${field})` / `upper(${field})` - Case conversion
138
+
139
+ ## Load Templates
140
+
141
+ | Template | Type | Description |
142
+ |----------|------|-------------|
143
+ | `load_postgresql.yaml` | postgres | PostgreSQL (upsert, insert, etc.) |
144
+ | `load_sqlite.yaml` | sqlite | SQLite database |
145
+ | `load_file.yaml` | file | JSON, CSV, Parquet file |
146
+ | `load_upsert.yaml` | postgres | Upsert by primary key |
147
+ | `load_insert.yaml` | postgres | Insert only |
148
+ | `load_truncate_and_load.yaml` | postgres | Truncate then insert |
149
+ | `load_with_dlq.yaml` | postgres | With dead letter queue |
150
+ | `load_with_ssh_tunnel.yaml` | postgres | Via SSH tunnel |
151
+ | `load_cloud_s3.yaml` | cloud_storage | AWS S3 |
152
+ | `load_cloud_gcs.yaml` | cloud_storage | Google Cloud Storage |
153
+ | `load_cloud_azure.yaml` | cloud_storage | Azure Blob Storage |
154
+
155
+ ## Complete Pipeline Template
156
+
157
+ See `pipeline_http_to_db.yaml` for a complete single-file pipeline example.
158
+
159
+ ## Variable Substitution
160
+
161
+ Configs support `${VAR}` syntax for flexible configuration:
162
+
163
+ ```yaml
164
+ path: ${DATA_DIR}/input.json # From variables
165
+ params:
166
+ api_key: ${API_KEY:?API_KEY is required} # Required - error if missing
167
+ limit: ${BATCH_LIMIT:-100} # With default value
168
+ database:
169
+ url: ${DATABASE_URL} # From environment
170
+ ```
171
+
172
+ Provide values via the `variables` parameter (recommended) or environment variables:
173
+
174
+ ```python
175
+ # Recommended: explicit variables - no assumptions about structure
176
+ pipeline = Pipeline.from_config_files(
177
+ extract="my_extract.yaml",
178
+ load="my_load.yaml",
179
+ variables={
180
+ "DATA_DIR": "./data",
181
+ "OUTPUT_DIR": "./output",
182
+ "API_KEY": "xxx",
183
+ "DATABASE_URL": "postgresql://..."
184
+ }
185
+ )
186
+
187
+ # Or with from_config() for directory-based loading
188
+ pipeline = Pipeline.from_config(
189
+ "pipelines/users",
190
+ variables={"API_KEY": "xxx"}
191
+ )
192
+ ```
193
+
194
+ ## Programmatic API
195
+
196
+ Instead of config files, use the Pipeline API directly:
197
+
198
+ ```python
199
+ from pycharter import (
200
+ Pipeline, HTTPExtractor, PostgresLoader,
201
+ Rename, Select, Filter, Convert, AddField
202
+ )
203
+
204
+ pipeline = (
205
+ Pipeline(HTTPExtractor(url="https://api.example.com/users"))
206
+ | Rename({"userName": "user_name"})
207
+ | AddField("full_name", "${first_name} ${last_name}") # Expression support!
208
+ | Select(["id", "user_name", "email", "full_name"])
209
+ | Convert({"id": int})
210
+ | Filter(lambda r: r.get("email"))
211
+ | PostgresLoader(
212
+ connection_string="postgresql://localhost/db",
213
+ table="users",
214
+ write_method="upsert",
215
+ primary_key="id"
216
+ )
217
+ )
218
+
219
+ result = await pipeline.run()  # must be awaited inside an async function, e.g. one run via asyncio.run(main())
220
+ ```
221
+
222
+ ## Examples
223
+
224
+ See `examples/etl_config_example/` for working examples with config files.
@@ -0,0 +1,24 @@
1
+ # Template: Extract from Azure Blob Storage
2
+ # Copy to your pipeline directory as extract.yaml
3
+ # Requires: pip install azure-storage-blob
4
+
5
+ title: azure_extraction
6
+ description: Extract data from Azure Blob Storage
7
+ version: "1.0.0"
8
+
9
+ # Source type (required)
10
+ type: cloud_storage
11
+
12
+ storage:
13
+ provider: azure
14
+ container: my-container
15
+ path: data/input.json
16
+
17
+ # Or prefix for multiple blobs
18
+ # prefix: data/inputs/
19
+
20
+ credentials:
21
+ connection_string: ${AZURE_STORAGE_CONNECTION_STRING}
22
+
23
+ format: json
24
+ batch_size: 1000
@@ -0,0 +1,25 @@
1
+ # Template: Extract from Google Cloud Storage
2
+ # Copy to your pipeline directory as extract.yaml
3
+ # Requires: pip install google-cloud-storage
4
+
5
+ title: gcs_extraction
6
+ description: Extract data from Google Cloud Storage
7
+ version: "1.0.0"
8
+
9
+ # Source type (required)
10
+ type: cloud_storage
11
+
12
+ storage:
13
+ provider: gcs
14
+ bucket: my-bucket-name
15
+ path: data/input.json
16
+
17
+ # Or prefix for multiple objects
18
+ # prefix: data/inputs/
19
+
20
+ # Credentials (optional - uses default credentials if omitted)
21
+ # credentials:
22
+ # service_account_file: /path/to/service-account.json
23
+
24
+ format: json
25
+ batch_size: 1000
@@ -0,0 +1,30 @@
1
+ # Template: Extract from AWS S3
2
+ # Copy to your pipeline directory as extract.yaml
3
+ # Requires: pip install boto3
4
+
5
+ title: s3_extraction
6
+ description: Extract data from AWS S3
7
+ version: "1.0.0"
8
+
9
+ # Source type (required)
10
+ type: cloud_storage
11
+
12
+ storage:
13
+ provider: s3
14
+ bucket: my-bucket-name
15
+
16
+ # Single object
17
+ path: data/input.json
18
+
19
+ # Or prefix for multiple objects (uncomment)
20
+ # prefix: data/inputs/
21
+ # pattern: "*.json" # Filter by pattern
22
+
23
+ # Credentials (optional - uses AWS credential chain if omitted)
24
+ # credentials:
25
+ # aws_access_key_id: ${AWS_ACCESS_KEY_ID}
26
+ # aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY}
27
+ # region: us-east-1
28
+
29
+ format: json
30
+ batch_size: 1000
@@ -0,0 +1,34 @@
1
+ # Template: Extract from Database
2
+ # Copy to your pipeline directory as extract.yaml
3
+ #
4
+ # Supported databases: PostgreSQL, MySQL, SQLite, MSSQL, Oracle
5
+
6
+ title: database_extraction
7
+ description: Extract data from SQL database
8
+ version: "1.0.0"
9
+
10
+ # Source type (required)
11
+ type: database
12
+
13
+ # SQL query to execute
14
+ query: |
15
+ SELECT id, name, email, created_at
16
+ FROM users
17
+ WHERE status = :status
18
+ ORDER BY created_at DESC
19
+ LIMIT :limit
20
+
21
+ # Query parameters (each key fills the matching :key placeholder in the query above)
22
+ params:
23
+ status: active
24
+ limit: 10000
25
+
26
+ # Database connection
27
+ connection_string: ${DATABASE_URL:?DATABASE_URL is required}
28
+ # Formats:
29
+ # PostgreSQL: postgresql://user:pass@host:5432/db
30
+ # MySQL: mysql://user:pass@host:3306/db
31
+ # SQLite: sqlite:///./data/local.db
32
+ # MSSQL: mssql+pyodbc://user:pass@host/db?driver=ODBC+Driver+17+for+SQL+Server
33
+
34
+ batch_size: 1000
@@ -0,0 +1,40 @@
1
+ # Template: Extract from Database via SSH Tunnel
2
+ # Copy to your pipeline directory as extract.yaml
3
+ # Use when database is only accessible through a bastion/jump host
4
+
5
+ title: database_ssh_extraction
6
+ description: Extract from database through SSH tunnel
7
+ version: "1.0.0"
8
+
9
+ # Source type (required)
10
+ type: database
11
+
12
+ query: |
13
+ SELECT id, name, email, created_at
14
+ FROM users
15
+ WHERE status = :status
16
+
17
+ params:
18
+ status: active
19
+
20
+ # Database connection (point at localhost and the tunnel's local_port; the SSH tunnel forwards traffic to the remote database)
21
+ connection_string: postgresql://user:pass@localhost:5433/database
22
+
23
+ # SSH tunnel configuration
24
+ ssh_tunnel:
25
+ enabled: true
26
+ host: bastion.example.com
27
+ port: 22
28
+ username: ${SSH_USER}
29
+ key_file: ~/.ssh/id_rsa
30
+ # Or use password:
31
+ # password: ${SSH_PASSWORD}
32
+
33
+ # Remote database endpoint (the actual database host)
34
+ remote_host: db.internal.example.com
35
+ remote_port: 5432
36
+
37
+ # Local port for the tunnel (must match the port used in connection_string)
38
+ local_port: 5433
39
+
40
+ batch_size: 1000
@@ -0,0 +1,21 @@
1
+ # Template: Extract from CSV File
2
+ # Copy to your pipeline directory as extract.yaml
3
+
4
+ title: csv_file_extraction
5
+ description: Extract data from CSV file
6
+ version: "1.0.0"
7
+
8
+ # Source type (required)
9
+ type: file
10
+
11
+ path: ./data/input.csv
12
+ format: csv
13
+ batch_size: 1000
14
+
15
+ # CSV-specific options
16
+ csv_options:
17
+ delimiter: ","
18
+ quotechar: '"'
19
+ has_header: true
20
+ # encoding: utf-8
21
+ # skip_rows: 0
@@ -0,0 +1,25 @@
1
+ # Template: Extract from Multiple Files (Glob Pattern)
2
+ # Copy to your pipeline directory as extract.yaml
3
+
4
+ title: glob_file_extraction
5
+ description: Extract from multiple files matching pattern
6
+ version: "1.0.0"
7
+
8
+ # Source type (required)
9
+ type: file
10
+
11
+ # Glob pattern to match files
12
+ path: ./data/inputs/*.json
13
+ # Examples:
14
+ # ./data/*.csv - All CSV files in data/
15
+ # ./data/**/*.json - All JSON files recursively
16
+ # ./data/2024-*.parquet - Parquet files starting with 2024-
17
+
18
+ # Format (auto-detected from first file if omitted)
19
+ format: json
20
+
21
+ batch_size: 1000
22
+
23
+ # Processing options
24
+ # sort_files: true # Process files in sorted order
25
+ # fail_on_empty: false # Don't fail if no files match
@@ -0,0 +1,24 @@
1
+ # Template: Extract from JSON File
2
+ # Copy to your pipeline directory as extract.yaml
3
+
4
+ title: json_file_extraction
5
+ description: Extract data from JSON file
6
+ version: "1.0.0"
7
+
8
+ # Source type (required)
9
+ type: file
10
+
11
+ # File path
12
+ path: ./data/input.json
13
+ # Or use an environment variable with a fallback default:
14
+ # path: ${INPUT_DIR:-./data}/input.json
15
+
16
+ # Format (auto-detected from extension if omitted)
17
+ format: json
18
+
19
+ # Batch size for yielding records
20
+ batch_size: 1000
21
+
22
+ # For JSON files whose records are nested inside an object, set response_path to the array's location, e.g.:
23
+ # response_path: data.items
24
+ # response_path: results
@@ -0,0 +1,20 @@
1
+ # Template: Extract from Parquet File
2
+ # Copy to your pipeline directory as extract.yaml
3
+ # Requires: pip install pyarrow or pip install fastparquet
4
+
5
+ title: parquet_file_extraction
6
+ description: Extract data from Parquet file
7
+ version: "1.0.0"
8
+
9
+ # Source type (required)
10
+ type: file
11
+
12
+ path: ./data/input.parquet
13
+ format: parquet
14
+ batch_size: 1000
15
+
16
+ # Optional: select specific columns
17
+ # columns:
18
+ # - id
19
+ # - name
20
+ # - created_at