jerry-thomas 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. datapipeline/analysis/vector/collector.py +120 -17
  2. datapipeline/analysis/vector/matrix.py +33 -8
  3. datapipeline/analysis/vector/report.py +162 -32
  4. datapipeline/build/tasks/__init__.py +11 -0
  5. datapipeline/build/tasks/config.py +74 -0
  6. datapipeline/build/tasks/metadata.py +170 -0
  7. datapipeline/build/tasks/scaler.py +73 -0
  8. datapipeline/build/tasks/schema.py +60 -0
  9. datapipeline/build/tasks/utils.py +169 -0
  10. datapipeline/cli/app.py +304 -127
  11. datapipeline/cli/commands/build.py +240 -16
  12. datapipeline/cli/commands/contract.py +367 -0
  13. datapipeline/cli/commands/domain.py +8 -3
  14. datapipeline/cli/commands/inspect.py +401 -149
  15. datapipeline/cli/commands/list_.py +30 -7
  16. datapipeline/cli/commands/plugin.py +5 -1
  17. datapipeline/cli/commands/run.py +227 -241
  18. datapipeline/cli/commands/run_config.py +101 -0
  19. datapipeline/cli/commands/serve_pipeline.py +156 -0
  20. datapipeline/cli/commands/source.py +44 -8
  21. datapipeline/cli/visuals/__init__.py +4 -2
  22. datapipeline/cli/visuals/common.py +239 -0
  23. datapipeline/cli/visuals/labels.py +15 -15
  24. datapipeline/cli/visuals/runner.py +66 -0
  25. datapipeline/cli/visuals/sections.py +20 -0
  26. datapipeline/cli/visuals/sources.py +132 -119
  27. datapipeline/cli/visuals/sources_basic.py +260 -0
  28. datapipeline/cli/visuals/sources_off.py +76 -0
  29. datapipeline/cli/visuals/sources_rich.py +414 -0
  30. datapipeline/config/catalog.py +37 -3
  31. datapipeline/config/context.py +214 -0
  32. datapipeline/config/dataset/loader.py +21 -4
  33. datapipeline/config/dataset/normalize.py +4 -4
  34. datapipeline/config/metadata.py +43 -0
  35. datapipeline/config/postprocess.py +2 -2
  36. datapipeline/config/project.py +3 -2
  37. datapipeline/config/resolution.py +129 -0
  38. datapipeline/config/tasks.py +309 -0
  39. datapipeline/config/workspace.py +155 -0
  40. datapipeline/domain/__init__.py +12 -0
  41. datapipeline/domain/record.py +11 -0
  42. datapipeline/domain/sample.py +54 -0
  43. datapipeline/integrations/ml/adapter.py +34 -20
  44. datapipeline/integrations/ml/pandas_support.py +0 -2
  45. datapipeline/integrations/ml/rows.py +1 -6
  46. datapipeline/integrations/ml/torch_support.py +1 -3
  47. datapipeline/io/factory.py +112 -0
  48. datapipeline/io/output.py +132 -0
  49. datapipeline/io/protocols.py +21 -0
  50. datapipeline/io/serializers.py +219 -0
  51. datapipeline/io/sinks/__init__.py +23 -0
  52. datapipeline/io/sinks/base.py +2 -0
  53. datapipeline/io/sinks/files.py +79 -0
  54. datapipeline/io/sinks/rich.py +57 -0
  55. datapipeline/io/sinks/stdout.py +18 -0
  56. datapipeline/io/writers/__init__.py +14 -0
  57. datapipeline/io/writers/base.py +28 -0
  58. datapipeline/io/writers/csv_writer.py +25 -0
  59. datapipeline/io/writers/jsonl.py +52 -0
  60. datapipeline/io/writers/pickle_writer.py +30 -0
  61. datapipeline/pipeline/artifacts.py +58 -0
  62. datapipeline/pipeline/context.py +66 -7
  63. datapipeline/pipeline/observability.py +65 -0
  64. datapipeline/pipeline/pipelines.py +65 -13
  65. datapipeline/pipeline/split.py +11 -10
  66. datapipeline/pipeline/stages.py +127 -16
  67. datapipeline/pipeline/utils/keygen.py +20 -7
  68. datapipeline/pipeline/utils/memory_sort.py +22 -10
  69. datapipeline/pipeline/utils/transform_utils.py +22 -0
  70. datapipeline/runtime.py +5 -2
  71. datapipeline/services/artifacts.py +12 -6
  72. datapipeline/services/bootstrap/config.py +25 -0
  73. datapipeline/services/bootstrap/core.py +52 -37
  74. datapipeline/services/constants.py +6 -5
  75. datapipeline/services/factories.py +123 -1
  76. datapipeline/services/project_paths.py +43 -16
  77. datapipeline/services/runs.py +208 -0
  78. datapipeline/services/scaffold/domain.py +3 -2
  79. datapipeline/services/scaffold/filter.py +3 -2
  80. datapipeline/services/scaffold/mappers.py +9 -6
  81. datapipeline/services/scaffold/plugin.py +54 -10
  82. datapipeline/services/scaffold/source.py +93 -56
  83. datapipeline/sources/{composed_loader.py → data_loader.py} +9 -9
  84. datapipeline/sources/decoders.py +83 -18
  85. datapipeline/sources/factory.py +26 -16
  86. datapipeline/sources/models/__init__.py +2 -2
  87. datapipeline/sources/models/generator.py +0 -7
  88. datapipeline/sources/models/loader.py +3 -3
  89. datapipeline/sources/models/parsing_error.py +24 -0
  90. datapipeline/sources/models/source.py +6 -6
  91. datapipeline/sources/synthetic/time/loader.py +14 -2
  92. datapipeline/sources/transports.py +74 -37
  93. datapipeline/templates/plugin_skeleton/README.md +76 -30
  94. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +31 -0
  95. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +30 -0
  96. datapipeline/templates/plugin_skeleton/example/dataset.yaml +18 -0
  97. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +29 -0
  98. datapipeline/templates/plugin_skeleton/{config/datasets/default → example}/project.yaml +11 -8
  99. datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml +12 -0
  100. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +3 -0
  101. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +9 -0
  102. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +2 -0
  103. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +4 -0
  104. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +28 -0
  105. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +4 -0
  106. datapipeline/templates/plugin_skeleton/jerry.yaml +34 -0
  107. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +31 -0
  108. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +30 -0
  109. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +18 -0
  110. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +29 -0
  111. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +22 -0
  112. datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +12 -0
  113. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +3 -0
  114. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +9 -0
  115. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +2 -0
  116. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +4 -0
  117. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +28 -0
  118. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +4 -0
  119. datapipeline/templates/stubs/dto.py.j2 +2 -0
  120. datapipeline/templates/stubs/mapper.py.j2 +5 -4
  121. datapipeline/templates/stubs/parser.py.j2 +2 -0
  122. datapipeline/templates/stubs/record.py.j2 +2 -0
  123. datapipeline/templates/stubs/source.yaml.j2 +2 -3
  124. datapipeline/transforms/debug/lint.py +26 -41
  125. datapipeline/transforms/feature/scaler.py +89 -13
  126. datapipeline/transforms/record/floor_time.py +4 -4
  127. datapipeline/transforms/sequence.py +2 -35
  128. datapipeline/transforms/stream/dedupe.py +24 -0
  129. datapipeline/transforms/stream/ensure_ticks.py +7 -6
  130. datapipeline/transforms/vector/__init__.py +5 -0
  131. datapipeline/transforms/vector/common.py +98 -0
  132. datapipeline/transforms/vector/drop/__init__.py +4 -0
  133. datapipeline/transforms/vector/drop/horizontal.py +79 -0
  134. datapipeline/transforms/vector/drop/orchestrator.py +59 -0
  135. datapipeline/transforms/vector/drop/vertical.py +182 -0
  136. datapipeline/transforms/vector/ensure_schema.py +184 -0
  137. datapipeline/transforms/vector/fill.py +87 -0
  138. datapipeline/transforms/vector/replace.py +62 -0
  139. datapipeline/utils/load.py +24 -3
  140. datapipeline/utils/rich_compat.py +38 -0
  141. datapipeline/utils/window.py +76 -0
  142. jerry_thomas-1.0.1.dist-info/METADATA +825 -0
  143. jerry_thomas-1.0.1.dist-info/RECORD +199 -0
  144. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/entry_points.txt +9 -8
  145. datapipeline/build/tasks.py +0 -186
  146. datapipeline/cli/commands/link.py +0 -128
  147. datapipeline/cli/commands/writers.py +0 -138
  148. datapipeline/config/build.py +0 -64
  149. datapipeline/config/run.py +0 -116
  150. datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.synthetic.yaml +0 -24
  151. datapipeline/templates/plugin_skeleton/config/contracts/time_linear.synthetic.yaml +0 -23
  152. datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml +0 -9
  153. datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +0 -14
  154. datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml +0 -13
  155. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml +0 -10
  156. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml +0 -10
  157. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml +0 -10
  158. datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml +0 -11
  159. datapipeline/transforms/vector.py +0 -210
  160. jerry_thomas-0.3.0.dist-info/METADATA +0 -502
  161. jerry_thomas-0.3.0.dist-info/RECORD +0 -139
  162. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/WHEEL +0 -0
  163. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/licenses/LICENSE +0 -0
  164. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,199 @@
1
+ datapipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ datapipeline/plugins.py,sha256=Y0QfI313t5_w_m1ayQVEuac3lJ4YR_OSIYZol35ZOTk,838
3
+ datapipeline/runtime.py,sha256=yfSlQaq9OdjVVuqRtWzxLdw1ku4boZoONfCYQIMfe3A,2622
4
+ datapipeline/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ datapipeline/analysis/vector_analyzer.py,sha256=D6eDW0H55QGnWiULXJEirMjw6MeBcwjJ4zfS7M6tx98,175
6
+ datapipeline/analysis/vector/collector.py,sha256=J-a42GiTqjGlhXFfz3LCa2QzeWqRTOoaGE4E_HGKhOs,14762
7
+ datapipeline/analysis/vector/matrix.py,sha256=pzaMmEMD09cOlRbBzuD0Lgc91dtKnCdCkJAXtkcCOPc,18469
8
+ datapipeline/analysis/vector/report.py,sha256=OsSarYX4-CWwdKI3b35qBqrhHxShavFS9CeYKZ7WTRs,16709
9
+ datapipeline/build/__init__.py,sha256=XbuHhJzIRTNnOOJMYa_BHiA0P8yPrEk4tuVXnD2NQbI,109
10
+ datapipeline/build/state.py,sha256=XsL2CtQl7x80OpE1SJE42D-ig3lBMPr7_HSlpq5xwU4,1826
11
+ datapipeline/build/tasks/__init__.py,sha256=-HRDBwLY9eLsKxjDzsC-E10y_ytfhOs9yXVA_Nyt4_o,319
12
+ datapipeline/build/tasks/config.py,sha256=8cuX5nEWFesWoZHWfeTNMlXqsE_dsPgYf6x2eyz1l0c,2312
13
+ datapipeline/build/tasks/metadata.py,sha256=3eHI1vBRwm-fT342gu1wgj7oXNXKZ94D30wkdAVA7kM,5783
14
+ datapipeline/build/tasks/scaler.py,sha256=knJbdeGdDvYZ4O15ra4mnVkmLZbRZbCdJdjfELe_LnU,2554
15
+ datapipeline/build/tasks/schema.py,sha256=kJnAlD_Z8Pd_c9kJH5bDVixB_Vi_mBkKWpjW7eZru1s,2163
16
+ datapipeline/build/tasks/utils.py,sha256=iFMJ8hWk1iRnQ1bUz0huiKlWyevt3x0g8Vh6PNzlMU8,6335
17
+ datapipeline/cli/app.py,sha256=yjaKjEbQbDUAvigU5D8Q2UPbKFRs9l8bHVIG-56Nrsc,24056
18
+ datapipeline/cli/commands/build.py,sha256=OPJ-r3WWAzWwa1wHK0zxnQhuM_1h1mSfNpPRl_Bqrf8,8979
19
+ datapipeline/cli/commands/contract.py,sha256=bPWhWZgdnkk_Ajlm9zUJrvZ3SlyXVqBxx-IJV1zZ5kM,14953
20
+ datapipeline/cli/commands/domain.py,sha256=JdOMlfpZP996kuNGePdjCGMKrqezo-cX8lhlOfd9F44,479
21
+ datapipeline/cli/commands/filter.py,sha256=vhoCIETJNUJmiI37ZdBNaeJAm6O4AU_tveJxVj47S8A,307
22
+ datapipeline/cli/commands/inspect.py,sha256=aatn_olRcFaLyya6r2QMlzzAzlbguEtQ7mKRxoEOFAA,16066
23
+ datapipeline/cli/commands/list_.py,sha256=m9o_exiiC_aiQXsR4lZv_QmN1hfpSNq4ICvYLgiS2e8,1605
24
+ datapipeline/cli/commands/plugin.py,sha256=jUMBrxLw0QX61a2wf7rRGAWFg42xLZkI-C-HyUHiob4,427
25
+ datapipeline/cli/commands/run.py,sha256=TmbyggYOlF972oxwLhh-r27ggeWARg0_WfCMQJAudS8,8348
26
+ datapipeline/cli/commands/run_config.py,sha256=zeXCuDz1ez6Zd6Tq2N0S-YIPs1ZQ8U3fN3lvvd56108,3194
27
+ datapipeline/cli/commands/serve_pipeline.py,sha256=7i1HbuFIbYKkM-aQ2BrDN57K1kFv6NJ4EAN6NOz4aFE,5036
28
+ datapipeline/cli/commands/source.py,sha256=OyDOZm93Lbj6avbAefQPWX87WfRWE1phCPVPB-dNVc4,2073
29
+ datapipeline/cli/visuals/__init__.py,sha256=CUxCoMoU96FQonq6V_i_HBUwuwoWjML5X-_MZDF_i8M,371
30
+ datapipeline/cli/visuals/common.py,sha256=p66-3WBMfl7_3UVIfsrkXnzpE9BsHinpmkHWOZaK00c,8173
31
+ datapipeline/cli/visuals/labels.py,sha256=oK1PpgMoGhlwfyTqiXuaaDm65gDYqv9R-Ac8NqYPhHE,2680
32
+ datapipeline/cli/visuals/runner.py,sha256=GtQcFjmYfVuNMmmp3uoJo0sXJOGeCoN6EOUUB6kzWSU,2085
33
+ datapipeline/cli/visuals/sections.py,sha256=ZK02cjxd5FJAF-IJXqj9loWSrlWwMfYJlbsCfmLBJ5A,614
34
+ datapipeline/cli/visuals/sources.py,sha256=m0nNmRSlSNWTyGj_MF3PS_m9hXKtJv63bP_9-SKn0Xc,5144
35
+ datapipeline/cli/visuals/sources_basic.py,sha256=1-1JJ77AdiQv0iC4qpvb71cICmheInusIJFCc3csWLs,9086
36
+ datapipeline/cli/visuals/sources_off.py,sha256=8nWuwsj-XBsjcfZ9FBpuxC3vhC6mPPObjVsh1EUparM,2651
37
+ datapipeline/cli/visuals/sources_rich.py,sha256=W2ziT0Sb0QKhEbEjPYVUMY0TOrOhey0X3LXjNSwQHLg,16577
38
+ datapipeline/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ datapipeline/config/catalog.py,sha256=2eMwiCEof3WWKASfhxLNaZHLHCqXBoUtdJ1LvHUT74Y,2552
40
+ datapipeline/config/context.py,sha256=JzkhFHeSYuyID5L2FPSinA3S0Gx7BNsXXfy_adAjNHI,6857
41
+ datapipeline/config/metadata.py,sha256=Vckaz6P7_MparAR3IOWR5K5S5mqqul2F6wEg3DWMyzw,1309
42
+ datapipeline/config/postprocess.py,sha256=67ukmtGNHFmZd8bv-POWOE_HjE5T5AXQaOypCZG1d-o,361
43
+ datapipeline/config/project.py,sha256=nc2n9g-02pOmuErzHHxJasP4wn04rB7rXqIUieYg_vQ,997
44
+ datapipeline/config/resolution.py,sha256=syS9fLWNl17NrA-GPpAk3pbFtL4I7hQDJdfWaeldqXs,3477
45
+ datapipeline/config/split.py,sha256=VFYRF6Fz5xLTqqxIt3RVGB4kwlnHH8CxjOddEAJYG5Q,1048
46
+ datapipeline/config/tasks.py,sha256=qRHV6vEOWgpfknSplfV7RbCAdcIsAVcfk2NZU92zRsE,9777
47
+ datapipeline/config/workspace.py,sha256=mcZxnEtV2rNkb7TeeE7P_C-LIgQ27e10CawW1w4H7w0,5131
48
+ datapipeline/config/dataset/dataset.py,sha256=Q9cb5QoDtyPb4pbD9mSTZcJmXQhdEWwDLS52xKAcqXg,562
49
+ datapipeline/config/dataset/feature.py,sha256=2Hxz0FXZskLI4ICXhmlG6b1Vvxzh0Ql9e6BwjMRtzSs,346
50
+ datapipeline/config/dataset/loader.py,sha256=Eh_F56o242ptEIsR22kC7HkNi6SggpRf10Gpfc3ipTo,1063
51
+ datapipeline/config/dataset/normalize.py,sha256=5IFGYhRrJ4JMWLLy-qRc6W8p6FZr8T-Fz2FCxp6MHCA,803
52
+ datapipeline/domain/__init__.py,sha256=rfZZpfvozmQNKhBabzgC9g4urMbchjDXbbl54sNtxZQ,262
53
+ datapipeline/domain/feature.py,sha256=7BOI4H458BKU8B9vqdfez7WOO1YKiF6lt0oy7PMbqrQ,295
54
+ datapipeline/domain/record.py,sha256=VY2vxpVJGpn0sztI5mvD0oPdxy5auLJ8huKUR1VCgZA,1062
55
+ datapipeline/domain/sample.py,sha256=yjmxPJmjHwrw9xJR5hxFr1XKelpJEOZI7XrLlqsrzy4,1558
56
+ datapipeline/domain/vector.py,sha256=apK1iu7tca2k2xgNGJAAQfKhirno7ZKZ3pDheKf9euM,1041
57
+ datapipeline/filters/filters.py,sha256=dM6U-QpGCQQ4-CMBTJgWZp2zH2TVTk6uYOqGPC5NBCY,2649
58
+ datapipeline/integrations/__init__.py,sha256=tjTLsIa6NRWKI05wjwPAUuXozDA-gP98SccFJ9lYHs8,410
59
+ datapipeline/integrations/ml/__init__.py,sha256=oflJXnjQEn1Zv0Vho10mc2y3D6UkKusNZwE5yUtatb8,463
60
+ datapipeline/integrations/ml/adapter.py,sha256=X9UGbNev4eN-KhL8KAaSPKLrpkrgJA3c0sTgFRDAMv0,4591
61
+ datapipeline/integrations/ml/pandas_support.py,sha256=HEX-Dx9RG17uCKSiZ7M4gMCoZbMQTa_3xhlC0s6bIYM,1229
62
+ datapipeline/integrations/ml/rows.py,sha256=OhziMyP6uvFFErYYJkaQkaXQ4oX-jAXnIDazLUfhP5A,1995
63
+ datapipeline/integrations/ml/torch_support.py,sha256=RCQaOCaggddtAo67-ThkdX_GgVJyLCHGZs3YyOszF04,2703
64
+ datapipeline/io/factory.py,sha256=xChYRxe1SRxHj8SXNirPEi2J20AOH3968yN92BRykr8,3903
65
+ datapipeline/io/output.py,sha256=tbtE4iJTDNtQcburA6W25eGH0gX-hAoVMLas5slkVAA,4003
66
+ datapipeline/io/protocols.py,sha256=vHjXhuV2r1Lo7k8SJuPH0WL2EXG_nm3DBpSowobUZ2U,512
67
+ datapipeline/io/serializers.py,sha256=5g59YwEL4-FT2r5kSj3UoQCKFkn-va2EMA8z79YzA18,6380
68
+ datapipeline/io/sinks/__init__.py,sha256=7l-LmJAjuNrQZWMDFMXdjbZQ4Pq-iWMaN_3GcUvWntw,517
69
+ datapipeline/io/sinks/base.py,sha256=cXG6VXop0RVL1K4xpSaFq1scylhb6N6dsg6UMrQGw54,49
70
+ datapipeline/io/sinks/files.py,sha256=UgXXj8NxjvdOrwpJt5YNTgG1gW89YYCVpVkSg1eGgKI,1975
71
+ datapipeline/io/sinks/rich.py,sha256=hZNMttsqaMSUsQmCu6kubzkYbUGDTbTYBYnDwcFsEp0,1486
72
+ datapipeline/io/sinks/stdout.py,sha256=64VUdf_YghxTCjVyYcpBQpC_Pt5rPQrYejRg_0_cF7A,382
73
+ datapipeline/io/writers/__init__.py,sha256=V8228IYVxP4ay6yG8HF_ukBDseAERrqlWC4gbGDBmoc,397
74
+ datapipeline/io/writers/base.py,sha256=kUaFv6XOoUjYw5pE7XFUel5ptdEhuY03VTqajortUZY,814
75
+ datapipeline/io/writers/csv_writer.py,sha256=FL2qiS8Hr273lGcN6pQXGOSufcM06ApVZnPmNhuAwjQ,833
76
+ datapipeline/io/writers/jsonl.py,sha256=SP2yPgH4B_Xrr7GJFVVIsTxarNKAFbEB-0RS1F0fD-g,1736
77
+ datapipeline/io/writers/pickle_writer.py,sha256=omXSeGbrcSWwNBodwJNCBok0mW167xciT5S8w_w5xCo,928
78
+ datapipeline/mappers/noop.py,sha256=L8bH1QVbLH-ogIam0ppYdx7KuWQ7Dj44lvD8tvNlY0Q,111
79
+ datapipeline/mappers/synthetic/time.py,sha256=lt1pC0May6Y4E8bZO4sERm3D04_r-qv63Y5fwrtCaBQ,639
80
+ datapipeline/parsers/identity.py,sha256=pdGuz0SSQGfySPpvZSnLgfTXTkC36x-7dQMMei3XhsU,321
81
+ datapipeline/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
+ datapipeline/pipeline/artifacts.py,sha256=mD31N-tlFR3EePVHNaxyA3Diiqab9Kyc0Gh6jCX_z-g,1492
83
+ datapipeline/pipeline/context.py,sha256=-W8QvGm32QGmBziEuzl-BitscuxGPb9bgQYDhRC1tkc,4377
84
+ datapipeline/pipeline/observability.py,sha256=y5LWgY3vjlhA5paslWVkjtMjur8yAGXjFhYNrfuJUNg,2043
85
+ datapipeline/pipeline/pipelines.py,sha256=Ilys2Cyqee5kHQ_gTIwWr4UZNDvyhU_xxUhBj42b5yI,4274
86
+ datapipeline/pipeline/split.py,sha256=TCzOhd8PF81IcUzUdPSz0hs3pIHi9V4IhXbSY2ZHK3Q,6090
87
+ datapipeline/pipeline/stages.py,sha256=yWl7nCJt_kOh9VVLgM6fDFM0Ajgh0GCwtvA-gSDRHTs,9493
88
+ datapipeline/pipeline/utils/keygen.py,sha256=v2JJagJAE9iYfLtbl4uxoAEXZN_ALH0xdHhPDhNfKwU,1909
89
+ datapipeline/pipeline/utils/memory_sort.py,sha256=hS61n2CeIITRqffE1ftvn2IdqQp1IXYhuN4MJqncKvk,1155
90
+ datapipeline/pipeline/utils/ordering.py,sha256=ZX18I7GFtvyMFJB96vWQpTOGwljjeJ6ppCg5a3Av3es,1450
91
+ datapipeline/pipeline/utils/transform_utils.py,sha256=q4bxQ0NFC4G7IeRSSL4ZzQ7vvVkxAnovflhEtfVUXyU,4221
92
+ datapipeline/registries/registry.py,sha256=MWWOHz2wT1oHQmovodtEreEuQhvH-i11Y2yXUUgZJhQ,641
93
+ datapipeline/services/artifacts.py,sha256=5mqNs5G53RqOYlMGvF0-_ZZA1M8mMMUXip1HuFhckjI,2930
94
+ datapipeline/services/constants.py,sha256=OVUqBBDkpl-A_f71uT8QUwd_50fgN1pA6uL4Yv8ZpUE,517
95
+ datapipeline/services/entrypoints.py,sha256=NKcSbhGRtBLQXGf-TdujwbVSRH1zb5J-S2jxFPnk6HQ,2504
96
+ datapipeline/services/factories.py,sha256=4Udq2LBRHNJmBTZiXrbqmV6PVbesg2c5Nndh6CpYDnE,6011
97
+ datapipeline/services/paths.py,sha256=xHxos62Y2gjhLggrnrmRqPiLMseK10OX17NJjnVk8wE,966
98
+ datapipeline/services/project_paths.py,sha256=qWM5WN0aKB4KwkwXgZQywBFPu1Cfh9mUiAQZghRwNOs,4264
99
+ datapipeline/services/runs.py,sha256=_xcrgZXb3sFfRN1ohvTLicQHq7_33g62SCt_JXCOzqE,6185
100
+ datapipeline/services/bootstrap/__init__.py,sha256=Mc2w2S69kU1hnzCvsGMhFqyNoNMXPwQtxprAkGN-sYE,245
101
+ datapipeline/services/bootstrap/config.py,sha256=122JNE7gZF1mohAI1gvX8H6i0JTql_Mm9bWcTpoD77c,4936
102
+ datapipeline/services/bootstrap/core.py,sha256=7iWf05GRdIOvgEHKOoq5NUFAMNhkd_kTe3zunkJqOHw,7394
103
+ datapipeline/services/scaffold/__init__.py,sha256=PaQNtYki9Kc7mQPnUtKDPU-rKohLHoXLvFVwdHdbXNM,68
104
+ datapipeline/services/scaffold/domain.py,sha256=mww7HhZ1ZepNvn2tHczpLZH0y3Ej7vgDGVLepFkTgIY,946
105
+ datapipeline/services/scaffold/filter.py,sha256=EwLFeI3cRoHw-hYE3jlLfqV0DKk9Z8EnWyVymJmOppA,1084
106
+ datapipeline/services/scaffold/mappers.py,sha256=kkkJ-UB51B2yawRoUst3CGExn3gRYPm5d_3kbujPVMQ,1960
107
+ datapipeline/services/scaffold/plugin.py,sha256=Eecb-OUJsn-upsq2vC88JYOyYTRbrIxsPu4yYorziqs,3407
108
+ datapipeline/services/scaffold/source.py,sha256=w0w-oID9x48eMV6c0FhqW1myAWh_ELYmFojsdfVlaTk,6649
109
+ datapipeline/services/scaffold/templates.py,sha256=B3YnZpFUZLynijJosTNxZQLXnPP_Y_t1RHqfI1lGOxU,634
110
+ datapipeline/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
111
+ datapipeline/sources/data_loader.py,sha256=q-gAQDOkMwIvY5b-T0j6P_Bj716R5I7jIoyHOleTB2s,1358
112
+ datapipeline/sources/decoders.py,sha256=yH4uVDg0Hh6sUpk04W3u1dtJi_Xv-vPhvlMEIGs7-zs,3984
113
+ datapipeline/sources/factory.py,sha256=UkcrycyodBBGt7Q5EO8EirH4obnOQH8SqrFedjMEIR0,2410
114
+ datapipeline/sources/transports.py,sha256=o32uvCRWps-voresZ2gGwhXTiRpmDIWdyM_IE2zY0H8,3308
115
+ datapipeline/sources/models/__init__.py,sha256=_DVhnet2HMvw-H-UEFQeEXCwro6Qg1ws0iBgMSKbBbM,399
116
+ datapipeline/sources/models/base.py,sha256=MAUawd11fII-mxxuSPM4f6H1t1tbyZX_QWhoAgeYUcU,238
117
+ datapipeline/sources/models/generator.py,sha256=OTJEcbpRp6pPZyG_8sds2x-15LF-SvAR5yblivG1E2g,508
118
+ datapipeline/sources/models/loader.py,sha256=VMWfEzrBvKdtRPjixPbttTochO3IULdglJ01769310E,1028
119
+ datapipeline/sources/models/parser.py,sha256=Ts31aksHLDCw5ovF2D99w9g_j-NnEiZ8x0JHtUxmmXs,226
120
+ datapipeline/sources/models/parsing_error.py,sha256=41pmauyqNK75Hke-rauRRNc-UveNXt8czxCViyZidvs,734
121
+ datapipeline/sources/models/source.py,sha256=lcAcbwM-HrMVO3uEWTpbvqY42g74JZWKD-KJ89Lsjys,913
122
+ datapipeline/sources/models/synthetic.py,sha256=FLF2Jvdc06VCriTCliThuQTUXd6NrXIQpksIL8gBIH8,288
123
+ datapipeline/sources/synthetic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
+ datapipeline/sources/synthetic/time/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
+ datapipeline/sources/synthetic/time/loader.py,sha256=X_NQJFAHL8wHV5TxbLhRwqGfFJPOw6qVToBkKFD3r_k,2003
126
+ datapipeline/sources/synthetic/time/parser.py,sha256=d3GZMQ7L1Qi4LeEm7U3y0_pk0RdhskioQukYyqyoqic,343
127
+ datapipeline/templates/plugin_skeleton/README.md,sha256=L1FvFQrubqbnHbK8qB_Hz38JeYKlTQu5rjqDfhIVANw,7652
128
+ datapipeline/templates/plugin_skeleton/jerry.yaml,sha256=TYfZl0_HMnfj6F3oaSWq25Vb1BZlpILIIMU1N0U4L28,1343
129
+ datapipeline/templates/plugin_skeleton/pyproject.toml,sha256=0lmO5Aia9tB81Ez4SxP56DGisekx-palMmGCUzmAl4E,259
130
+ datapipeline/templates/plugin_skeleton/example/dataset.yaml,sha256=cSKk8IyoJebdc9b959Sw7gDfBXl2BT8hktyZ4Z43Nog,471
131
+ datapipeline/templates/plugin_skeleton/example/postprocess.yaml,sha256=yUYr5c6YtBeF_rm_ENsOMkn_sOAChzbqhL98WKr0CRw,710
132
+ datapipeline/templates/plugin_skeleton/example/project.yaml,sha256=4WyrlBCZZ47ceOw9nV8QgqU7_jH5Fu8-z4SJGKbPYK8,845
133
+ datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml,sha256=oHesyqPHQ6KYJeVNxZlB75Pw0BTezr2U9IhsdM6YQ7E,842
134
+ datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml,sha256=jtxL89bgdi84iUp4lcMUCZZGcxXsSmTaFOyfe5rxX-M,1579
135
+ datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml,sha256=bouMA0PdRETU67wkh8HTs7vzr0UkKcVXDjAoVChdHAc,210
136
+ datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml,sha256=IMeokmMOFwY1jrXauoFtTFV6gtdos9xFob7nCrqfkPA,104
137
+ datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml,sha256=xNl8JmJ4ogHtfq1jpNqMvH4GYWgxFJ0vMqB6XI3aM-g,237
138
+ datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml,sha256=Y5X3lOM-0FwKNjFLTeRtts6FZjSj2mLpsO4CS2GBafs,14
139
+ datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml,sha256=YXGYz3szsA92Qejdm3KdKYac3aFPPlnfnhipufGYzsA,35
140
+ datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml,sha256=9LFcjdkGYtz2WifCB-6avrvB-5TZUGBCJFQQbuFoQPk,910
141
+ datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml,sha256=ecV69-l6qQS2jIagh1SuehxfLdIBeR49uekhC6DB6EM,33
142
+ datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
143
+ datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml,sha256=cSKk8IyoJebdc9b959Sw7gDfBXl2BT8hktyZ4Z43Nog,471
144
+ datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml,sha256=yUYr5c6YtBeF_rm_ENsOMkn_sOAChzbqhL98WKr0CRw,710
145
+ datapipeline/templates/plugin_skeleton/your-dataset/project.yaml,sha256=u9LtgLt6OdLsPd7r4bmrujg87WX8NzQBNIvI3gs3QgQ,827
146
+ datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml,sha256=oHesyqPHQ6KYJeVNxZlB75Pw0BTezr2U9IhsdM6YQ7E,842
147
+ datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml,sha256=jtxL89bgdi84iUp4lcMUCZZGcxXsSmTaFOyfe5rxX-M,1579
148
+ datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml,sha256=bouMA0PdRETU67wkh8HTs7vzr0UkKcVXDjAoVChdHAc,210
149
+ datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml,sha256=IMeokmMOFwY1jrXauoFtTFV6gtdos9xFob7nCrqfkPA,104
150
+ datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml,sha256=xNl8JmJ4ogHtfq1jpNqMvH4GYWgxFJ0vMqB6XI3aM-g,237
151
+ datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml,sha256=Y5X3lOM-0FwKNjFLTeRtts6FZjSj2mLpsO4CS2GBafs,14
152
+ datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml,sha256=YXGYz3szsA92Qejdm3KdKYac3aFPPlnfnhipufGYzsA,35
153
+ datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml,sha256=9LFcjdkGYtz2WifCB-6avrvB-5TZUGBCJFQQbuFoQPk,910
154
+ datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml,sha256=ecV69-l6qQS2jIagh1SuehxfLdIBeR49uekhC6DB6EM,33
155
+ datapipeline/templates/stubs/dto.py.j2,sha256=MizqUzY4eGXiIHzGBovXoPHqhVno791Bi6PCGigVqww,908
156
+ datapipeline/templates/stubs/filter.py.j2,sha256=3LgRgAL_HRaENOOqQx8NdeM1AUy-T0rtHVTA7N2oWOs,466
157
+ datapipeline/templates/stubs/loader_synthetic.py.j2,sha256=9SQBeTBGlZmKs6nSYBKd8nbOPcFHgDx17Mh8xOEQnvs,1285
158
+ datapipeline/templates/stubs/mapper.py.j2,sha256=eonMmBgql-XFnxcQ5mRONyPCJShhQAp1jqYSF_1Pcvo,783
159
+ datapipeline/templates/stubs/parser.py.j2,sha256=Ie6ykkT4YTNlRTlbagleHnFukwewHRTq7C7Tbg_P_9Y,674
160
+ datapipeline/templates/stubs/parser_custom.py.j2,sha256=0Nytq43JdTZoyRj-4Mz6HWdMTmOP3VlFuYOB_A_13Vg,580
161
+ datapipeline/templates/stubs/record.py.j2,sha256=xiDMMbYmoReBy0KXRoFcd9FuUoLi9kYzlMFmFjdE4WE,662
162
+ datapipeline/templates/stubs/source.yaml.j2,sha256=pKxqYuJsD5TkVHjT4UrwWQ2RFc0JoL0w3YnZqZgf5J0,410
163
+ datapipeline/transforms/filter.py,sha256=Jt8wTEIqWqe34s7GVVekcR8OdRozs317sj7Uw08GNOA,1433
164
+ datapipeline/transforms/sequence.py,sha256=tZiqFB_aZdVji2uEaFkUyah8k4AYX9IxPMoLBbOCfYg,1579
165
+ datapipeline/transforms/utils.py,sha256=ts6dULY2Pc5fFs7AMd3goN4hDzQkv-6CDLdRH41lG9I,721
166
+ datapipeline/transforms/vector_utils.py,sha256=PcStTwRaaunONKZJuwv79bjdfaDcamLcwNLRHjZ5yXw,927
167
+ datapipeline/transforms/debug/identity.py,sha256=6bwnEYhMBYw0YPrMccrZPXDOQM4r_-odsKo8Hhpbz10,2515
168
+ datapipeline/transforms/debug/lint.py,sha256=v7aLig0y7K_Wqc4W37ZVfUmyLXOV8gcgDMBHm-QzUo0,3124
169
+ datapipeline/transforms/feature/model.py,sha256=gB-GP80_P7bzEKJFSM4leRke75yiD4-S5eJ1p8g3JU8,382
170
+ datapipeline/transforms/feature/scaler.py,sha256=-NRQCz_BUG5X7sg4adMXxZ-1AOAhdmdHET1uIkBPWR0,8305
171
+ datapipeline/transforms/record/floor_time.py,sha256=Nk_srdwNMuxqRCguxjvFKB7rfzMu1SB1pDYVh4cdV4Q,617
172
+ datapipeline/transforms/record/lag.py,sha256=5wrPyVNFvidvdQddnK6ZeUOI5I8rfXEbzIg6tzKiJu4,536
173
+ datapipeline/transforms/stream/dedupe.py,sha256=VyKI8hMcekBntjD3WjIBykMiPs8RNkxQSpd9SCwxihA,787
174
+ datapipeline/transforms/stream/ensure_ticks.py,sha256=hxnG3yHabt4HeOYjWyhMrIBxG1ZbG1uj8vEM4WtGEfA,1185
175
+ datapipeline/transforms/stream/fill.py,sha256=N_ybLUCvaMVvKsFP8-HcGuKqV9hXAnYmV7zyUB-Ugys,3500
176
+ datapipeline/transforms/stream/granularity.py,sha256=PzHDGDwyn8P07BCbcFZaorS_7lbAbEdMLqD9Wy61y0M,3376
177
+ datapipeline/transforms/vector/__init__.py,sha256=nKBaksXv_rBF2BUT-IFJTbbRBiwkpq6k39cLFS8CfXw,245
178
+ datapipeline/transforms/vector/common.py,sha256=24iX5EviN9BKi9-GJIty57LGOvEVD9PueZflyoGS5Nc,3616
179
+ datapipeline/transforms/vector/ensure_schema.py,sha256=AByCuHhnC7T4CWgk5oUC59-oom_LzC0aER1WBASEkDs,7240
180
+ datapipeline/transforms/vector/fill.py,sha256=1bWXbVABnyndv4O01cZN0oVS2-X_HVAEGUmwywazs_w,3065
181
+ datapipeline/transforms/vector/replace.py,sha256=gYTZx0CTkdSsuXUiAvP7dtcyEHEQv2UJeIEiowGDzhw,2019
182
+ datapipeline/transforms/vector/drop/__init__.py,sha256=NsGv9v7n13uPsRWGTSLKWPVaXocQ-zx9WE3Ez5hQt3U,151
183
+ datapipeline/transforms/vector/drop/horizontal.py,sha256=c6bsOaKrppEfBQoNGOaWLbKUHWPlLCo9jscBDYMvEp8,2567
184
+ datapipeline/transforms/vector/drop/orchestrator.py,sha256=smd3z9Oz5iEzkahButE5UsK1nPrSUw6B5CPxtfj2K8g,1986
185
+ datapipeline/transforms/vector/drop/vertical.py,sha256=5VBtilX48Pw6Fc1e0UGk6IwIUoPbkm98t2N7EPql1gA,6858
186
+ datapipeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
187
+ datapipeline/utils/load.py,sha256=FfW_UOD5NhxDg4DarRSzVbjGbv7An_bKK-IdUUOrFhs,1970
188
+ datapipeline/utils/paths.py,sha256=5Y5rhNbjTiybUHfq9VfRMJ4gUfN9UltonM-4MABEG8w,798
189
+ datapipeline/utils/pickle_model.py,sha256=Uyd4AajInyTUpWfSJDDEGLinXeQkHjQUNnyla0owtA4,854
190
+ datapipeline/utils/placeholders.py,sha256=epZQ7NifUWI7_7hZKGEkCBDOaMnN9LiqJdI2gvBAEgE,890
191
+ datapipeline/utils/rich_compat.py,sha256=4ZfR82gG1vAVUiILVINqcRReqoUiRPmQOlLLBXz-pC0,1166
192
+ datapipeline/utils/time.py,sha256=vOqa2arqwEqbDo-JWEhOFPMnI1E4Ib3i1L-Rt-cGH8c,1072
193
+ datapipeline/utils/window.py,sha256=J5CkEIdY5iZd1QY9wawmHpBXpCp2FzHOHXhjYTCZWl8,2576
194
+ jerry_thomas-1.0.1.dist-info/licenses/LICENSE,sha256=pkBMylAJF5yChHAkdxwFhEptLGx13i-XFEKh-Sh6DkM,1073
195
+ jerry_thomas-1.0.1.dist-info/METADATA,sha256=0Gm6Iwl-lnRDaDFMllduKj-S-4mJvrFGLcXpH9D86Yg,33507
196
+ jerry_thomas-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
197
+ jerry_thomas-1.0.1.dist-info/entry_points.txt,sha256=jsJFp_2aEEhKkL2I3Yc4yPSODy9ggDZmLeV75KPjb9A,1672
198
+ jerry_thomas-1.0.1.dist-info/top_level.txt,sha256=N8aoNPdPyHefODO4YAm7tqTaUcw0e8LDcqycFTf8TbM,13
199
+ jerry_thomas-1.0.1.dist-info/RECORD,,
@@ -2,16 +2,17 @@
2
2
  jerry = datapipeline.cli.app:main
3
3
 
4
4
  [datapipeline.loaders]
5
- composed.loader = datapipeline.sources.factory:build_loader
6
- synthetic.time = datapipeline.sources.synthetic.time.loader:make_time_loader
5
+ core.io = datapipeline.sources.factory:build_loader
6
+ core.synthetic.ticks = datapipeline.sources.synthetic.time.loader:make_time_loader
7
7
 
8
8
  [datapipeline.mappers]
9
9
  encode_time = datapipeline.mappers.synthetic.time:encode
10
+ identity = datapipeline.mappers.noop:identity
10
11
  time.synthetic = datapipeline.mappers.noop:identity
11
12
 
12
13
  [datapipeline.parsers]
14
+ core.synthetic.ticks = datapipeline.sources.synthetic.time.parser:TimeRowParser
13
15
  identity = datapipeline.parsers.identity:IdentityParser
14
- synthetic.time = datapipeline.sources.synthetic.time.parser:TimeRowParser
15
16
 
16
17
  [datapipeline.transforms.debug]
17
18
  identity = datapipeline.transforms.debug.identity:IdentityGuardTransform
@@ -27,13 +28,13 @@ floor_time = datapipeline.transforms.record.floor_time:floor_time
27
28
  lag = datapipeline.transforms.record.lag:apply_lag
28
29
 
29
30
  [datapipeline.transforms.stream]
30
- ensure_ticks = datapipeline.transforms.stream.ensure_ticks:ensure_ticks
31
+ dedupe = datapipeline.transforms.stream.dedupe:FeatureDeduplicateTransform
32
+ ensure_cadence = datapipeline.transforms.stream.ensure_ticks:ensure_cadence
31
33
  fill = datapipeline.transforms.stream.fill:FillTransformer
32
34
  granularity = datapipeline.transforms.stream.granularity:FeatureGranularityTransform
33
35
  lint = datapipeline.transforms.stream.lint:StreamLint
34
36
 
35
37
  [datapipeline.transforms.vector]
36
- drop_missing = datapipeline.transforms.vector:VectorDropMissingTransform
37
- fill_constant = datapipeline.transforms.vector:VectorFillConstantTransform
38
- fill_history = datapipeline.transforms.vector:VectorFillHistoryTransform
39
- fill_horizontal = datapipeline.transforms.vector:VectorFillAcrossPartitionsTransform
38
+ drop = datapipeline.transforms.vector:VectorDropTransform
39
+ fill = datapipeline.transforms.vector:VectorFillTransform
40
+ replace = datapipeline.transforms.vector:VectorReplaceTransform
@@ -1,186 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import hashlib
4
- from pathlib import Path
5
- from typing import Dict, Iterable, Iterator, Sequence, Tuple
6
-
7
- from datapipeline.config.build import BuildConfig
8
- from datapipeline.config.dataset.loader import load_dataset
9
- from datapipeline.pipeline.context import PipelineContext
10
- from datapipeline.pipeline.pipelines import build_vector_pipeline
11
- from datapipeline.pipeline.split import build_labeler
12
- from datapipeline.runtime import Runtime
13
- from datapipeline.services.constants import PARTIONED_IDS, SCALER_STATISTICS
14
- from datapipeline.services.project_paths import read_project
15
- from datapipeline.utils.paths import ensure_parent
16
- from datapipeline.transforms.feature.scaler import StandardScaler
17
-
18
-
19
- def _resolve_relative(project_yaml: Path, value: str) -> Path:
20
- path = Path(value)
21
- return path if path.is_absolute() else (project_yaml.parent / path)
22
-
23
-
24
- def _normalized_label(path: Path, base_dir: Path) -> str:
25
- try:
26
- return str(path.resolve().relative_to(base_dir))
27
- except ValueError:
28
- return str(path.resolve())
29
-
30
-
31
- def _hash_file(hasher, path: Path, base_dir: Path) -> None:
32
- hasher.update(_normalized_label(path, base_dir).encode("utf-8"))
33
- hasher.update(b"\0")
34
- hasher.update(path.read_bytes())
35
- hasher.update(b"\0")
36
-
37
-
38
- def _yaml_files(directory: Path) -> Iterable[Path]:
39
- if not directory.exists():
40
- return []
41
- return sorted(p for p in directory.rglob("*.y*ml") if p.is_file())
42
-
43
-
44
- def compute_config_hash(project_yaml: Path, build_config_path: Path) -> str:
45
- """Compute a deterministic hash across relevant config inputs."""
46
-
47
- hasher = hashlib.sha256()
48
- base_dir = project_yaml.parent.resolve()
49
- cfg = read_project(project_yaml)
50
-
51
- required: Sequence[Path] = [
52
- project_yaml.resolve(),
53
- build_config_path.resolve(),
54
- _resolve_relative(project_yaml, cfg.paths.dataset).resolve(),
55
- _resolve_relative(project_yaml, cfg.paths.postprocess).resolve(),
56
- ]
57
-
58
- for path in required:
59
- if not path.exists():
60
- raise FileNotFoundError(f"Expected config file missing: {path}")
61
- _hash_file(hasher, path, base_dir)
62
-
63
- for dir_value in (cfg.paths.sources, cfg.paths.streams):
64
- directory = _resolve_relative(project_yaml, dir_value)
65
- hasher.update(
66
- f"[dir]{_normalized_label(directory, base_dir)}".encode("utf-8"))
67
- if not directory.exists():
68
- hasher.update(b"[missing]")
69
- continue
70
- for path in _yaml_files(directory):
71
- _hash_file(hasher, path, base_dir)
72
-
73
- return hasher.hexdigest()
74
-
75
-
76
- def _collect_partitioned_ids(runtime: Runtime, include_targets: bool) -> Sequence[str]:
77
- dataset = load_dataset(runtime.project_yaml, "vectors")
78
- feature_cfgs = list(dataset.features or [])
79
- if include_targets:
80
- feature_cfgs += list(dataset.targets or [])
81
-
82
- sanitized = [cfg.model_copy(update={"scale": False})
83
- for cfg in feature_cfgs]
84
-
85
- ids: set[str] = set()
86
- context = PipelineContext(runtime)
87
- vectors = build_vector_pipeline(
88
- context, sanitized, dataset.group_by, stage=None)
89
- for _, vector in vectors:
90
- ids.update(vector.values.keys())
91
- return sorted(ids)
92
-
93
-
94
- def materialize_partitioned_ids(runtime: Runtime, config: BuildConfig) -> Tuple[str, int]:
95
- """Write the partitioned-id list and return (relative_path, count)."""
96
-
97
- task_cfg = config.partitioned_ids
98
- ids = _collect_partitioned_ids(
99
- runtime, include_targets=task_cfg.include_targets)
100
-
101
- relative_path = Path(task_cfg.output)
102
- destination = (runtime.artifacts_root / relative_path).resolve()
103
- ensure_parent(destination)
104
-
105
- with destination.open("w", encoding="utf-8") as fh:
106
- for fid in ids:
107
- fh.write(f"{fid}\n")
108
-
109
- return str(relative_path), len(ids)
110
-
111
-
112
- def materialize_scaler_statistics(runtime: Runtime, config: BuildConfig) -> Tuple[str, Dict[str, object]] | None:
113
- task_cfg = config.scaler
114
- if not task_cfg.enabled:
115
- return None
116
-
117
- dataset = load_dataset(runtime.project_yaml, "vectors")
118
- feature_cfgs = list(dataset.features)
119
- if not feature_cfgs and not task_cfg.include_targets:
120
- return None
121
-
122
- if task_cfg.include_targets:
123
- feature_cfgs += list(dataset.targets or [])
124
-
125
- sanitized_cfgs = [cfg.model_copy(
126
- update={"scale": False}) for cfg in feature_cfgs]
127
-
128
- context = PipelineContext(runtime)
129
- vectors = build_vector_pipeline(
130
- context, sanitized_cfgs, dataset.group_by, stage=None)
131
-
132
- cfg = getattr(runtime, "split", None)
133
- labeler = build_labeler(cfg) if cfg else None
134
- if not labeler and task_cfg.split_label != "all":
135
- raise RuntimeError(
136
- f"Cannot compute scaler statistics for split '{task_cfg.split_label}' "
137
- "when no split configuration is defined in the project."
138
- )
139
-
140
- def _train_stream() -> Iterator[tuple[object, object]]:
141
- for group_key, vector in vectors:
142
- if labeler and labeler.label(group_key, vector) != task_cfg.split_label:
143
- continue
144
- yield group_key, vector
145
-
146
- scaler = StandardScaler()
147
- total_observations = scaler.fit(_train_stream())
148
-
149
- if not scaler.statistics:
150
- raise RuntimeError(
151
- f"No scaler statistics computed for split '{task_cfg.split_label}'."
152
- )
153
-
154
- relative_path = Path(task_cfg.output)
155
- destination = (runtime.artifacts_root / relative_path).resolve()
156
- ensure_parent(destination)
157
-
158
- scaler.save(destination)
159
-
160
- meta: Dict[str, object] = {
161
- "features": len(scaler.statistics),
162
- "split": task_cfg.split_label,
163
- "observations": total_observations,
164
- }
165
-
166
- return str(relative_path), meta
167
-
168
-
169
- def execute_build(runtime: Runtime, config: BuildConfig) -> Dict[str, Dict[str, object]]:
170
- """Materialize artifacts described by build.yaml."""
171
- artifacts: Dict[str, Dict[str, object]] = {}
172
-
173
- rel_path, count = materialize_partitioned_ids(runtime, config)
174
- artifacts[PARTIONED_IDS] = {
175
- "relative_path": rel_path,
176
- "count": count,
177
- }
178
-
179
- scaler_result = materialize_scaler_statistics(runtime, config)
180
- if scaler_result:
181
- rel_path, meta = scaler_result
182
- scaler_meta = {"relative_path": rel_path}
183
- scaler_meta.update(meta)
184
- artifacts[SCALER_STATISTICS] = scaler_meta
185
-
186
- return artifacts
@@ -1,128 +0,0 @@
1
- import sys
2
- from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
3
- from datapipeline.services.entrypoints import read_group_entries
4
- from datapipeline.services.constants import FILTERS_GROUP
5
- from datapipeline.services.project_paths import (
6
- sources_dir as resolve_sources_dir,
7
- streams_dir as resolve_streams_dir,
8
- ensure_project_scaffold,
9
- )
10
- from datapipeline.services.scaffold.mappers import attach_source_to_domain
11
- import re
12
-
13
-
14
- def _pick_from_list(prompt: str, options: list[str]) -> str:
15
- print(prompt, file=sys.stderr)
16
- for i, opt in enumerate(options, 1):
17
- print(f" [{i}] {opt}", file=sys.stderr)
18
- while True:
19
- sel = input("> ").strip()
20
- if sel.isdigit():
21
- idx = int(sel)
22
- if 1 <= idx <= len(options):
23
- return options[idx - 1]
24
- print("Please enter a number from the list.", file=sys.stderr)
25
-
26
-
27
- def handle() -> None:
28
- root_dir, name, pyproject = pkg_root(None)
29
-
30
- # Discover sources by scanning sources_dir YAMLs
31
- # Default to dataset-scoped project config
32
- proj_path = root_dir / "config" / "datasets" / "default" / "project.yaml"
33
- # Ensure a minimal project scaffold so we can resolve dirs interactively
34
- ensure_project_scaffold(proj_path)
35
- sources_dir = resolve_sources_dir(proj_path)
36
- source_options = []
37
- if sources_dir.exists():
38
- source_options = sorted(p.stem for p in sources_dir.glob("*.y*ml"))
39
- if not source_options:
40
- print("[error] No sources found. Create one first (jerry source add ...)")
41
- raise SystemExit(2)
42
-
43
- src_key = _pick_from_list("Select a source to link:", source_options)
44
- # Expect aliases from sources_dir filenames: provider_dataset.yaml
45
- parts = src_key.split("_", 1)
46
- if len(parts) != 2:
47
- print("[error] Source alias must be 'provider_dataset' (from sources/<alias>.yaml)", file=sys.stderr)
48
- raise SystemExit(2)
49
- provider, dataset = parts[0], parts[1]
50
-
51
- # Discover domains by scanning the package, fallback to EPs if needed
52
- base = resolve_base_pkg_dir(root_dir, name)
53
- domain_options = []
54
- for dirname in ("domains",):
55
- dom_dir = base / dirname
56
- if dom_dir.exists():
57
- domain_options.extend(
58
- [p.name for p in dom_dir.iterdir() if p.is_dir()
59
- and (p / "model.py").exists()]
60
- )
61
- domain_options = sorted(set(domain_options))
62
- if not domain_options:
63
- domain_options = sorted(
64
- read_group_entries(pyproject, FILTERS_GROUP).keys())
65
- if not domain_options:
66
- print("[error] No domains found. Create one first (jerry domain add ...)")
67
- raise SystemExit(2)
68
-
69
- dom_name = _pick_from_list("Select a domain to link to:", domain_options)
70
-
71
- # create mapper + EP (domain.origin)
72
- attach_source_to_domain(
73
- domain=dom_name,
74
- provider=provider,
75
- dataset=dataset,
76
- root=None,
77
- )
78
-
79
- def _slug(s: str) -> str:
80
- s = s.strip().lower()
81
- s = re.sub(r"[^a-z0-9]+", "_", s)
82
- return s.strip("_")
83
- ep_key = f"{_slug(dom_name)}.{_slug(provider)}"
84
- print(f"[ok] Registered mapper entry point as '{ep_key}'.")
85
-
86
- # Inject per-file canonical stream into streams directory
87
- streams_path = resolve_streams_dir(proj_path)
88
-
89
- canonical_alias = ep_key
90
- mapper_ep = ep_key
91
- # Write a single-file canonical spec into streams directory, matching
92
- # ContractConfig schema with helpful commented placeholders per stage.
93
- try:
94
- # Ensure streams_path is a directory path
95
- streams_dir = streams_path if streams_path.is_dir() else streams_path.parent
96
- streams_dir.mkdir(parents=True, exist_ok=True)
97
- cfile = streams_dir / f"{canonical_alias}.yaml"
98
- # Build a richer scaffold as YAML text to preserve comments
99
- scaffold = f"""
100
- source_id: {src_key}
101
- stream_id: {canonical_alias}
102
-
103
- mapper:
104
- entrypoint: {mapper_ep}
105
- args: {{}}
106
-
107
- # partition_by: <field or [fields]>
108
- # sort_batch_size: 100000 # in-memory sort chunk size
109
-
110
- record: # record-level transforms
111
- - filter: {{ operator: ge, field: time, comparand: "${{start_time}}" }}
112
- - filter: {{ operator: le, field: time, comparand: "${{end_time}}" }}
113
- # - floor_time: {{ resolution: 10m }}
114
- # - lag: {{ lag: 10m }}
115
-
116
- # stream: # per-feature transforms (input sorted by id,time)
117
- # - ensure_ticks: {{ tick: 10m }}
118
- # - granularity: {{ mode: first }}
119
- # - fill: {{ statistic: median, window: 6, min_samples: 1 }}
120
-
121
- # debug: # optional validation-only checks
122
- # - lint: {{ mode: warn, tick: 10m }}
123
- """
124
- with cfile.open("w", encoding="utf-8") as f:
125
- f.write(scaffold)
126
- print(f"[new] Created canonical spec: {cfile}")
127
- except Exception as e:
128
- print(f"[error] Failed to write canonical spec: {e}", file=sys.stderr)