jerry-thomas 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datapipeline/analysis/vector/collector.py +120 -17
- datapipeline/analysis/vector/matrix.py +33 -8
- datapipeline/analysis/vector/report.py +162 -32
- datapipeline/build/tasks/__init__.py +11 -0
- datapipeline/build/tasks/config.py +74 -0
- datapipeline/build/tasks/metadata.py +170 -0
- datapipeline/build/tasks/scaler.py +73 -0
- datapipeline/build/tasks/schema.py +60 -0
- datapipeline/build/tasks/utils.py +169 -0
- datapipeline/cli/app.py +304 -127
- datapipeline/cli/commands/build.py +240 -16
- datapipeline/cli/commands/contract.py +367 -0
- datapipeline/cli/commands/domain.py +8 -3
- datapipeline/cli/commands/inspect.py +401 -149
- datapipeline/cli/commands/list_.py +30 -7
- datapipeline/cli/commands/plugin.py +1 -1
- datapipeline/cli/commands/run.py +227 -241
- datapipeline/cli/commands/run_config.py +101 -0
- datapipeline/cli/commands/serve_pipeline.py +156 -0
- datapipeline/cli/commands/source.py +44 -8
- datapipeline/cli/visuals/__init__.py +4 -2
- datapipeline/cli/visuals/common.py +239 -0
- datapipeline/cli/visuals/labels.py +15 -15
- datapipeline/cli/visuals/runner.py +66 -0
- datapipeline/cli/visuals/sections.py +20 -0
- datapipeline/cli/visuals/sources.py +132 -119
- datapipeline/cli/visuals/sources_basic.py +260 -0
- datapipeline/cli/visuals/sources_off.py +76 -0
- datapipeline/cli/visuals/sources_rich.py +414 -0
- datapipeline/config/catalog.py +37 -3
- datapipeline/config/context.py +214 -0
- datapipeline/config/dataset/loader.py +21 -4
- datapipeline/config/dataset/normalize.py +4 -4
- datapipeline/config/metadata.py +43 -0
- datapipeline/config/postprocess.py +2 -2
- datapipeline/config/project.py +3 -2
- datapipeline/config/resolution.py +129 -0
- datapipeline/config/tasks.py +309 -0
- datapipeline/config/workspace.py +155 -0
- datapipeline/domain/__init__.py +12 -0
- datapipeline/domain/record.py +11 -0
- datapipeline/domain/sample.py +54 -0
- datapipeline/integrations/ml/adapter.py +34 -20
- datapipeline/integrations/ml/pandas_support.py +0 -2
- datapipeline/integrations/ml/rows.py +1 -6
- datapipeline/integrations/ml/torch_support.py +1 -3
- datapipeline/io/factory.py +112 -0
- datapipeline/io/output.py +132 -0
- datapipeline/io/protocols.py +21 -0
- datapipeline/io/serializers.py +219 -0
- datapipeline/io/sinks/__init__.py +23 -0
- datapipeline/io/sinks/base.py +2 -0
- datapipeline/io/sinks/files.py +79 -0
- datapipeline/io/sinks/rich.py +57 -0
- datapipeline/io/sinks/stdout.py +18 -0
- datapipeline/io/writers/__init__.py +14 -0
- datapipeline/io/writers/base.py +28 -0
- datapipeline/io/writers/csv_writer.py +25 -0
- datapipeline/io/writers/jsonl.py +52 -0
- datapipeline/io/writers/pickle_writer.py +30 -0
- datapipeline/pipeline/artifacts.py +58 -0
- datapipeline/pipeline/context.py +66 -7
- datapipeline/pipeline/observability.py +65 -0
- datapipeline/pipeline/pipelines.py +65 -13
- datapipeline/pipeline/split.py +11 -10
- datapipeline/pipeline/stages.py +127 -16
- datapipeline/pipeline/utils/keygen.py +20 -7
- datapipeline/pipeline/utils/memory_sort.py +22 -10
- datapipeline/pipeline/utils/transform_utils.py +22 -0
- datapipeline/runtime.py +5 -2
- datapipeline/services/artifacts.py +12 -6
- datapipeline/services/bootstrap/config.py +25 -0
- datapipeline/services/bootstrap/core.py +52 -37
- datapipeline/services/constants.py +6 -5
- datapipeline/services/factories.py +123 -1
- datapipeline/services/project_paths.py +43 -16
- datapipeline/services/runs.py +208 -0
- datapipeline/services/scaffold/domain.py +3 -2
- datapipeline/services/scaffold/filter.py +3 -2
- datapipeline/services/scaffold/mappers.py +9 -6
- datapipeline/services/scaffold/plugin.py +3 -3
- datapipeline/services/scaffold/source.py +93 -56
- datapipeline/sources/{composed_loader.py → data_loader.py} +9 -9
- datapipeline/sources/decoders.py +83 -18
- datapipeline/sources/factory.py +26 -16
- datapipeline/sources/models/__init__.py +2 -2
- datapipeline/sources/models/generator.py +0 -7
- datapipeline/sources/models/loader.py +3 -3
- datapipeline/sources/models/parsing_error.py +24 -0
- datapipeline/sources/models/source.py +6 -6
- datapipeline/sources/synthetic/time/loader.py +14 -2
- datapipeline/sources/transports.py +74 -37
- datapipeline/templates/plugin_skeleton/README.md +74 -30
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +31 -0
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +30 -0
- datapipeline/templates/plugin_skeleton/example/dataset.yaml +18 -0
- datapipeline/templates/plugin_skeleton/example/postprocess.yaml +29 -0
- datapipeline/templates/plugin_skeleton/{config/datasets/default → example}/project.yaml +11 -8
- datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml +12 -0
- datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +3 -0
- datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +9 -0
- datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +2 -0
- datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +4 -0
- datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +28 -0
- datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +4 -0
- datapipeline/templates/plugin_skeleton/jerry.yaml +28 -0
- datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +31 -0
- datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +30 -0
- datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +18 -0
- datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +29 -0
- datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +22 -0
- datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +12 -0
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +3 -0
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +9 -0
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +2 -0
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +4 -0
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +28 -0
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +4 -0
- datapipeline/templates/stubs/dto.py.j2 +2 -0
- datapipeline/templates/stubs/mapper.py.j2 +5 -4
- datapipeline/templates/stubs/parser.py.j2 +2 -0
- datapipeline/templates/stubs/record.py.j2 +2 -0
- datapipeline/templates/stubs/source.yaml.j2 +2 -3
- datapipeline/transforms/debug/lint.py +26 -41
- datapipeline/transforms/feature/scaler.py +89 -13
- datapipeline/transforms/record/floor_time.py +4 -4
- datapipeline/transforms/sequence.py +2 -35
- datapipeline/transforms/stream/dedupe.py +24 -0
- datapipeline/transforms/stream/ensure_ticks.py +7 -6
- datapipeline/transforms/vector/__init__.py +5 -0
- datapipeline/transforms/vector/common.py +98 -0
- datapipeline/transforms/vector/drop/__init__.py +4 -0
- datapipeline/transforms/vector/drop/horizontal.py +79 -0
- datapipeline/transforms/vector/drop/orchestrator.py +59 -0
- datapipeline/transforms/vector/drop/vertical.py +182 -0
- datapipeline/transforms/vector/ensure_schema.py +184 -0
- datapipeline/transforms/vector/fill.py +87 -0
- datapipeline/transforms/vector/replace.py +62 -0
- datapipeline/utils/load.py +24 -3
- datapipeline/utils/rich_compat.py +38 -0
- datapipeline/utils/window.py +76 -0
- jerry_thomas-1.0.0.dist-info/METADATA +825 -0
- jerry_thomas-1.0.0.dist-info/RECORD +199 -0
- {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/entry_points.txt +9 -8
- datapipeline/build/tasks.py +0 -186
- datapipeline/cli/commands/link.py +0 -128
- datapipeline/cli/commands/writers.py +0 -138
- datapipeline/config/build.py +0 -64
- datapipeline/config/run.py +0 -116
- datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.synthetic.yaml +0 -24
- datapipeline/templates/plugin_skeleton/config/contracts/time_linear.synthetic.yaml +0 -23
- datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml +0 -9
- datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +0 -14
- datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml +0 -13
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml +0 -10
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml +0 -10
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml +0 -10
- datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml +0 -11
- datapipeline/transforms/vector.py +0 -210
- jerry_thomas-0.3.0.dist-info/METADATA +0 -502
- jerry_thomas-0.3.0.dist-info/RECORD +0 -139
- {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/WHEEL +0 -0
- {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
datapipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
datapipeline/plugins.py,sha256=Y0QfI313t5_w_m1ayQVEuac3lJ4YR_OSIYZol35ZOTk,838
|
|
3
|
+
datapipeline/runtime.py,sha256=yfSlQaq9OdjVVuqRtWzxLdw1ku4boZoONfCYQIMfe3A,2622
|
|
4
|
+
datapipeline/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
datapipeline/analysis/vector_analyzer.py,sha256=D6eDW0H55QGnWiULXJEirMjw6MeBcwjJ4zfS7M6tx98,175
|
|
6
|
+
datapipeline/analysis/vector/collector.py,sha256=J-a42GiTqjGlhXFfz3LCa2QzeWqRTOoaGE4E_HGKhOs,14762
|
|
7
|
+
datapipeline/analysis/vector/matrix.py,sha256=pzaMmEMD09cOlRbBzuD0Lgc91dtKnCdCkJAXtkcCOPc,18469
|
|
8
|
+
datapipeline/analysis/vector/report.py,sha256=OsSarYX4-CWwdKI3b35qBqrhHxShavFS9CeYKZ7WTRs,16709
|
|
9
|
+
datapipeline/build/__init__.py,sha256=XbuHhJzIRTNnOOJMYa_BHiA0P8yPrEk4tuVXnD2NQbI,109
|
|
10
|
+
datapipeline/build/state.py,sha256=XsL2CtQl7x80OpE1SJE42D-ig3lBMPr7_HSlpq5xwU4,1826
|
|
11
|
+
datapipeline/build/tasks/__init__.py,sha256=-HRDBwLY9eLsKxjDzsC-E10y_ytfhOs9yXVA_Nyt4_o,319
|
|
12
|
+
datapipeline/build/tasks/config.py,sha256=8cuX5nEWFesWoZHWfeTNMlXqsE_dsPgYf6x2eyz1l0c,2312
|
|
13
|
+
datapipeline/build/tasks/metadata.py,sha256=3eHI1vBRwm-fT342gu1wgj7oXNXKZ94D30wkdAVA7kM,5783
|
|
14
|
+
datapipeline/build/tasks/scaler.py,sha256=knJbdeGdDvYZ4O15ra4mnVkmLZbRZbCdJdjfELe_LnU,2554
|
|
15
|
+
datapipeline/build/tasks/schema.py,sha256=kJnAlD_Z8Pd_c9kJH5bDVixB_Vi_mBkKWpjW7eZru1s,2163
|
|
16
|
+
datapipeline/build/tasks/utils.py,sha256=iFMJ8hWk1iRnQ1bUz0huiKlWyevt3x0g8Vh6PNzlMU8,6335
|
|
17
|
+
datapipeline/cli/app.py,sha256=yjaKjEbQbDUAvigU5D8Q2UPbKFRs9l8bHVIG-56Nrsc,24056
|
|
18
|
+
datapipeline/cli/commands/build.py,sha256=OPJ-r3WWAzWwa1wHK0zxnQhuM_1h1mSfNpPRl_Bqrf8,8979
|
|
19
|
+
datapipeline/cli/commands/contract.py,sha256=bPWhWZgdnkk_Ajlm9zUJrvZ3SlyXVqBxx-IJV1zZ5kM,14953
|
|
20
|
+
datapipeline/cli/commands/domain.py,sha256=JdOMlfpZP996kuNGePdjCGMKrqezo-cX8lhlOfd9F44,479
|
|
21
|
+
datapipeline/cli/commands/filter.py,sha256=vhoCIETJNUJmiI37ZdBNaeJAm6O4AU_tveJxVj47S8A,307
|
|
22
|
+
datapipeline/cli/commands/inspect.py,sha256=aatn_olRcFaLyya6r2QMlzzAzlbguEtQ7mKRxoEOFAA,16066
|
|
23
|
+
datapipeline/cli/commands/list_.py,sha256=m9o_exiiC_aiQXsR4lZv_QmN1hfpSNq4ICvYLgiS2e8,1605
|
|
24
|
+
datapipeline/cli/commands/plugin.py,sha256=RER1aBiiITYyvCtWxaqAiEqUGJGzKhw6oWSeajtd5ns,374
|
|
25
|
+
datapipeline/cli/commands/run.py,sha256=TmbyggYOlF972oxwLhh-r27ggeWARg0_WfCMQJAudS8,8348
|
|
26
|
+
datapipeline/cli/commands/run_config.py,sha256=zeXCuDz1ez6Zd6Tq2N0S-YIPs1ZQ8U3fN3lvvd56108,3194
|
|
27
|
+
datapipeline/cli/commands/serve_pipeline.py,sha256=7i1HbuFIbYKkM-aQ2BrDN57K1kFv6NJ4EAN6NOz4aFE,5036
|
|
28
|
+
datapipeline/cli/commands/source.py,sha256=OyDOZm93Lbj6avbAefQPWX87WfRWE1phCPVPB-dNVc4,2073
|
|
29
|
+
datapipeline/cli/visuals/__init__.py,sha256=CUxCoMoU96FQonq6V_i_HBUwuwoWjML5X-_MZDF_i8M,371
|
|
30
|
+
datapipeline/cli/visuals/common.py,sha256=p66-3WBMfl7_3UVIfsrkXnzpE9BsHinpmkHWOZaK00c,8173
|
|
31
|
+
datapipeline/cli/visuals/labels.py,sha256=oK1PpgMoGhlwfyTqiXuaaDm65gDYqv9R-Ac8NqYPhHE,2680
|
|
32
|
+
datapipeline/cli/visuals/runner.py,sha256=GtQcFjmYfVuNMmmp3uoJo0sXJOGeCoN6EOUUB6kzWSU,2085
|
|
33
|
+
datapipeline/cli/visuals/sections.py,sha256=ZK02cjxd5FJAF-IJXqj9loWSrlWwMfYJlbsCfmLBJ5A,614
|
|
34
|
+
datapipeline/cli/visuals/sources.py,sha256=m0nNmRSlSNWTyGj_MF3PS_m9hXKtJv63bP_9-SKn0Xc,5144
|
|
35
|
+
datapipeline/cli/visuals/sources_basic.py,sha256=1-1JJ77AdiQv0iC4qpvb71cICmheInusIJFCc3csWLs,9086
|
|
36
|
+
datapipeline/cli/visuals/sources_off.py,sha256=8nWuwsj-XBsjcfZ9FBpuxC3vhC6mPPObjVsh1EUparM,2651
|
|
37
|
+
datapipeline/cli/visuals/sources_rich.py,sha256=W2ziT0Sb0QKhEbEjPYVUMY0TOrOhey0X3LXjNSwQHLg,16577
|
|
38
|
+
datapipeline/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
+
datapipeline/config/catalog.py,sha256=2eMwiCEof3WWKASfhxLNaZHLHCqXBoUtdJ1LvHUT74Y,2552
|
|
40
|
+
datapipeline/config/context.py,sha256=JzkhFHeSYuyID5L2FPSinA3S0Gx7BNsXXfy_adAjNHI,6857
|
|
41
|
+
datapipeline/config/metadata.py,sha256=Vckaz6P7_MparAR3IOWR5K5S5mqqul2F6wEg3DWMyzw,1309
|
|
42
|
+
datapipeline/config/postprocess.py,sha256=67ukmtGNHFmZd8bv-POWOE_HjE5T5AXQaOypCZG1d-o,361
|
|
43
|
+
datapipeline/config/project.py,sha256=nc2n9g-02pOmuErzHHxJasP4wn04rB7rXqIUieYg_vQ,997
|
|
44
|
+
datapipeline/config/resolution.py,sha256=syS9fLWNl17NrA-GPpAk3pbFtL4I7hQDJdfWaeldqXs,3477
|
|
45
|
+
datapipeline/config/split.py,sha256=VFYRF6Fz5xLTqqxIt3RVGB4kwlnHH8CxjOddEAJYG5Q,1048
|
|
46
|
+
datapipeline/config/tasks.py,sha256=qRHV6vEOWgpfknSplfV7RbCAdcIsAVcfk2NZU92zRsE,9777
|
|
47
|
+
datapipeline/config/workspace.py,sha256=mcZxnEtV2rNkb7TeeE7P_C-LIgQ27e10CawW1w4H7w0,5131
|
|
48
|
+
datapipeline/config/dataset/dataset.py,sha256=Q9cb5QoDtyPb4pbD9mSTZcJmXQhdEWwDLS52xKAcqXg,562
|
|
49
|
+
datapipeline/config/dataset/feature.py,sha256=2Hxz0FXZskLI4ICXhmlG6b1Vvxzh0Ql9e6BwjMRtzSs,346
|
|
50
|
+
datapipeline/config/dataset/loader.py,sha256=Eh_F56o242ptEIsR22kC7HkNi6SggpRf10Gpfc3ipTo,1063
|
|
51
|
+
datapipeline/config/dataset/normalize.py,sha256=5IFGYhRrJ4JMWLLy-qRc6W8p6FZr8T-Fz2FCxp6MHCA,803
|
|
52
|
+
datapipeline/domain/__init__.py,sha256=rfZZpfvozmQNKhBabzgC9g4urMbchjDXbbl54sNtxZQ,262
|
|
53
|
+
datapipeline/domain/feature.py,sha256=7BOI4H458BKU8B9vqdfez7WOO1YKiF6lt0oy7PMbqrQ,295
|
|
54
|
+
datapipeline/domain/record.py,sha256=VY2vxpVJGpn0sztI5mvD0oPdxy5auLJ8huKUR1VCgZA,1062
|
|
55
|
+
datapipeline/domain/sample.py,sha256=yjmxPJmjHwrw9xJR5hxFr1XKelpJEOZI7XrLlqsrzy4,1558
|
|
56
|
+
datapipeline/domain/vector.py,sha256=apK1iu7tca2k2xgNGJAAQfKhirno7ZKZ3pDheKf9euM,1041
|
|
57
|
+
datapipeline/filters/filters.py,sha256=dM6U-QpGCQQ4-CMBTJgWZp2zH2TVTk6uYOqGPC5NBCY,2649
|
|
58
|
+
datapipeline/integrations/__init__.py,sha256=tjTLsIa6NRWKI05wjwPAUuXozDA-gP98SccFJ9lYHs8,410
|
|
59
|
+
datapipeline/integrations/ml/__init__.py,sha256=oflJXnjQEn1Zv0Vho10mc2y3D6UkKusNZwE5yUtatb8,463
|
|
60
|
+
datapipeline/integrations/ml/adapter.py,sha256=X9UGbNev4eN-KhL8KAaSPKLrpkrgJA3c0sTgFRDAMv0,4591
|
|
61
|
+
datapipeline/integrations/ml/pandas_support.py,sha256=HEX-Dx9RG17uCKSiZ7M4gMCoZbMQTa_3xhlC0s6bIYM,1229
|
|
62
|
+
datapipeline/integrations/ml/rows.py,sha256=OhziMyP6uvFFErYYJkaQkaXQ4oX-jAXnIDazLUfhP5A,1995
|
|
63
|
+
datapipeline/integrations/ml/torch_support.py,sha256=RCQaOCaggddtAo67-ThkdX_GgVJyLCHGZs3YyOszF04,2703
|
|
64
|
+
datapipeline/io/factory.py,sha256=xChYRxe1SRxHj8SXNirPEi2J20AOH3968yN92BRykr8,3903
|
|
65
|
+
datapipeline/io/output.py,sha256=tbtE4iJTDNtQcburA6W25eGH0gX-hAoVMLas5slkVAA,4003
|
|
66
|
+
datapipeline/io/protocols.py,sha256=vHjXhuV2r1Lo7k8SJuPH0WL2EXG_nm3DBpSowobUZ2U,512
|
|
67
|
+
datapipeline/io/serializers.py,sha256=5g59YwEL4-FT2r5kSj3UoQCKFkn-va2EMA8z79YzA18,6380
|
|
68
|
+
datapipeline/io/sinks/__init__.py,sha256=7l-LmJAjuNrQZWMDFMXdjbZQ4Pq-iWMaN_3GcUvWntw,517
|
|
69
|
+
datapipeline/io/sinks/base.py,sha256=cXG6VXop0RVL1K4xpSaFq1scylhb6N6dsg6UMrQGw54,49
|
|
70
|
+
datapipeline/io/sinks/files.py,sha256=UgXXj8NxjvdOrwpJt5YNTgG1gW89YYCVpVkSg1eGgKI,1975
|
|
71
|
+
datapipeline/io/sinks/rich.py,sha256=hZNMttsqaMSUsQmCu6kubzkYbUGDTbTYBYnDwcFsEp0,1486
|
|
72
|
+
datapipeline/io/sinks/stdout.py,sha256=64VUdf_YghxTCjVyYcpBQpC_Pt5rPQrYejRg_0_cF7A,382
|
|
73
|
+
datapipeline/io/writers/__init__.py,sha256=V8228IYVxP4ay6yG8HF_ukBDseAERrqlWC4gbGDBmoc,397
|
|
74
|
+
datapipeline/io/writers/base.py,sha256=kUaFv6XOoUjYw5pE7XFUel5ptdEhuY03VTqajortUZY,814
|
|
75
|
+
datapipeline/io/writers/csv_writer.py,sha256=FL2qiS8Hr273lGcN6pQXGOSufcM06ApVZnPmNhuAwjQ,833
|
|
76
|
+
datapipeline/io/writers/jsonl.py,sha256=SP2yPgH4B_Xrr7GJFVVIsTxarNKAFbEB-0RS1F0fD-g,1736
|
|
77
|
+
datapipeline/io/writers/pickle_writer.py,sha256=omXSeGbrcSWwNBodwJNCBok0mW167xciT5S8w_w5xCo,928
|
|
78
|
+
datapipeline/mappers/noop.py,sha256=L8bH1QVbLH-ogIam0ppYdx7KuWQ7Dj44lvD8tvNlY0Q,111
|
|
79
|
+
datapipeline/mappers/synthetic/time.py,sha256=lt1pC0May6Y4E8bZO4sERm3D04_r-qv63Y5fwrtCaBQ,639
|
|
80
|
+
datapipeline/parsers/identity.py,sha256=pdGuz0SSQGfySPpvZSnLgfTXTkC36x-7dQMMei3XhsU,321
|
|
81
|
+
datapipeline/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
82
|
+
datapipeline/pipeline/artifacts.py,sha256=mD31N-tlFR3EePVHNaxyA3Diiqab9Kyc0Gh6jCX_z-g,1492
|
|
83
|
+
datapipeline/pipeline/context.py,sha256=-W8QvGm32QGmBziEuzl-BitscuxGPb9bgQYDhRC1tkc,4377
|
|
84
|
+
datapipeline/pipeline/observability.py,sha256=y5LWgY3vjlhA5paslWVkjtMjur8yAGXjFhYNrfuJUNg,2043
|
|
85
|
+
datapipeline/pipeline/pipelines.py,sha256=Ilys2Cyqee5kHQ_gTIwWr4UZNDvyhU_xxUhBj42b5yI,4274
|
|
86
|
+
datapipeline/pipeline/split.py,sha256=TCzOhd8PF81IcUzUdPSz0hs3pIHi9V4IhXbSY2ZHK3Q,6090
|
|
87
|
+
datapipeline/pipeline/stages.py,sha256=yWl7nCJt_kOh9VVLgM6fDFM0Ajgh0GCwtvA-gSDRHTs,9493
|
|
88
|
+
datapipeline/pipeline/utils/keygen.py,sha256=v2JJagJAE9iYfLtbl4uxoAEXZN_ALH0xdHhPDhNfKwU,1909
|
|
89
|
+
datapipeline/pipeline/utils/memory_sort.py,sha256=hS61n2CeIITRqffE1ftvn2IdqQp1IXYhuN4MJqncKvk,1155
|
|
90
|
+
datapipeline/pipeline/utils/ordering.py,sha256=ZX18I7GFtvyMFJB96vWQpTOGwljjeJ6ppCg5a3Av3es,1450
|
|
91
|
+
datapipeline/pipeline/utils/transform_utils.py,sha256=q4bxQ0NFC4G7IeRSSL4ZzQ7vvVkxAnovflhEtfVUXyU,4221
|
|
92
|
+
datapipeline/registries/registry.py,sha256=MWWOHz2wT1oHQmovodtEreEuQhvH-i11Y2yXUUgZJhQ,641
|
|
93
|
+
datapipeline/services/artifacts.py,sha256=5mqNs5G53RqOYlMGvF0-_ZZA1M8mMMUXip1HuFhckjI,2930
|
|
94
|
+
datapipeline/services/constants.py,sha256=OVUqBBDkpl-A_f71uT8QUwd_50fgN1pA6uL4Yv8ZpUE,517
|
|
95
|
+
datapipeline/services/entrypoints.py,sha256=NKcSbhGRtBLQXGf-TdujwbVSRH1zb5J-S2jxFPnk6HQ,2504
|
|
96
|
+
datapipeline/services/factories.py,sha256=4Udq2LBRHNJmBTZiXrbqmV6PVbesg2c5Nndh6CpYDnE,6011
|
|
97
|
+
datapipeline/services/paths.py,sha256=xHxos62Y2gjhLggrnrmRqPiLMseK10OX17NJjnVk8wE,966
|
|
98
|
+
datapipeline/services/project_paths.py,sha256=qWM5WN0aKB4KwkwXgZQywBFPu1Cfh9mUiAQZghRwNOs,4264
|
|
99
|
+
datapipeline/services/runs.py,sha256=_xcrgZXb3sFfRN1ohvTLicQHq7_33g62SCt_JXCOzqE,6185
|
|
100
|
+
datapipeline/services/bootstrap/__init__.py,sha256=Mc2w2S69kU1hnzCvsGMhFqyNoNMXPwQtxprAkGN-sYE,245
|
|
101
|
+
datapipeline/services/bootstrap/config.py,sha256=122JNE7gZF1mohAI1gvX8H6i0JTql_Mm9bWcTpoD77c,4936
|
|
102
|
+
datapipeline/services/bootstrap/core.py,sha256=7iWf05GRdIOvgEHKOoq5NUFAMNhkd_kTe3zunkJqOHw,7394
|
|
103
|
+
datapipeline/services/scaffold/__init__.py,sha256=PaQNtYki9Kc7mQPnUtKDPU-rKohLHoXLvFVwdHdbXNM,68
|
|
104
|
+
datapipeline/services/scaffold/domain.py,sha256=mww7HhZ1ZepNvn2tHczpLZH0y3Ej7vgDGVLepFkTgIY,946
|
|
105
|
+
datapipeline/services/scaffold/filter.py,sha256=EwLFeI3cRoHw-hYE3jlLfqV0DKk9Z8EnWyVymJmOppA,1084
|
|
106
|
+
datapipeline/services/scaffold/mappers.py,sha256=kkkJ-UB51B2yawRoUst3CGExn3gRYPm5d_3kbujPVMQ,1960
|
|
107
|
+
datapipeline/services/scaffold/plugin.py,sha256=bocH4Z1k3ReqnfLSzdXLiQP4SG9R97kZ5hj14_7GYFM,1699
|
|
108
|
+
datapipeline/services/scaffold/source.py,sha256=w0w-oID9x48eMV6c0FhqW1myAWh_ELYmFojsdfVlaTk,6649
|
|
109
|
+
datapipeline/services/scaffold/templates.py,sha256=B3YnZpFUZLynijJosTNxZQLXnPP_Y_t1RHqfI1lGOxU,634
|
|
110
|
+
datapipeline/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
111
|
+
datapipeline/sources/data_loader.py,sha256=q-gAQDOkMwIvY5b-T0j6P_Bj716R5I7jIoyHOleTB2s,1358
|
|
112
|
+
datapipeline/sources/decoders.py,sha256=yH4uVDg0Hh6sUpk04W3u1dtJi_Xv-vPhvlMEIGs7-zs,3984
|
|
113
|
+
datapipeline/sources/factory.py,sha256=UkcrycyodBBGt7Q5EO8EirH4obnOQH8SqrFedjMEIR0,2410
|
|
114
|
+
datapipeline/sources/transports.py,sha256=o32uvCRWps-voresZ2gGwhXTiRpmDIWdyM_IE2zY0H8,3308
|
|
115
|
+
datapipeline/sources/models/__init__.py,sha256=_DVhnet2HMvw-H-UEFQeEXCwro6Qg1ws0iBgMSKbBbM,399
|
|
116
|
+
datapipeline/sources/models/base.py,sha256=MAUawd11fII-mxxuSPM4f6H1t1tbyZX_QWhoAgeYUcU,238
|
|
117
|
+
datapipeline/sources/models/generator.py,sha256=OTJEcbpRp6pPZyG_8sds2x-15LF-SvAR5yblivG1E2g,508
|
|
118
|
+
datapipeline/sources/models/loader.py,sha256=VMWfEzrBvKdtRPjixPbttTochO3IULdglJ01769310E,1028
|
|
119
|
+
datapipeline/sources/models/parser.py,sha256=Ts31aksHLDCw5ovF2D99w9g_j-NnEiZ8x0JHtUxmmXs,226
|
|
120
|
+
datapipeline/sources/models/parsing_error.py,sha256=41pmauyqNK75Hke-rauRRNc-UveNXt8czxCViyZidvs,734
|
|
121
|
+
datapipeline/sources/models/source.py,sha256=lcAcbwM-HrMVO3uEWTpbvqY42g74JZWKD-KJ89Lsjys,913
|
|
122
|
+
datapipeline/sources/models/synthetic.py,sha256=FLF2Jvdc06VCriTCliThuQTUXd6NrXIQpksIL8gBIH8,288
|
|
123
|
+
datapipeline/sources/synthetic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
124
|
+
datapipeline/sources/synthetic/time/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
+
datapipeline/sources/synthetic/time/loader.py,sha256=X_NQJFAHL8wHV5TxbLhRwqGfFJPOw6qVToBkKFD3r_k,2003
|
|
126
|
+
datapipeline/sources/synthetic/time/parser.py,sha256=d3GZMQ7L1Qi4LeEm7U3y0_pk0RdhskioQukYyqyoqic,343
|
|
127
|
+
datapipeline/templates/plugin_skeleton/README.md,sha256=lRaDeFPK-r1qPUYmPkTztfHptVfbxlKmCOgKaAJJaHc,7490
|
|
128
|
+
datapipeline/templates/plugin_skeleton/jerry.yaml,sha256=ubM5REHH5gEqbCBuUIBXRsSx6iuJjQ96yzn7RQBDMpk,705
|
|
129
|
+
datapipeline/templates/plugin_skeleton/pyproject.toml,sha256=0lmO5Aia9tB81Ez4SxP56DGisekx-palMmGCUzmAl4E,259
|
|
130
|
+
datapipeline/templates/plugin_skeleton/example/dataset.yaml,sha256=cSKk8IyoJebdc9b959Sw7gDfBXl2BT8hktyZ4Z43Nog,471
|
|
131
|
+
datapipeline/templates/plugin_skeleton/example/postprocess.yaml,sha256=yUYr5c6YtBeF_rm_ENsOMkn_sOAChzbqhL98WKr0CRw,710
|
|
132
|
+
datapipeline/templates/plugin_skeleton/example/project.yaml,sha256=4WyrlBCZZ47ceOw9nV8QgqU7_jH5Fu8-z4SJGKbPYK8,845
|
|
133
|
+
datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml,sha256=oHesyqPHQ6KYJeVNxZlB75Pw0BTezr2U9IhsdM6YQ7E,842
|
|
134
|
+
datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml,sha256=jtxL89bgdi84iUp4lcMUCZZGcxXsSmTaFOyfe5rxX-M,1579
|
|
135
|
+
datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml,sha256=bouMA0PdRETU67wkh8HTs7vzr0UkKcVXDjAoVChdHAc,210
|
|
136
|
+
datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml,sha256=IMeokmMOFwY1jrXauoFtTFV6gtdos9xFob7nCrqfkPA,104
|
|
137
|
+
datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml,sha256=xNl8JmJ4ogHtfq1jpNqMvH4GYWgxFJ0vMqB6XI3aM-g,237
|
|
138
|
+
datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml,sha256=Y5X3lOM-0FwKNjFLTeRtts6FZjSj2mLpsO4CS2GBafs,14
|
|
139
|
+
datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml,sha256=YXGYz3szsA92Qejdm3KdKYac3aFPPlnfnhipufGYzsA,35
|
|
140
|
+
datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml,sha256=9LFcjdkGYtz2WifCB-6avrvB-5TZUGBCJFQQbuFoQPk,910
|
|
141
|
+
datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml,sha256=ecV69-l6qQS2jIagh1SuehxfLdIBeR49uekhC6DB6EM,33
|
|
142
|
+
datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
143
|
+
datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml,sha256=cSKk8IyoJebdc9b959Sw7gDfBXl2BT8hktyZ4Z43Nog,471
|
|
144
|
+
datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml,sha256=yUYr5c6YtBeF_rm_ENsOMkn_sOAChzbqhL98WKr0CRw,710
|
|
145
|
+
datapipeline/templates/plugin_skeleton/your-dataset/project.yaml,sha256=u9LtgLt6OdLsPd7r4bmrujg87WX8NzQBNIvI3gs3QgQ,827
|
|
146
|
+
datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml,sha256=oHesyqPHQ6KYJeVNxZlB75Pw0BTezr2U9IhsdM6YQ7E,842
|
|
147
|
+
datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml,sha256=jtxL89bgdi84iUp4lcMUCZZGcxXsSmTaFOyfe5rxX-M,1579
|
|
148
|
+
datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml,sha256=bouMA0PdRETU67wkh8HTs7vzr0UkKcVXDjAoVChdHAc,210
|
|
149
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml,sha256=IMeokmMOFwY1jrXauoFtTFV6gtdos9xFob7nCrqfkPA,104
|
|
150
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml,sha256=xNl8JmJ4ogHtfq1jpNqMvH4GYWgxFJ0vMqB6XI3aM-g,237
|
|
151
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml,sha256=Y5X3lOM-0FwKNjFLTeRtts6FZjSj2mLpsO4CS2GBafs,14
|
|
152
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml,sha256=YXGYz3szsA92Qejdm3KdKYac3aFPPlnfnhipufGYzsA,35
|
|
153
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml,sha256=9LFcjdkGYtz2WifCB-6avrvB-5TZUGBCJFQQbuFoQPk,910
|
|
154
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml,sha256=ecV69-l6qQS2jIagh1SuehxfLdIBeR49uekhC6DB6EM,33
|
|
155
|
+
datapipeline/templates/stubs/dto.py.j2,sha256=MizqUzY4eGXiIHzGBovXoPHqhVno791Bi6PCGigVqww,908
|
|
156
|
+
datapipeline/templates/stubs/filter.py.j2,sha256=3LgRgAL_HRaENOOqQx8NdeM1AUy-T0rtHVTA7N2oWOs,466
|
|
157
|
+
datapipeline/templates/stubs/loader_synthetic.py.j2,sha256=9SQBeTBGlZmKs6nSYBKd8nbOPcFHgDx17Mh8xOEQnvs,1285
|
|
158
|
+
datapipeline/templates/stubs/mapper.py.j2,sha256=eonMmBgql-XFnxcQ5mRONyPCJShhQAp1jqYSF_1Pcvo,783
|
|
159
|
+
datapipeline/templates/stubs/parser.py.j2,sha256=Ie6ykkT4YTNlRTlbagleHnFukwewHRTq7C7Tbg_P_9Y,674
|
|
160
|
+
datapipeline/templates/stubs/parser_custom.py.j2,sha256=0Nytq43JdTZoyRj-4Mz6HWdMTmOP3VlFuYOB_A_13Vg,580
|
|
161
|
+
datapipeline/templates/stubs/record.py.j2,sha256=xiDMMbYmoReBy0KXRoFcd9FuUoLi9kYzlMFmFjdE4WE,662
|
|
162
|
+
datapipeline/templates/stubs/source.yaml.j2,sha256=pKxqYuJsD5TkVHjT4UrwWQ2RFc0JoL0w3YnZqZgf5J0,410
|
|
163
|
+
datapipeline/transforms/filter.py,sha256=Jt8wTEIqWqe34s7GVVekcR8OdRozs317sj7Uw08GNOA,1433
|
|
164
|
+
datapipeline/transforms/sequence.py,sha256=tZiqFB_aZdVji2uEaFkUyah8k4AYX9IxPMoLBbOCfYg,1579
|
|
165
|
+
datapipeline/transforms/utils.py,sha256=ts6dULY2Pc5fFs7AMd3goN4hDzQkv-6CDLdRH41lG9I,721
|
|
166
|
+
datapipeline/transforms/vector_utils.py,sha256=PcStTwRaaunONKZJuwv79bjdfaDcamLcwNLRHjZ5yXw,927
|
|
167
|
+
datapipeline/transforms/debug/identity.py,sha256=6bwnEYhMBYw0YPrMccrZPXDOQM4r_-odsKo8Hhpbz10,2515
|
|
168
|
+
datapipeline/transforms/debug/lint.py,sha256=v7aLig0y7K_Wqc4W37ZVfUmyLXOV8gcgDMBHm-QzUo0,3124
|
|
169
|
+
datapipeline/transforms/feature/model.py,sha256=gB-GP80_P7bzEKJFSM4leRke75yiD4-S5eJ1p8g3JU8,382
|
|
170
|
+
datapipeline/transforms/feature/scaler.py,sha256=-NRQCz_BUG5X7sg4adMXxZ-1AOAhdmdHET1uIkBPWR0,8305
|
|
171
|
+
datapipeline/transforms/record/floor_time.py,sha256=Nk_srdwNMuxqRCguxjvFKB7rfzMu1SB1pDYVh4cdV4Q,617
|
|
172
|
+
datapipeline/transforms/record/lag.py,sha256=5wrPyVNFvidvdQddnK6ZeUOI5I8rfXEbzIg6tzKiJu4,536
|
|
173
|
+
datapipeline/transforms/stream/dedupe.py,sha256=VyKI8hMcekBntjD3WjIBykMiPs8RNkxQSpd9SCwxihA,787
|
|
174
|
+
datapipeline/transforms/stream/ensure_ticks.py,sha256=hxnG3yHabt4HeOYjWyhMrIBxG1ZbG1uj8vEM4WtGEfA,1185
|
|
175
|
+
datapipeline/transforms/stream/fill.py,sha256=N_ybLUCvaMVvKsFP8-HcGuKqV9hXAnYmV7zyUB-Ugys,3500
|
|
176
|
+
datapipeline/transforms/stream/granularity.py,sha256=PzHDGDwyn8P07BCbcFZaorS_7lbAbEdMLqD9Wy61y0M,3376
|
|
177
|
+
datapipeline/transforms/vector/__init__.py,sha256=nKBaksXv_rBF2BUT-IFJTbbRBiwkpq6k39cLFS8CfXw,245
|
|
178
|
+
datapipeline/transforms/vector/common.py,sha256=24iX5EviN9BKi9-GJIty57LGOvEVD9PueZflyoGS5Nc,3616
|
|
179
|
+
datapipeline/transforms/vector/ensure_schema.py,sha256=AByCuHhnC7T4CWgk5oUC59-oom_LzC0aER1WBASEkDs,7240
|
|
180
|
+
datapipeline/transforms/vector/fill.py,sha256=1bWXbVABnyndv4O01cZN0oVS2-X_HVAEGUmwywazs_w,3065
|
|
181
|
+
datapipeline/transforms/vector/replace.py,sha256=gYTZx0CTkdSsuXUiAvP7dtcyEHEQv2UJeIEiowGDzhw,2019
|
|
182
|
+
datapipeline/transforms/vector/drop/__init__.py,sha256=NsGv9v7n13uPsRWGTSLKWPVaXocQ-zx9WE3Ez5hQt3U,151
|
|
183
|
+
datapipeline/transforms/vector/drop/horizontal.py,sha256=c6bsOaKrppEfBQoNGOaWLbKUHWPlLCo9jscBDYMvEp8,2567
|
|
184
|
+
datapipeline/transforms/vector/drop/orchestrator.py,sha256=smd3z9Oz5iEzkahButE5UsK1nPrSUw6B5CPxtfj2K8g,1986
|
|
185
|
+
datapipeline/transforms/vector/drop/vertical.py,sha256=5VBtilX48Pw6Fc1e0UGk6IwIUoPbkm98t2N7EPql1gA,6858
|
|
186
|
+
datapipeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
187
|
+
datapipeline/utils/load.py,sha256=FfW_UOD5NhxDg4DarRSzVbjGbv7An_bKK-IdUUOrFhs,1970
|
|
188
|
+
datapipeline/utils/paths.py,sha256=5Y5rhNbjTiybUHfq9VfRMJ4gUfN9UltonM-4MABEG8w,798
|
|
189
|
+
datapipeline/utils/pickle_model.py,sha256=Uyd4AajInyTUpWfSJDDEGLinXeQkHjQUNnyla0owtA4,854
|
|
190
|
+
datapipeline/utils/placeholders.py,sha256=epZQ7NifUWI7_7hZKGEkCBDOaMnN9LiqJdI2gvBAEgE,890
|
|
191
|
+
datapipeline/utils/rich_compat.py,sha256=4ZfR82gG1vAVUiILVINqcRReqoUiRPmQOlLLBXz-pC0,1166
|
|
192
|
+
datapipeline/utils/time.py,sha256=vOqa2arqwEqbDo-JWEhOFPMnI1E4Ib3i1L-Rt-cGH8c,1072
|
|
193
|
+
datapipeline/utils/window.py,sha256=J5CkEIdY5iZd1QY9wawmHpBXpCp2FzHOHXhjYTCZWl8,2576
|
|
194
|
+
jerry_thomas-1.0.0.dist-info/licenses/LICENSE,sha256=pkBMylAJF5yChHAkdxwFhEptLGx13i-XFEKh-Sh6DkM,1073
|
|
195
|
+
jerry_thomas-1.0.0.dist-info/METADATA,sha256=lcnvJMQUbZ_cECGZhmkZz0zOvGZlVOtym3A1X_SaVNw,33507
|
|
196
|
+
jerry_thomas-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
197
|
+
jerry_thomas-1.0.0.dist-info/entry_points.txt,sha256=jsJFp_2aEEhKkL2I3Yc4yPSODy9ggDZmLeV75KPjb9A,1672
|
|
198
|
+
jerry_thomas-1.0.0.dist-info/top_level.txt,sha256=N8aoNPdPyHefODO4YAm7tqTaUcw0e8LDcqycFTf8TbM,13
|
|
199
|
+
jerry_thomas-1.0.0.dist-info/RECORD,,
|
|
@@ -2,16 +2,17 @@
|
|
|
2
2
|
jerry = datapipeline.cli.app:main
|
|
3
3
|
|
|
4
4
|
[datapipeline.loaders]
|
|
5
|
-
|
|
6
|
-
synthetic.
|
|
5
|
+
core.io = datapipeline.sources.factory:build_loader
|
|
6
|
+
core.synthetic.ticks = datapipeline.sources.synthetic.time.loader:make_time_loader
|
|
7
7
|
|
|
8
8
|
[datapipeline.mappers]
|
|
9
9
|
encode_time = datapipeline.mappers.synthetic.time:encode
|
|
10
|
+
identity = datapipeline.mappers.noop:identity
|
|
10
11
|
time.synthetic = datapipeline.mappers.noop:identity
|
|
11
12
|
|
|
12
13
|
[datapipeline.parsers]
|
|
14
|
+
core.synthetic.ticks = datapipeline.sources.synthetic.time.parser:TimeRowParser
|
|
13
15
|
identity = datapipeline.parsers.identity:IdentityParser
|
|
14
|
-
synthetic.time = datapipeline.sources.synthetic.time.parser:TimeRowParser
|
|
15
16
|
|
|
16
17
|
[datapipeline.transforms.debug]
|
|
17
18
|
identity = datapipeline.transforms.debug.identity:IdentityGuardTransform
|
|
@@ -27,13 +28,13 @@ floor_time = datapipeline.transforms.record.floor_time:floor_time
|
|
|
27
28
|
lag = datapipeline.transforms.record.lag:apply_lag
|
|
28
29
|
|
|
29
30
|
[datapipeline.transforms.stream]
|
|
30
|
-
|
|
31
|
+
dedupe = datapipeline.transforms.stream.dedupe:FeatureDeduplicateTransform
|
|
32
|
+
ensure_cadence = datapipeline.transforms.stream.ensure_ticks:ensure_cadence
|
|
31
33
|
fill = datapipeline.transforms.stream.fill:FillTransformer
|
|
32
34
|
granularity = datapipeline.transforms.stream.granularity:FeatureGranularityTransform
|
|
33
35
|
lint = datapipeline.transforms.stream.lint:StreamLint
|
|
34
36
|
|
|
35
37
|
[datapipeline.transforms.vector]
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
fill_horizontal = datapipeline.transforms.vector:VectorFillAcrossPartitionsTransform
|
|
38
|
+
drop = datapipeline.transforms.vector:VectorDropTransform
|
|
39
|
+
fill = datapipeline.transforms.vector:VectorFillTransform
|
|
40
|
+
replace = datapipeline.transforms.vector:VectorReplaceTransform
|
datapipeline/build/tasks.py
DELETED
|
@@ -1,186 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import hashlib
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Dict, Iterable, Iterator, Sequence, Tuple
|
|
6
|
-
|
|
7
|
-
from datapipeline.config.build import BuildConfig
|
|
8
|
-
from datapipeline.config.dataset.loader import load_dataset
|
|
9
|
-
from datapipeline.pipeline.context import PipelineContext
|
|
10
|
-
from datapipeline.pipeline.pipelines import build_vector_pipeline
|
|
11
|
-
from datapipeline.pipeline.split import build_labeler
|
|
12
|
-
from datapipeline.runtime import Runtime
|
|
13
|
-
from datapipeline.services.constants import PARTIONED_IDS, SCALER_STATISTICS
|
|
14
|
-
from datapipeline.services.project_paths import read_project
|
|
15
|
-
from datapipeline.utils.paths import ensure_parent
|
|
16
|
-
from datapipeline.transforms.feature.scaler import StandardScaler
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def _resolve_relative(project_yaml: Path, value: str) -> Path:
|
|
20
|
-
path = Path(value)
|
|
21
|
-
return path if path.is_absolute() else (project_yaml.parent / path)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def _normalized_label(path: Path, base_dir: Path) -> str:
|
|
25
|
-
try:
|
|
26
|
-
return str(path.resolve().relative_to(base_dir))
|
|
27
|
-
except ValueError:
|
|
28
|
-
return str(path.resolve())
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def _hash_file(hasher, path: Path, base_dir: Path) -> None:
|
|
32
|
-
hasher.update(_normalized_label(path, base_dir).encode("utf-8"))
|
|
33
|
-
hasher.update(b"\0")
|
|
34
|
-
hasher.update(path.read_bytes())
|
|
35
|
-
hasher.update(b"\0")
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _yaml_files(directory: Path) -> Iterable[Path]:
|
|
39
|
-
if not directory.exists():
|
|
40
|
-
return []
|
|
41
|
-
return sorted(p for p in directory.rglob("*.y*ml") if p.is_file())
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def compute_config_hash(project_yaml: Path, build_config_path: Path) -> str:
|
|
45
|
-
"""Compute a deterministic hash across relevant config inputs."""
|
|
46
|
-
|
|
47
|
-
hasher = hashlib.sha256()
|
|
48
|
-
base_dir = project_yaml.parent.resolve()
|
|
49
|
-
cfg = read_project(project_yaml)
|
|
50
|
-
|
|
51
|
-
required: Sequence[Path] = [
|
|
52
|
-
project_yaml.resolve(),
|
|
53
|
-
build_config_path.resolve(),
|
|
54
|
-
_resolve_relative(project_yaml, cfg.paths.dataset).resolve(),
|
|
55
|
-
_resolve_relative(project_yaml, cfg.paths.postprocess).resolve(),
|
|
56
|
-
]
|
|
57
|
-
|
|
58
|
-
for path in required:
|
|
59
|
-
if not path.exists():
|
|
60
|
-
raise FileNotFoundError(f"Expected config file missing: {path}")
|
|
61
|
-
_hash_file(hasher, path, base_dir)
|
|
62
|
-
|
|
63
|
-
for dir_value in (cfg.paths.sources, cfg.paths.streams):
|
|
64
|
-
directory = _resolve_relative(project_yaml, dir_value)
|
|
65
|
-
hasher.update(
|
|
66
|
-
f"[dir]{_normalized_label(directory, base_dir)}".encode("utf-8"))
|
|
67
|
-
if not directory.exists():
|
|
68
|
-
hasher.update(b"[missing]")
|
|
69
|
-
continue
|
|
70
|
-
for path in _yaml_files(directory):
|
|
71
|
-
_hash_file(hasher, path, base_dir)
|
|
72
|
-
|
|
73
|
-
return hasher.hexdigest()
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def _collect_partitioned_ids(runtime: Runtime, include_targets: bool) -> Sequence[str]:
|
|
77
|
-
dataset = load_dataset(runtime.project_yaml, "vectors")
|
|
78
|
-
feature_cfgs = list(dataset.features or [])
|
|
79
|
-
if include_targets:
|
|
80
|
-
feature_cfgs += list(dataset.targets or [])
|
|
81
|
-
|
|
82
|
-
sanitized = [cfg.model_copy(update={"scale": False})
|
|
83
|
-
for cfg in feature_cfgs]
|
|
84
|
-
|
|
85
|
-
ids: set[str] = set()
|
|
86
|
-
context = PipelineContext(runtime)
|
|
87
|
-
vectors = build_vector_pipeline(
|
|
88
|
-
context, sanitized, dataset.group_by, stage=None)
|
|
89
|
-
for _, vector in vectors:
|
|
90
|
-
ids.update(vector.values.keys())
|
|
91
|
-
return sorted(ids)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def materialize_partitioned_ids(runtime: Runtime, config: BuildConfig) -> Tuple[str, int]:
|
|
95
|
-
"""Write the partitioned-id list and return (relative_path, count)."""
|
|
96
|
-
|
|
97
|
-
task_cfg = config.partitioned_ids
|
|
98
|
-
ids = _collect_partitioned_ids(
|
|
99
|
-
runtime, include_targets=task_cfg.include_targets)
|
|
100
|
-
|
|
101
|
-
relative_path = Path(task_cfg.output)
|
|
102
|
-
destination = (runtime.artifacts_root / relative_path).resolve()
|
|
103
|
-
ensure_parent(destination)
|
|
104
|
-
|
|
105
|
-
with destination.open("w", encoding="utf-8") as fh:
|
|
106
|
-
for fid in ids:
|
|
107
|
-
fh.write(f"{fid}\n")
|
|
108
|
-
|
|
109
|
-
return str(relative_path), len(ids)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def materialize_scaler_statistics(runtime: Runtime, config: BuildConfig) -> Tuple[str, Dict[str, object]] | None:
|
|
113
|
-
task_cfg = config.scaler
|
|
114
|
-
if not task_cfg.enabled:
|
|
115
|
-
return None
|
|
116
|
-
|
|
117
|
-
dataset = load_dataset(runtime.project_yaml, "vectors")
|
|
118
|
-
feature_cfgs = list(dataset.features)
|
|
119
|
-
if not feature_cfgs and not task_cfg.include_targets:
|
|
120
|
-
return None
|
|
121
|
-
|
|
122
|
-
if task_cfg.include_targets:
|
|
123
|
-
feature_cfgs += list(dataset.targets or [])
|
|
124
|
-
|
|
125
|
-
sanitized_cfgs = [cfg.model_copy(
|
|
126
|
-
update={"scale": False}) for cfg in feature_cfgs]
|
|
127
|
-
|
|
128
|
-
context = PipelineContext(runtime)
|
|
129
|
-
vectors = build_vector_pipeline(
|
|
130
|
-
context, sanitized_cfgs, dataset.group_by, stage=None)
|
|
131
|
-
|
|
132
|
-
cfg = getattr(runtime, "split", None)
|
|
133
|
-
labeler = build_labeler(cfg) if cfg else None
|
|
134
|
-
if not labeler and task_cfg.split_label != "all":
|
|
135
|
-
raise RuntimeError(
|
|
136
|
-
f"Cannot compute scaler statistics for split '{task_cfg.split_label}' "
|
|
137
|
-
"when no split configuration is defined in the project."
|
|
138
|
-
)
|
|
139
|
-
|
|
140
|
-
def _train_stream() -> Iterator[tuple[object, object]]:
|
|
141
|
-
for group_key, vector in vectors:
|
|
142
|
-
if labeler and labeler.label(group_key, vector) != task_cfg.split_label:
|
|
143
|
-
continue
|
|
144
|
-
yield group_key, vector
|
|
145
|
-
|
|
146
|
-
scaler = StandardScaler()
|
|
147
|
-
total_observations = scaler.fit(_train_stream())
|
|
148
|
-
|
|
149
|
-
if not scaler.statistics:
|
|
150
|
-
raise RuntimeError(
|
|
151
|
-
f"No scaler statistics computed for split '{task_cfg.split_label}'."
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
relative_path = Path(task_cfg.output)
|
|
155
|
-
destination = (runtime.artifacts_root / relative_path).resolve()
|
|
156
|
-
ensure_parent(destination)
|
|
157
|
-
|
|
158
|
-
scaler.save(destination)
|
|
159
|
-
|
|
160
|
-
meta: Dict[str, object] = {
|
|
161
|
-
"features": len(scaler.statistics),
|
|
162
|
-
"split": task_cfg.split_label,
|
|
163
|
-
"observations": total_observations,
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
return str(relative_path), meta
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
def execute_build(runtime: Runtime, config: BuildConfig) -> Dict[str, Dict[str, object]]:
|
|
170
|
-
"""Materialize artifacts described by build.yaml."""
|
|
171
|
-
artifacts: Dict[str, Dict[str, object]] = {}
|
|
172
|
-
|
|
173
|
-
rel_path, count = materialize_partitioned_ids(runtime, config)
|
|
174
|
-
artifacts[PARTIONED_IDS] = {
|
|
175
|
-
"relative_path": rel_path,
|
|
176
|
-
"count": count,
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
scaler_result = materialize_scaler_statistics(runtime, config)
|
|
180
|
-
if scaler_result:
|
|
181
|
-
rel_path, meta = scaler_result
|
|
182
|
-
scaler_meta = {"relative_path": rel_path}
|
|
183
|
-
scaler_meta.update(meta)
|
|
184
|
-
artifacts[SCALER_STATISTICS] = scaler_meta
|
|
185
|
-
|
|
186
|
-
return artifacts
|
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
import sys
|
|
2
|
-
from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
|
|
3
|
-
from datapipeline.services.entrypoints import read_group_entries
|
|
4
|
-
from datapipeline.services.constants import FILTERS_GROUP
|
|
5
|
-
from datapipeline.services.project_paths import (
|
|
6
|
-
sources_dir as resolve_sources_dir,
|
|
7
|
-
streams_dir as resolve_streams_dir,
|
|
8
|
-
ensure_project_scaffold,
|
|
9
|
-
)
|
|
10
|
-
from datapipeline.services.scaffold.mappers import attach_source_to_domain
|
|
11
|
-
import re
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def _pick_from_list(prompt: str, options: list[str]) -> str:
|
|
15
|
-
print(prompt, file=sys.stderr)
|
|
16
|
-
for i, opt in enumerate(options, 1):
|
|
17
|
-
print(f" [{i}] {opt}", file=sys.stderr)
|
|
18
|
-
while True:
|
|
19
|
-
sel = input("> ").strip()
|
|
20
|
-
if sel.isdigit():
|
|
21
|
-
idx = int(sel)
|
|
22
|
-
if 1 <= idx <= len(options):
|
|
23
|
-
return options[idx - 1]
|
|
24
|
-
print("Please enter a number from the list.", file=sys.stderr)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def handle() -> None:
|
|
28
|
-
root_dir, name, pyproject = pkg_root(None)
|
|
29
|
-
|
|
30
|
-
# Discover sources by scanning sources_dir YAMLs
|
|
31
|
-
# Default to dataset-scoped project config
|
|
32
|
-
proj_path = root_dir / "config" / "datasets" / "default" / "project.yaml"
|
|
33
|
-
# Ensure a minimal project scaffold so we can resolve dirs interactively
|
|
34
|
-
ensure_project_scaffold(proj_path)
|
|
35
|
-
sources_dir = resolve_sources_dir(proj_path)
|
|
36
|
-
source_options = []
|
|
37
|
-
if sources_dir.exists():
|
|
38
|
-
source_options = sorted(p.stem for p in sources_dir.glob("*.y*ml"))
|
|
39
|
-
if not source_options:
|
|
40
|
-
print("[error] No sources found. Create one first (jerry source add ...)")
|
|
41
|
-
raise SystemExit(2)
|
|
42
|
-
|
|
43
|
-
src_key = _pick_from_list("Select a source to link:", source_options)
|
|
44
|
-
# Expect aliases from sources_dir filenames: provider_dataset.yaml
|
|
45
|
-
parts = src_key.split("_", 1)
|
|
46
|
-
if len(parts) != 2:
|
|
47
|
-
print("[error] Source alias must be 'provider_dataset' (from sources/<alias>.yaml)", file=sys.stderr)
|
|
48
|
-
raise SystemExit(2)
|
|
49
|
-
provider, dataset = parts[0], parts[1]
|
|
50
|
-
|
|
51
|
-
# Discover domains by scanning the package, fallback to EPs if needed
|
|
52
|
-
base = resolve_base_pkg_dir(root_dir, name)
|
|
53
|
-
domain_options = []
|
|
54
|
-
for dirname in ("domains",):
|
|
55
|
-
dom_dir = base / dirname
|
|
56
|
-
if dom_dir.exists():
|
|
57
|
-
domain_options.extend(
|
|
58
|
-
[p.name for p in dom_dir.iterdir() if p.is_dir()
|
|
59
|
-
and (p / "model.py").exists()]
|
|
60
|
-
)
|
|
61
|
-
domain_options = sorted(set(domain_options))
|
|
62
|
-
if not domain_options:
|
|
63
|
-
domain_options = sorted(
|
|
64
|
-
read_group_entries(pyproject, FILTERS_GROUP).keys())
|
|
65
|
-
if not domain_options:
|
|
66
|
-
print("[error] No domains found. Create one first (jerry domain add ...)")
|
|
67
|
-
raise SystemExit(2)
|
|
68
|
-
|
|
69
|
-
dom_name = _pick_from_list("Select a domain to link to:", domain_options)
|
|
70
|
-
|
|
71
|
-
# create mapper + EP (domain.origin)
|
|
72
|
-
attach_source_to_domain(
|
|
73
|
-
domain=dom_name,
|
|
74
|
-
provider=provider,
|
|
75
|
-
dataset=dataset,
|
|
76
|
-
root=None,
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
def _slug(s: str) -> str:
|
|
80
|
-
s = s.strip().lower()
|
|
81
|
-
s = re.sub(r"[^a-z0-9]+", "_", s)
|
|
82
|
-
return s.strip("_")
|
|
83
|
-
ep_key = f"{_slug(dom_name)}.{_slug(provider)}"
|
|
84
|
-
print(f"[ok] Registered mapper entry point as '{ep_key}'.")
|
|
85
|
-
|
|
86
|
-
# Inject per-file canonical stream into streams directory
|
|
87
|
-
streams_path = resolve_streams_dir(proj_path)
|
|
88
|
-
|
|
89
|
-
canonical_alias = ep_key
|
|
90
|
-
mapper_ep = ep_key
|
|
91
|
-
# Write a single-file canonical spec into streams directory, matching
|
|
92
|
-
# ContractConfig schema with helpful commented placeholders per stage.
|
|
93
|
-
try:
|
|
94
|
-
# Ensure streams_path is a directory path
|
|
95
|
-
streams_dir = streams_path if streams_path.is_dir() else streams_path.parent
|
|
96
|
-
streams_dir.mkdir(parents=True, exist_ok=True)
|
|
97
|
-
cfile = streams_dir / f"{canonical_alias}.yaml"
|
|
98
|
-
# Build a richer scaffold as YAML text to preserve comments
|
|
99
|
-
scaffold = f"""
|
|
100
|
-
source_id: {src_key}
|
|
101
|
-
stream_id: {canonical_alias}
|
|
102
|
-
|
|
103
|
-
mapper:
|
|
104
|
-
entrypoint: {mapper_ep}
|
|
105
|
-
args: {{}}
|
|
106
|
-
|
|
107
|
-
# partition_by: <field or [fields]>
|
|
108
|
-
# sort_batch_size: 100000 # in-memory sort chunk size
|
|
109
|
-
|
|
110
|
-
record: # record-level transforms
|
|
111
|
-
- filter: {{ operator: ge, field: time, comparand: "${{start_time}}" }}
|
|
112
|
-
- filter: {{ operator: le, field: time, comparand: "${{end_time}}" }}
|
|
113
|
-
# - floor_time: {{ resolution: 10m }}
|
|
114
|
-
# - lag: {{ lag: 10m }}
|
|
115
|
-
|
|
116
|
-
# stream: # per-feature transforms (input sorted by id,time)
|
|
117
|
-
# - ensure_ticks: {{ tick: 10m }}
|
|
118
|
-
# - granularity: {{ mode: first }}
|
|
119
|
-
# - fill: {{ statistic: median, window: 6, min_samples: 1 }}
|
|
120
|
-
|
|
121
|
-
# debug: # optional validation-only checks
|
|
122
|
-
# - lint: {{ mode: warn, tick: 10m }}
|
|
123
|
-
"""
|
|
124
|
-
with cfile.open("w", encoding="utf-8") as f:
|
|
125
|
-
f.write(scaffold)
|
|
126
|
-
print(f"[new] Created canonical spec: {cfile}")
|
|
127
|
-
except Exception as e:
|
|
128
|
-
print(f"[error] Failed to write canonical spec: {e}", file=sys.stderr)
|