jerry-thomas 1.0.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datapipeline/analysis/vector/collector.py +0 -1
- datapipeline/build/tasks/config.py +0 -2
- datapipeline/build/tasks/metadata.py +0 -2
- datapipeline/build/tasks/scaler.py +0 -2
- datapipeline/build/tasks/schema.py +0 -2
- datapipeline/build/tasks/utils.py +0 -2
- datapipeline/cli/app.py +201 -81
- datapipeline/cli/commands/contract.py +145 -283
- datapipeline/cli/commands/demo.py +13 -0
- datapipeline/cli/commands/domain.py +4 -4
- datapipeline/cli/commands/dto.py +11 -0
- datapipeline/cli/commands/filter.py +2 -2
- datapipeline/cli/commands/inspect.py +0 -68
- datapipeline/cli/commands/list_.py +30 -13
- datapipeline/cli/commands/loader.py +11 -0
- datapipeline/cli/commands/mapper.py +82 -0
- datapipeline/cli/commands/parser.py +45 -0
- datapipeline/cli/commands/run_config.py +1 -3
- datapipeline/cli/commands/serve_pipeline.py +5 -7
- datapipeline/cli/commands/source.py +106 -18
- datapipeline/cli/commands/stream.py +292 -0
- datapipeline/cli/visuals/common.py +0 -2
- datapipeline/cli/visuals/sections.py +0 -2
- datapipeline/cli/workspace_utils.py +0 -3
- datapipeline/config/context.py +0 -2
- datapipeline/config/dataset/feature.py +1 -0
- datapipeline/config/metadata.py +0 -2
- datapipeline/config/project.py +0 -2
- datapipeline/config/resolution.py +10 -2
- datapipeline/config/tasks.py +9 -9
- datapipeline/domain/feature.py +3 -0
- datapipeline/domain/record.py +7 -7
- datapipeline/domain/sample.py +0 -2
- datapipeline/domain/vector.py +6 -8
- datapipeline/integrations/ml/adapter.py +0 -2
- datapipeline/integrations/ml/pandas_support.py +0 -2
- datapipeline/integrations/ml/rows.py +0 -2
- datapipeline/integrations/ml/torch_support.py +0 -2
- datapipeline/io/output.py +0 -2
- datapipeline/io/serializers.py +26 -16
- datapipeline/mappers/synthetic/time.py +9 -2
- datapipeline/pipeline/artifacts.py +3 -5
- datapipeline/pipeline/observability.py +0 -2
- datapipeline/pipeline/pipelines.py +118 -34
- datapipeline/pipeline/stages.py +54 -18
- datapipeline/pipeline/utils/spool_cache.py +142 -0
- datapipeline/pipeline/utils/transform_utils.py +27 -2
- datapipeline/services/artifacts.py +1 -4
- datapipeline/services/constants.py +1 -0
- datapipeline/services/factories.py +4 -6
- datapipeline/services/paths.py +10 -1
- datapipeline/services/project_paths.py +0 -2
- datapipeline/services/runs.py +0 -2
- datapipeline/services/scaffold/contract_yaml.py +76 -0
- datapipeline/services/scaffold/demo.py +141 -0
- datapipeline/services/scaffold/discovery.py +115 -0
- datapipeline/services/scaffold/domain.py +21 -13
- datapipeline/services/scaffold/dto.py +31 -0
- datapipeline/services/scaffold/filter.py +2 -1
- datapipeline/services/scaffold/layout.py +96 -0
- datapipeline/services/scaffold/loader.py +61 -0
- datapipeline/services/scaffold/mapper.py +116 -0
- datapipeline/services/scaffold/parser.py +56 -0
- datapipeline/services/scaffold/plugin.py +14 -2
- datapipeline/services/scaffold/source_yaml.py +91 -0
- datapipeline/services/scaffold/stream_plan.py +129 -0
- datapipeline/services/scaffold/utils.py +187 -0
- datapipeline/sources/data_loader.py +0 -2
- datapipeline/sources/decoders.py +49 -8
- datapipeline/sources/factory.py +9 -6
- datapipeline/sources/foreach.py +18 -3
- datapipeline/sources/synthetic/time/parser.py +1 -1
- datapipeline/sources/transports.py +10 -4
- datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
- datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
- datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
- datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
- datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
- datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
- datapipeline/templates/plugin_skeleton/README.md +57 -136
- datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
- datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
- datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
- datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
- datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
- datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +9 -11
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +15 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
- datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
- datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
- datapipeline/templates/stubs/dto.py.j2 +2 -2
- datapipeline/templates/stubs/filter.py.j2 +1 -1
- datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
- datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
- datapipeline/templates/stubs/mappers/ingest.py.j2 +20 -0
- datapipeline/templates/stubs/parser.py.j2 +5 -1
- datapipeline/templates/stubs/record.py.j2 +1 -1
- datapipeline/templates/stubs/source.yaml.j2 +1 -1
- datapipeline/transforms/debug/identity.py +34 -16
- datapipeline/transforms/debug/lint.py +14 -11
- datapipeline/transforms/feature/scaler.py +5 -12
- datapipeline/transforms/filter.py +73 -17
- datapipeline/transforms/interfaces.py +58 -0
- datapipeline/transforms/record/floor_time.py +10 -7
- datapipeline/transforms/record/lag.py +8 -10
- datapipeline/transforms/sequence.py +2 -3
- datapipeline/transforms/stream/dedupe.py +5 -7
- datapipeline/transforms/stream/ensure_ticks.py +39 -24
- datapipeline/transforms/stream/fill.py +34 -25
- datapipeline/transforms/stream/filter.py +25 -0
- datapipeline/transforms/stream/floor_time.py +16 -0
- datapipeline/transforms/stream/granularity.py +52 -30
- datapipeline/transforms/stream/lag.py +17 -0
- datapipeline/transforms/stream/rolling.py +72 -0
- datapipeline/transforms/utils.py +42 -10
- datapipeline/transforms/vector/drop/horizontal.py +0 -3
- datapipeline/transforms/vector/drop/orchestrator.py +0 -3
- datapipeline/transforms/vector/drop/vertical.py +0 -2
- datapipeline/transforms/vector/ensure_schema.py +0 -2
- datapipeline/utils/paths.py +0 -2
- datapipeline/utils/placeholders.py +0 -2
- datapipeline/utils/rich_compat.py +0 -3
- datapipeline/utils/window.py +0 -2
- jerry_thomas-2.0.1.dist-info/METADATA +269 -0
- jerry_thomas-2.0.1.dist-info/RECORD +264 -0
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/WHEEL +1 -1
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/entry_points.txt +7 -3
- datapipeline/services/scaffold/mappers.py +0 -55
- datapipeline/services/scaffold/source.py +0 -191
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
- datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
- datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
- datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
- datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
- datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
- datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
- datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
- datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
- datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
- datapipeline/templates/stubs/mapper.py.j2 +0 -22
- jerry_thomas-1.0.3.dist-info/METADATA +0 -827
- jerry_thomas-1.0.3.dist-info/RECORD +0 -198
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
datapipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
datapipeline/plugins.py,sha256=Y0QfI313t5_w_m1ayQVEuac3lJ4YR_OSIYZol35ZOTk,838
|
|
3
|
+
datapipeline/runtime.py,sha256=yfSlQaq9OdjVVuqRtWzxLdw1ku4boZoONfCYQIMfe3A,2622
|
|
4
|
+
datapipeline/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
datapipeline/analysis/vector_analyzer.py,sha256=D6eDW0H55QGnWiULXJEirMjw6MeBcwjJ4zfS7M6tx98,175
|
|
6
|
+
datapipeline/analysis/vector/collector.py,sha256=qQrU5lb-zqlhcrJmWw3dwCISV-FQrkApUi5YA0eSViI,14727
|
|
7
|
+
datapipeline/analysis/vector/matrix.py,sha256=pzaMmEMD09cOlRbBzuD0Lgc91dtKnCdCkJAXtkcCOPc,18469
|
|
8
|
+
datapipeline/analysis/vector/report.py,sha256=OsSarYX4-CWwdKI3b35qBqrhHxShavFS9CeYKZ7WTRs,16709
|
|
9
|
+
datapipeline/build/__init__.py,sha256=XbuHhJzIRTNnOOJMYa_BHiA0P8yPrEk4tuVXnD2NQbI,109
|
|
10
|
+
datapipeline/build/state.py,sha256=XsL2CtQl7x80OpE1SJE42D-ig3lBMPr7_HSlpq5xwU4,1826
|
|
11
|
+
datapipeline/build/tasks/__init__.py,sha256=-HRDBwLY9eLsKxjDzsC-E10y_ytfhOs9yXVA_Nyt4_o,319
|
|
12
|
+
datapipeline/build/tasks/config.py,sha256=N2AtihY5YMq3xFzitd0gcmHldnmRNPC5wE_OUCdyJBQ,2276
|
|
13
|
+
datapipeline/build/tasks/metadata.py,sha256=s0I36RRV2pWdSMDY7qfazlSuaOlb0YtxiSuC6nMtzUs,5747
|
|
14
|
+
datapipeline/build/tasks/scaler.py,sha256=FiHHS8wKWHeZ0cAA8Iw-DE26w3zUvEi5VZAM_RJ3u8s,2518
|
|
15
|
+
datapipeline/build/tasks/schema.py,sha256=wK_mWcAZXxJGvSRSZYbn8jrrG4ENY3D1tx66gedbRJM,2127
|
|
16
|
+
datapipeline/build/tasks/utils.py,sha256=Kg7ueWYR8Df9CIP94NJqQfVtofYHMADYs74Ykjur3XQ,6299
|
|
17
|
+
datapipeline/cli/app.py,sha256=ck8B8fZaCcXMsy1LL7xVCAvs83MABkky-2O_RPnXyPg,27951
|
|
18
|
+
datapipeline/cli/workspace_utils.py,sha256=ed4659AtWKdT9lchAmpc3uz5rTLS9H595RBXNtZyfXU,767
|
|
19
|
+
datapipeline/cli/commands/build.py,sha256=OPJ-r3WWAzWwa1wHK0zxnQhuM_1h1mSfNpPRl_Bqrf8,8979
|
|
20
|
+
datapipeline/cli/commands/contract.py,sha256=2rlN-d6WqRdfCmyh6y9-hI75-J4kS2BxPECKyraw3wM,8266
|
|
21
|
+
datapipeline/cli/commands/demo.py,sha256=tBwAjhIKYZgKIMZvDE1OjCJY7NGWC_47yfhN3FJ3J9w,449
|
|
22
|
+
datapipeline/cli/commands/domain.py,sha256=rcAXhWJSnyPIZm5sVwvwiNh969QF21CKPEsj2b8VGuQ,498
|
|
23
|
+
datapipeline/cli/commands/dto.py,sha256=37z1c1LTHzxt_Rux-zyEm_4GgPrKoqYXjqfk8w9IwAQ,374
|
|
24
|
+
datapipeline/cli/commands/filter.py,sha256=rNChtMGWQTuoJ1NNH86lNj3l8wH0UymNfIZ4mFH0kYA,332
|
|
25
|
+
datapipeline/cli/commands/inspect.py,sha256=H7Mbhvh0gZ_xsZ4_ToAICC14NowyCqviD2KAETTixb0,13970
|
|
26
|
+
datapipeline/cli/commands/list_.py,sha256=vd83WZ4YIlEjrdWEmwjRVugdQkGwf-iWid9y7ud7WdQ,2215
|
|
27
|
+
datapipeline/cli/commands/loader.py,sha256=JFNObC_6m7sN0oklnj3C6GH9muMq2ydQQfnJEdbWTNk,430
|
|
28
|
+
datapipeline/cli/commands/mapper.py,sha256=MJnAzuZ6F2dYAo-NOVH1xloT1R5pFcmt-9LItcpPTyo,2449
|
|
29
|
+
datapipeline/cli/commands/parser.py,sha256=2eILI2ZcPaCZIARImKVN3EljgFqk_WmOqK110sQG5jU,1377
|
|
30
|
+
datapipeline/cli/commands/plugin.py,sha256=jUMBrxLw0QX61a2wf7rRGAWFg42xLZkI-C-HyUHiob4,427
|
|
31
|
+
datapipeline/cli/commands/run.py,sha256=TmbyggYOlF972oxwLhh-r27ggeWARg0_WfCMQJAudS8,8348
|
|
32
|
+
datapipeline/cli/commands/run_config.py,sha256=Ix4N-Zn-leb6MaQ7A0cZfVu9-yOaNme5rg5qR0rWXZ4,3158
|
|
33
|
+
datapipeline/cli/commands/serve_pipeline.py,sha256=9kxdc2D1qtDy6L-8ron-UuHc9qTng6xhZ43l00JqoEU,5004
|
|
34
|
+
datapipeline/cli/commands/source.py,sha256=0mhzRT7J2Nu7HKNORsvoaEBVOBP9CmN1L1_JVNXV0q4,5904
|
|
35
|
+
datapipeline/cli/commands/stream.py,sha256=30hYLjoRV4xdyFbK146eXnbhMDC1p8HeXQKZ8TmcCTg,9835
|
|
36
|
+
datapipeline/cli/visuals/__init__.py,sha256=CUxCoMoU96FQonq6V_i_HBUwuwoWjML5X-_MZDF_i8M,371
|
|
37
|
+
datapipeline/cli/visuals/common.py,sha256=NFyYzJnWEpP6vuhRXl6LRBbGTMk_12Ya1ckBK7kzpls,10298
|
|
38
|
+
datapipeline/cli/visuals/labels.py,sha256=rfilkKeTna25ZnSw2UlQ7apMK0DeDvTIRUCkaAOkM_I,1748
|
|
39
|
+
datapipeline/cli/visuals/runner.py,sha256=GtQcFjmYfVuNMmmp3uoJo0sXJOGeCoN6EOUUB6kzWSU,2085
|
|
40
|
+
datapipeline/cli/visuals/sections.py,sha256=o-AGWVuwQaYeGR87Ru6DgucUA92GhdfywXWDjTqjV2w,578
|
|
41
|
+
datapipeline/cli/visuals/sources.py,sha256=m0nNmRSlSNWTyGj_MF3PS_m9hXKtJv63bP_9-SKn0Xc,5144
|
|
42
|
+
datapipeline/cli/visuals/sources_basic.py,sha256=1-1JJ77AdiQv0iC4qpvb71cICmheInusIJFCc3csWLs,9086
|
|
43
|
+
datapipeline/cli/visuals/sources_off.py,sha256=8nWuwsj-XBsjcfZ9FBpuxC3vhC6mPPObjVsh1EUparM,2651
|
|
44
|
+
datapipeline/cli/visuals/sources_rich.py,sha256=GtEEO43PckQZFCOcMrZlX2JsrhIktb0Ixs6KXYMiLR8,16772
|
|
45
|
+
datapipeline/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
+
datapipeline/config/catalog.py,sha256=2eMwiCEof3WWKASfhxLNaZHLHCqXBoUtdJ1LvHUT74Y,2552
|
|
47
|
+
datapipeline/config/context.py,sha256=cfqhMfjXzHbiVaVkcuKPO_0KE6tbgs_SCgqshqTUDlQ,6821
|
|
48
|
+
datapipeline/config/metadata.py,sha256=SuLcADiuhNk4eekdfcwugMRPvmaelzGOVxBo_f8o9HY,1273
|
|
49
|
+
datapipeline/config/postprocess.py,sha256=67ukmtGNHFmZd8bv-POWOE_HjE5T5AXQaOypCZG1d-o,361
|
|
50
|
+
datapipeline/config/project.py,sha256=Br3kI7Z2EzdiTl6HVJPNROHnRfG4IVGK5oGRQVXbRw8,961
|
|
51
|
+
datapipeline/config/resolution.py,sha256=8X09qWV2NTj_v-T4OL99Ra1uoOLon0cse_Fq6tGCl6U,3880
|
|
52
|
+
datapipeline/config/split.py,sha256=VFYRF6Fz5xLTqqxIt3RVGB4kwlnHH8CxjOddEAJYG5Q,1048
|
|
53
|
+
datapipeline/config/tasks.py,sha256=RgUJfGXGj_7iwsiF8wpxM2He5U2r4OaWoGISLsWj0gs,9714
|
|
54
|
+
datapipeline/config/workspace.py,sha256=Wjbhpshe3cc0labHNt5xCjGDejygMDlEw8CeiLEJMpM,5688
|
|
55
|
+
datapipeline/config/dataset/dataset.py,sha256=2alhzYJjIck5VceCkyPTo_Wtc8jwAfbkKR00AK2-P3k,564
|
|
56
|
+
datapipeline/config/dataset/feature.py,sha256=RJn1l_epAUc9Rs6ttPRNzyMG1ZPfZRRkltUqT40Gn3g,361
|
|
57
|
+
datapipeline/config/dataset/loader.py,sha256=Eh_F56o242ptEIsR22kC7HkNi6SggpRf10Gpfc3ipTo,1063
|
|
58
|
+
datapipeline/config/dataset/normalize.py,sha256=BEGuZeevd9RFBxaxPqULRw54E_UfX5r-sUqiWLq5P_A,1012
|
|
59
|
+
datapipeline/domain/__init__.py,sha256=rfZZpfvozmQNKhBabzgC9g4urMbchjDXbbl54sNtxZQ,262
|
|
60
|
+
datapipeline/domain/feature.py,sha256=ZW91yU_Zaiw2GiZ4UNP-fDA7XimbtY7-gukeWgte18g,355
|
|
61
|
+
datapipeline/domain/record.py,sha256=v1QorYRnMcViIBAMiqmBo_pmVAkyxsaF6EFmqNHHXXo,1043
|
|
62
|
+
datapipeline/domain/sample.py,sha256=_mW3vPo8SVTWvNIMJKuKOR34xMBh0ELrZhY23XatcAY,1522
|
|
63
|
+
datapipeline/domain/vector.py,sha256=niKL0I1qOZa-9NC11zu-bXlj0V9IJ1Al_l_cexT6kpI,936
|
|
64
|
+
datapipeline/filters/filters.py,sha256=dM6U-QpGCQQ4-CMBTJgWZp2zH2TVTk6uYOqGPC5NBCY,2649
|
|
65
|
+
datapipeline/integrations/__init__.py,sha256=tjTLsIa6NRWKI05wjwPAUuXozDA-gP98SccFJ9lYHs8,410
|
|
66
|
+
datapipeline/integrations/ml/__init__.py,sha256=oflJXnjQEn1Zv0Vho10mc2y3D6UkKusNZwE5yUtatb8,463
|
|
67
|
+
datapipeline/integrations/ml/adapter.py,sha256=-VVsV6uvJOS8LyGZ0r1YJde4bF790wbc2wbs1U61LSI,4555
|
|
68
|
+
datapipeline/integrations/ml/pandas_support.py,sha256=nsi-xQ0u34PVh7cqAvzDQnzuknvNP5ZLUID6glcCiro,1193
|
|
69
|
+
datapipeline/integrations/ml/rows.py,sha256=dHmG2wn8FSKTuRH0XJ5AY78oSz52Cs7HFquzDY1o1j4,1959
|
|
70
|
+
datapipeline/integrations/ml/torch_support.py,sha256=06aoonaM56EMwqcEg1mNHJ0u3xqwa_0rD6OIYI2k-aI,2667
|
|
71
|
+
datapipeline/io/factory.py,sha256=xChYRxe1SRxHj8SXNirPEi2J20AOH3968yN92BRykr8,3903
|
|
72
|
+
datapipeline/io/output.py,sha256=3eQKLy0ld2M7ynPjo1Z5lTRGj6LOHUqdJU0MxX5EoJw,3967
|
|
73
|
+
datapipeline/io/protocols.py,sha256=vHjXhuV2r1Lo7k8SJuPH0WL2EXG_nm3DBpSowobUZ2U,512
|
|
74
|
+
datapipeline/io/serializers.py,sha256=wciI7mxDiLxctnL57oSmUOg8jO_lR8N2jrVnrIKjI4A,6839
|
|
75
|
+
datapipeline/io/sinks/__init__.py,sha256=7l-LmJAjuNrQZWMDFMXdjbZQ4Pq-iWMaN_3GcUvWntw,517
|
|
76
|
+
datapipeline/io/sinks/base.py,sha256=cXG6VXop0RVL1K4xpSaFq1scylhb6N6dsg6UMrQGw54,49
|
|
77
|
+
datapipeline/io/sinks/files.py,sha256=UgXXj8NxjvdOrwpJt5YNTgG1gW89YYCVpVkSg1eGgKI,1975
|
|
78
|
+
datapipeline/io/sinks/rich.py,sha256=hZNMttsqaMSUsQmCu6kubzkYbUGDTbTYBYnDwcFsEp0,1486
|
|
79
|
+
datapipeline/io/sinks/stdout.py,sha256=64VUdf_YghxTCjVyYcpBQpC_Pt5rPQrYejRg_0_cF7A,382
|
|
80
|
+
datapipeline/io/writers/__init__.py,sha256=V8228IYVxP4ay6yG8HF_ukBDseAERrqlWC4gbGDBmoc,397
|
|
81
|
+
datapipeline/io/writers/base.py,sha256=kUaFv6XOoUjYw5pE7XFUel5ptdEhuY03VTqajortUZY,814
|
|
82
|
+
datapipeline/io/writers/csv_writer.py,sha256=FL2qiS8Hr273lGcN6pQXGOSufcM06ApVZnPmNhuAwjQ,833
|
|
83
|
+
datapipeline/io/writers/jsonl.py,sha256=SP2yPgH4B_Xrr7GJFVVIsTxarNKAFbEB-0RS1F0fD-g,1736
|
|
84
|
+
datapipeline/io/writers/pickle_writer.py,sha256=omXSeGbrcSWwNBodwJNCBok0mW167xciT5S8w_w5xCo,928
|
|
85
|
+
datapipeline/mappers/noop.py,sha256=L8bH1QVbLH-ogIam0ppYdx7KuWQ7Dj44lvD8tvNlY0Q,111
|
|
86
|
+
datapipeline/mappers/synthetic/time.py,sha256=1_r5SUv2tOtmgWKunAkS_mE1uB8GroYvyi__cctUaL0,748
|
|
87
|
+
datapipeline/parsers/identity.py,sha256=pdGuz0SSQGfySPpvZSnLgfTXTkC36x-7dQMMei3XhsU,321
|
|
88
|
+
datapipeline/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
|
+
datapipeline/pipeline/artifacts.py,sha256=BwcRtOUE7mACK9TeJzv-WZ8bRJblA6SsOVPw6INkUz0,1456
|
|
90
|
+
datapipeline/pipeline/context.py,sha256=-W8QvGm32QGmBziEuzl-BitscuxGPb9bgQYDhRC1tkc,4377
|
|
91
|
+
datapipeline/pipeline/observability.py,sha256=BMuQazS9pKE559ew3ENsku03P8Fd79yA5ofSxHnF2xo,2007
|
|
92
|
+
datapipeline/pipeline/pipelines.py,sha256=lHcNb5n3l6OYgPOsdQnZJha6eHzSIDjUmV4HJQ0jEZc,6807
|
|
93
|
+
datapipeline/pipeline/split.py,sha256=TCzOhd8PF81IcUzUdPSz0hs3pIHi9V4IhXbSY2ZHK3Q,6090
|
|
94
|
+
datapipeline/pipeline/stages.py,sha256=w_5Q6L2qpzBlHVeO9nWc3625jRgipwW4Ldl0PylmF8I,10816
|
|
95
|
+
datapipeline/pipeline/utils/keygen.py,sha256=v2JJagJAE9iYfLtbl4uxoAEXZN_ALH0xdHhPDhNfKwU,1909
|
|
96
|
+
datapipeline/pipeline/utils/memory_sort.py,sha256=hS61n2CeIITRqffE1ftvn2IdqQp1IXYhuN4MJqncKvk,1155
|
|
97
|
+
datapipeline/pipeline/utils/ordering.py,sha256=ZX18I7GFtvyMFJB96vWQpTOGwljjeJ6ppCg5a3Av3es,1450
|
|
98
|
+
datapipeline/pipeline/utils/spool_cache.py,sha256=2T6bpYUPKztbjRzt5ghlIs62AhLCL7pZTYdy3eIgxo4,3714
|
|
99
|
+
datapipeline/pipeline/utils/transform_utils.py,sha256=9eTtypmsLw7G-mi7eR_0t6uKmwPVcFCvrG-cGa6aSu8,5035
|
|
100
|
+
datapipeline/registries/registry.py,sha256=MWWOHz2wT1oHQmovodtEreEuQhvH-i11Y2yXUUgZJhQ,641
|
|
101
|
+
datapipeline/services/artifacts.py,sha256=Ujc41F44_zcc6ndU3nXYXCQ-pWpNtBwNH5F2gXuFadE,2810
|
|
102
|
+
datapipeline/services/constants.py,sha256=qdfIb1uc_ewWLtzDAGR0YvcUAqFCxb8IRoyzbp_j8ZI,570
|
|
103
|
+
datapipeline/services/entrypoints.py,sha256=NKcSbhGRtBLQXGf-TdujwbVSRH1zb5J-S2jxFPnk6HQ,2504
|
|
104
|
+
datapipeline/services/factories.py,sha256=WTEFiQ_IJaSG-yTLyi1CpQOrP_LoUEDpco6zYygjIxk,5868
|
|
105
|
+
datapipeline/services/paths.py,sha256=3wydhJoyFzVAGi_DnF1xjPuF7rECk9rDJzEi2e5_55c,1319
|
|
106
|
+
datapipeline/services/project_paths.py,sha256=_ThKuXxh8TJFoChm6zL4s3qDsIihMmQy4FM0Pm6wW9E,4228
|
|
107
|
+
datapipeline/services/runs.py,sha256=9HGbJYFtFt_on1F5nTKoxvK7NCYxz3a4xos1TLJfBXg,6149
|
|
108
|
+
datapipeline/services/bootstrap/__init__.py,sha256=Mc2w2S69kU1hnzCvsGMhFqyNoNMXPwQtxprAkGN-sYE,245
|
|
109
|
+
datapipeline/services/bootstrap/config.py,sha256=122JNE7gZF1mohAI1gvX8H6i0JTql_Mm9bWcTpoD77c,4936
|
|
110
|
+
datapipeline/services/bootstrap/core.py,sha256=7iWf05GRdIOvgEHKOoq5NUFAMNhkd_kTe3zunkJqOHw,7394
|
|
111
|
+
datapipeline/services/scaffold/__init__.py,sha256=PaQNtYki9Kc7mQPnUtKDPU-rKohLHoXLvFVwdHdbXNM,68
|
|
112
|
+
datapipeline/services/scaffold/contract_yaml.py,sha256=SVznKjH1rmoy4DOJGGS6iAMrk0kHCvHHscbnGQJuPBM,2420
|
|
113
|
+
datapipeline/services/scaffold/demo.py,sha256=N219eGt0BrwvneTobGGTOxhtKIb_QJrm5JgQkfgh1lc,4615
|
|
114
|
+
datapipeline/services/scaffold/discovery.py,sha256=1fB7gKqI0gMNRdwCjeURddvs7zBSTG2c6Xh2Fa1jli4,3895
|
|
115
|
+
datapipeline/services/scaffold/domain.py,sha256=oyhO3Tx3OZKfLjtHRher5RuZ40kAIeiNl6l7AWf6Mlc,1039
|
|
116
|
+
datapipeline/services/scaffold/dto.py,sha256=qey-M5faA32eIotcAL_imctEHF9owbGK548lEcHkJto,873
|
|
117
|
+
datapipeline/services/scaffold/filter.py,sha256=b5_tMMUflL5LlN_a_NFbUmHIJp0jD8ZSbrS5eRNy64g,1142
|
|
118
|
+
datapipeline/services/scaffold/layout.py,sha256=de9tmM9dKw3jQ1qFd9Lecfqs6rIzwjIOMCSradLFiJI,2542
|
|
119
|
+
datapipeline/services/scaffold/loader.py,sha256=wlpaY86-5WeFfjkzqnuZUhI1qrCM5gBpN3sIbMCSC0E,1578
|
|
120
|
+
datapipeline/services/scaffold/mapper.py,sha256=-HmX-MvgOdCui31wLRUCu_gyo2JbAjBXdnMguyGvxLE,3663
|
|
121
|
+
datapipeline/services/scaffold/parser.py,sha256=Fi5nsjYUSwgf4RR85iv2cdbhFfg6okOFaW0ysvWgJ6c,1545
|
|
122
|
+
datapipeline/services/scaffold/plugin.py,sha256=0RC9C3m2IZsUm2hy85W1ce4MstR63x64imEZ7gv5FDo,3845
|
|
123
|
+
datapipeline/services/scaffold/source_yaml.py,sha256=mMLeVsY7SNefFP32d5eXxUGKwIwLzSMNvm0faWgUSKw,2849
|
|
124
|
+
datapipeline/services/scaffold/stream_plan.py,sha256=fKAKLtOrlitACVDDlVpKCeTxnG1VlQlLrah1LVMeWl8,4496
|
|
125
|
+
datapipeline/services/scaffold/templates.py,sha256=B3YnZpFUZLynijJosTNxZQLXnPP_Y_t1RHqfI1lGOxU,634
|
|
126
|
+
datapipeline/services/scaffold/utils.py,sha256=y6drKUjU1rgVXHfMBashrpgfcKUEw620O43hDF3WOg0,5154
|
|
127
|
+
datapipeline/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
|
+
datapipeline/sources/data_loader.py,sha256=BEGMeMUIgh1nDLDNQWxgy9iT54fQliv60s6ptZwcGDM,1322
|
|
129
|
+
datapipeline/sources/decoders.py,sha256=7oTld-q-sCsSbQqgvsfJAF21SGaHAybIU4JnLVn5cdY,5641
|
|
130
|
+
datapipeline/sources/factory.py,sha256=N2bIHH6NxKK43ybFBuQXteDnS32KMA9mYzrFfEU_jzU,2744
|
|
131
|
+
datapipeline/sources/foreach.py,sha256=DeMv8HGtZ2orvC_2EGCyqcIzWsabfAs7_lXUOzUNQu0,6612
|
|
132
|
+
datapipeline/sources/transports.py,sha256=kr4gLeR0yJLyIJ998XDGtO_PHRAY24IAbBtYu3KrPys,3445
|
|
133
|
+
datapipeline/sources/models/__init__.py,sha256=_DVhnet2HMvw-H-UEFQeEXCwro6Qg1ws0iBgMSKbBbM,399
|
|
134
|
+
datapipeline/sources/models/base.py,sha256=MAUawd11fII-mxxuSPM4f6H1t1tbyZX_QWhoAgeYUcU,238
|
|
135
|
+
datapipeline/sources/models/generator.py,sha256=OTJEcbpRp6pPZyG_8sds2x-15LF-SvAR5yblivG1E2g,508
|
|
136
|
+
datapipeline/sources/models/loader.py,sha256=VMWfEzrBvKdtRPjixPbttTochO3IULdglJ01769310E,1028
|
|
137
|
+
datapipeline/sources/models/parser.py,sha256=Ts31aksHLDCw5ovF2D99w9g_j-NnEiZ8x0JHtUxmmXs,226
|
|
138
|
+
datapipeline/sources/models/parsing_error.py,sha256=41pmauyqNK75Hke-rauRRNc-UveNXt8czxCViyZidvs,734
|
|
139
|
+
datapipeline/sources/models/source.py,sha256=lcAcbwM-HrMVO3uEWTpbvqY42g74JZWKD-KJ89Lsjys,913
|
|
140
|
+
datapipeline/sources/models/synthetic.py,sha256=FLF2Jvdc06VCriTCliThuQTUXd6NrXIQpksIL8gBIH8,288
|
|
141
|
+
datapipeline/sources/synthetic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
142
|
+
datapipeline/sources/synthetic/time/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
143
|
+
datapipeline/sources/synthetic/time/loader.py,sha256=X_NQJFAHL8wHV5TxbLhRwqGfFJPOw6qVToBkKFD3r_k,2003
|
|
144
|
+
datapipeline/sources/synthetic/time/parser.py,sha256=iyxiGgEKYS--V9LXgMxIKzWcu-4RNTikoqJcJ7h04IU,334
|
|
145
|
+
datapipeline/templates/demo_skeleton/demo/dataset.yaml,sha256=giyd8fvwUGoxAvsqjNjT43VAsp9R9poMYskGP1D5PW0,395
|
|
146
|
+
datapipeline/templates/demo_skeleton/demo/postprocess.yaml,sha256=9g6Yla2a0EstC3MECqRBspEXz3RIeTrn5lVRWJUNFpI,623
|
|
147
|
+
datapipeline/templates/demo_skeleton/demo/project.yaml,sha256=WlsyEXIcPkuJRY2RwxspoSF4wtRmBZJGParzwj_v8xw,452
|
|
148
|
+
datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml,sha256=x7aOKzfaPS2WDKUII-GN4zJvxWwD82OY-FtEtSPEl0Q,1159
|
|
149
|
+
datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml,sha256=Ml5fnl8DMtWIZsm49eIYQDzJZfeCUv74lsmTHnpmojg,622
|
|
150
|
+
datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml,sha256=Iqy9NBHKRnbEc4_n6lCMUAeUJQfj8KFm6xxDzA2pbyk,618
|
|
151
|
+
datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl,sha256=YCTGxvvrEpQmfM7tO5riV4EDR5jGHMe-FxiRnctoomQ,2664
|
|
152
|
+
datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl,sha256=ZvnenUFlcJRrNXKzDGdLVYI1no5wE_pjbVERvg5biLk,2659
|
|
153
|
+
datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml,sha256=-oAPvQqgEpLw4xTSeR1Io-j3pcFepG0r0r0Roda-RsY,470
|
|
154
|
+
datapipeline/templates/demo_skeleton/demo/sources/synthetic.ticks.yaml,sha256=EZ0IK5FmOpXOuD9Mvp-t5qr_OuJWill8Vfu5m0lBjzc,291
|
|
155
|
+
datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml,sha256=bCjlBbQNhRMjH9XuFXqdhMjQLRFqyFnKeSITHCNL9og,95
|
|
156
|
+
datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml,sha256=E4WJTjAbZ4cC4lhADGVft8YxwiiaRWzl3FeaNJ0_mAo,110
|
|
157
|
+
datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml,sha256=nqZPeWCpONnTXR2wOgcPz9EFr8V5r4pSg318XRfpZuM,91
|
|
158
|
+
datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml,sha256=nJzKTjIt7AwkWRSt6uzAkX31lhSpyJkgg--Zi1Fm6Tk,111
|
|
159
|
+
datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml,sha256=dZbT8rxY7nHXjm513Gfodd5b2RqiCtvJ-GfZd_271Pc,113
|
|
160
|
+
datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml,sha256=8KSHCzSBN0-_YZocSR8gNi0zYCNZnEUAOj4st5GOvUs,109
|
|
161
|
+
datapipeline/templates/demo_skeleton/scripts/run_dataframe.py,sha256=mre5BlGESMxuD-BMbnXr_DMDF42nhxJEgptYpUlb7SY,441
|
|
162
|
+
datapipeline/templates/demo_skeleton/scripts/run_torch.py,sha256=ZFvhBvsW53ETgsXn42GMZ2licPch4XhtMkidGDUiWq0,519
|
|
163
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
165
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py,sha256=7mlSnmG2vBGaImOZQrL-nUL7BDSYKlNVA_SWV-uCdl0,353
|
|
166
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py,sha256=wU1woaH-X-12CdRINt0cJgbrSiw8YlmqqSH8pKub91U,281
|
|
168
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
169
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py,sha256=gWEUPXHt7YGeHC2CWf3k2j_yya5g6TdOkgPNrI8A2G0,864
|
|
170
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
|
+
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py,sha256=eZCtm82JeQSxqHeKhMeqPMvgUFOgb1WInJcb1cDivJ0,1481
|
|
172
|
+
datapipeline/templates/plugin_skeleton/README.md,sha256=qn_G8EAZyKvXnuO1S6rQ8FbTE_CdC7EFL6jLt4J1bsA,2081
|
|
173
|
+
datapipeline/templates/plugin_skeleton/jerry.yaml,sha256=BchBoJcK2KFzB-LZ9gsGaPBBtndFWEgiqOQ-AXg-GdY,441
|
|
174
|
+
datapipeline/templates/plugin_skeleton/pyproject.toml,sha256=gMSwoLK_Xv51ecQq5ufKpXj-Oi7JNRroK7cmP0nTsvY,259
|
|
175
|
+
datapipeline/templates/plugin_skeleton/reference/jerry.yaml,sha256=l56fs4gT1iIcKIGQ2sLJAJO8bnyIMb58lIwL8Wqh6Zs,1383
|
|
176
|
+
datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml,sha256=R0ZX8H_Ns0f0-E2hHcTzgX3vza6D38MLmMXNcX5gflQ,1029
|
|
177
|
+
datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml,sha256=3jXbflYVqy8AA5nVjyg71aKTdi4QXAo8X4lDO5u8Dp8,1144
|
|
178
|
+
datapipeline/templates/plugin_skeleton/reference/reference/project.yaml,sha256=mlNG8e72zVWvWs_mt2o6WAI1mg91QyRlIj6q0CMsJl8,1402
|
|
179
|
+
datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml,sha256=W5w4qKClxz_keSPFU0cXT5R9K78MXOKI-90iwdpy8qk,1251
|
|
180
|
+
datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml,sha256=SY8gwtFGv9Y_w9bI5i-eYUuaquNouHa_7p89v2gvnpY,1385
|
|
181
|
+
datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml,sha256=txmhp7Gmwpbg0rOGtSrsyZb7ZZ6iqHpGPbypJDiWdmk,1463
|
|
182
|
+
datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml,sha256=ZnQMze5vdA7wP5OlJayuAXy3_Y70p1QTI8FRmX7OV-A,897
|
|
183
|
+
datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml,sha256=4nVMRrpSmC5EkDOomHGCdJmleWCX1mddTjDOMrDw7C0,783
|
|
184
|
+
datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml,sha256=L0rKQcsogFntouEiMfSyudLvwUehRLL6EtsciZ7BSB0,639
|
|
185
|
+
datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml,sha256=TOayPGhaHaUMBtkddXOIDTgeUywQU-AVGGmwpc-emEo,646
|
|
186
|
+
datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml,sha256=9qwKeDkRhMjYL5qgfrH1pYVFakVvkHSfNSayxXzOov0,516
|
|
187
|
+
datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml,sha256=vqyYh8U1wl6CXKOiW7Xuq42-VVX4-Njdn_56HYRfSiQ,443
|
|
188
|
+
datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml,sha256=gM8xKv5yI9rko4yOE60CgWDVcrBVQG4SwqLDnIARLLI,498
|
|
189
|
+
datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml,sha256=9WVV1sBeM1tCVtE8sCITMw3j0pTW_HNBArvhqAA-aBw,423
|
|
190
|
+
datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml,sha256=0T9a4TpdTQj9Aaa0SFqCPkZRUSSBmqWnfFyKUru1ojs,413
|
|
191
|
+
datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml,sha256=7ShVkEoZwqLO2m-45uLjLvsbbFbeRogCcU_G6UvU_K4,1271
|
|
192
|
+
datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
193
|
+
datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py,sha256=x-xAELjmZPsC6uWsA3am7Ku8C1XrJo9fCAudwCFSuYk,53
|
|
194
|
+
datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
195
|
+
datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
196
|
+
datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py,sha256=wRlfUlkslnXK5R63J78rlykPSGlrzL2D1P_gm1cPcm0,46
|
|
197
|
+
datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
198
|
+
datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml,sha256=js877ylqGFJBS7adzGVpFsXFVzofhEWLnuyKgF_YnwY,408
|
|
199
|
+
datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml,sha256=MFbgkHZ-MYKk_hYoEARE5ot9dVVb1w4D-jIvTYW8fxs,337
|
|
200
|
+
datapipeline/templates/plugin_skeleton/your-dataset/project.yaml,sha256=zt1QIkgNij546vocQnOzmOaCwPapLuIfv_mFnaCY4-U,497
|
|
201
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml,sha256=bCjlBbQNhRMjH9XuFXqdhMjQLRFqyFnKeSITHCNL9og,95
|
|
202
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml,sha256=E4WJTjAbZ4cC4lhADGVft8YxwiiaRWzl3FeaNJ0_mAo,110
|
|
203
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml,sha256=nqZPeWCpONnTXR2wOgcPz9EFr8V5r4pSg318XRfpZuM,91
|
|
204
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml,sha256=nJzKTjIt7AwkWRSt6uzAkX31lhSpyJkgg--Zi1Fm6Tk,111
|
|
205
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml,sha256=dZbT8rxY7nHXjm513Gfodd5b2RqiCtvJ-GfZd_271Pc,113
|
|
206
|
+
datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml,sha256=8KSHCzSBN0-_YZocSR8gNi0zYCNZnEUAOj4st5GOvUs,109
|
|
207
|
+
datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml,sha256=3VQn-1aox7dh7lE648m2z8isoqVl9n-Vq35xfNRLwtM,239
|
|
208
|
+
datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml,sha256=N1F-Xz3GaBn2H1p7uKzhkhKCQV8QVR0t76XD6wmFtXA,3
|
|
209
|
+
datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml,sha256=nGzWBDlLhSGX_Nn7KKc55DM3NxNmWCPwQ1aRV0Pe2Dw,410
|
|
210
|
+
datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml,sha256=TdqwfDpGn-byv23d0pQV_GD38hTIzUe0G2Z1ffwSR0Y,187
|
|
211
|
+
datapipeline/templates/stubs/dto.py.j2,sha256=iayNfcsfkdL5NE58erk8LL25Kf6FMlHzbdfYbgoehCA,841
|
|
212
|
+
datapipeline/templates/stubs/filter.py.j2,sha256=OE_HT4uxC8Fl928tuWwDZ-_QjLXNXWhVUq42DWkqaBc,504
|
|
213
|
+
datapipeline/templates/stubs/loader_synthetic.py.j2,sha256=9SQBeTBGlZmKs6nSYBKd8nbOPcFHgDx17Mh8xOEQnvs,1285
|
|
214
|
+
datapipeline/templates/stubs/parser.py.j2,sha256=LiTjv_CzdnE_M7_6Q7adVFIpX8-Vyg-ogM9vrPxHQNs,804
|
|
215
|
+
datapipeline/templates/stubs/parser_custom.py.j2,sha256=0Nytq43JdTZoyRj-4Mz6HWdMTmOP3VlFuYOB_A_13Vg,580
|
|
216
|
+
datapipeline/templates/stubs/record.py.j2,sha256=xB0KicFZTjU-pzKiuwV4O1QBSNPvHnehwaKPnYdwAZo,637
|
|
217
|
+
datapipeline/templates/stubs/source.yaml.j2,sha256=iMWN915cJfz-KzvEqHQgFdkWxf7kdTsPzClb2JvXE8Q,420
|
|
218
|
+
datapipeline/templates/stubs/contracts/composed.yaml.j2,sha256=n-t4w-eziZP6vVBVI_TwImSJWzdVJxQch9-OdAvPpuI,285
|
|
219
|
+
datapipeline/templates/stubs/contracts/ingest.yaml.j2,sha256=P6GoItw_lSVnadPE8Tvp6SwAgjCbH7wkD87Jhnl1OcY,1045
|
|
220
|
+
datapipeline/templates/stubs/loaders/basic.py.j2,sha256=fj9gm9iSzCxwDB6FLlN68G5AR-vm2dRJqLYCLFXW6Wo,303
|
|
221
|
+
datapipeline/templates/stubs/mappers/composed.py.j2,sha256=fWm2xNUZ_KFGJQc0izF2zJ9JZ8o4L1TDJtMI05KqKjc,522
|
|
222
|
+
datapipeline/templates/stubs/mappers/ingest.py.j2,sha256=6DR35on0VkyaGm3yWpQXUK7MaaPuYnnRyPSbbFjiiB4,655
|
|
223
|
+
datapipeline/transforms/filter.py,sha256=6BEwEy3_vlQ0mU00MkYkpgDVZrgUXSDcQ--BZANlNUw,2867
|
|
224
|
+
datapipeline/transforms/interfaces.py,sha256=rbq5vosNOCMqXOYyPrOX2PZVV-kmnwcaRMJUKANG9mk,1617
|
|
225
|
+
datapipeline/transforms/sequence.py,sha256=mhj5qkD2nUtZ2Kkfrm9ogYh8Mlmv3fDKf-aphB4-gOo,1639
|
|
226
|
+
datapipeline/transforms/utils.py,sha256=kJohiXdXkjvYbL3DllIL-cTM1WT6t8P2pZfnHtupJsc,1845
|
|
227
|
+
datapipeline/transforms/vector_utils.py,sha256=PcStTwRaaunONKZJuwv79bjdfaDcamLcwNLRHjZ5yXw,927
|
|
228
|
+
datapipeline/transforms/debug/identity.py,sha256=vF6e7IzHPjiwbao4AQ_TBsFhN_NmIBXa8ePWFD12rCo,3096
|
|
229
|
+
datapipeline/transforms/debug/lint.py,sha256=Pu9T6pIMOQVGvgMoOn6pTcaSvlMaaFc5Atpp_pH1iQk,3320
|
|
230
|
+
datapipeline/transforms/feature/model.py,sha256=gB-GP80_P7bzEKJFSM4leRke75yiD4-S5eJ1p8g3JU8,382
|
|
231
|
+
datapipeline/transforms/feature/scaler.py,sha256=8UR6-4m4IWSM3KdrzfW57SoRjRKGqt5tmGVqThCri6A,8060
|
|
232
|
+
datapipeline/transforms/record/floor_time.py,sha256=ifIpRXPJ7CcjdyJ5YAYqmC6fgm37Ix_O5S7y8t0g2HI,736
|
|
233
|
+
datapipeline/transforms/record/lag.py,sha256=2hwFocq9yslJynuIx0AlhOuxzhXEe093UzNj4PUIJ_s,550
|
|
234
|
+
datapipeline/transforms/stream/dedupe.py,sha256=xOfXbEFMygvIjVr4fP7gExKgYzvuyu4JB7myC7AkYW4,743
|
|
235
|
+
datapipeline/transforms/stream/ensure_ticks.py,sha256=xU-A84JDzQ_qohY01BrJEtioxH0SYTqOVzEsQ_Nt3sM,1791
|
|
236
|
+
datapipeline/transforms/stream/fill.py,sha256=ImnYeRJodpTFHPTpvVy6M0X1ALiIv9IVChjBgeGTIYo,3588
|
|
237
|
+
datapipeline/transforms/stream/filter.py,sha256=gMcSc5CucxN_5aktvnLM-0HKRPX86zIMu8zRWS8xwnQ,866
|
|
238
|
+
datapipeline/transforms/stream/floor_time.py,sha256=wGC8EkBnfavcwTx0MCoX17K8Z9rwupqo5AxtXZmUPaM,572
|
|
239
|
+
datapipeline/transforms/stream/granularity.py,sha256=NoBJVoaG1A0HAyPeVgRnGfRWSBKDIXAi1xFeJD0oo-M,4229
|
|
240
|
+
datapipeline/transforms/stream/lag.py,sha256=XyYUD0L6NmyZgRfbiTb-uiOAf43IRd2j8TwAoT0D6eM,576
|
|
241
|
+
datapipeline/transforms/stream/rolling.py,sha256=mBGJf28R61pwZaUmhdIshjldVCQ3JXNQGydbIkkXHY4,2510
|
|
242
|
+
datapipeline/transforms/vector/__init__.py,sha256=nKBaksXv_rBF2BUT-IFJTbbRBiwkpq6k39cLFS8CfXw,245
|
|
243
|
+
datapipeline/transforms/vector/common.py,sha256=24iX5EviN9BKi9-GJIty57LGOvEVD9PueZflyoGS5Nc,3616
|
|
244
|
+
datapipeline/transforms/vector/ensure_schema.py,sha256=_r0p0g2VK4VUmmnfkuzj0F1zV6T7xLT1hlTR-CxlNy4,7204
|
|
245
|
+
datapipeline/transforms/vector/fill.py,sha256=1bWXbVABnyndv4O01cZN0oVS2-X_HVAEGUmwywazs_w,3065
|
|
246
|
+
datapipeline/transforms/vector/replace.py,sha256=gYTZx0CTkdSsuXUiAvP7dtcyEHEQv2UJeIEiowGDzhw,2019
|
|
247
|
+
datapipeline/transforms/vector/drop/__init__.py,sha256=NsGv9v7n13uPsRWGTSLKWPVaXocQ-zx9WE3Ez5hQt3U,151
|
|
248
|
+
datapipeline/transforms/vector/drop/horizontal.py,sha256=e9Iyf6cDdKhp3WB38hBLd3av8P3_CC127ngCmkGWGWc,2530
|
|
249
|
+
datapipeline/transforms/vector/drop/orchestrator.py,sha256=WgckA2bUn3BFT-eHy44gSYJaF5H8UXDjzfmQ8v517X0,1949
|
|
250
|
+
datapipeline/transforms/vector/drop/vertical.py,sha256=8j1qGwMqeTMa5i41zzE4y4PacZ0IZW4y3d9k1X1SloY,6822
|
|
251
|
+
datapipeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
252
|
+
datapipeline/utils/load.py,sha256=FfW_UOD5NhxDg4DarRSzVbjGbv7An_bKK-IdUUOrFhs,1970
|
|
253
|
+
datapipeline/utils/paths.py,sha256=iP68o2Qewj9fRNdlVZkNZez3mkEdDxWQf5Tmdl01R6Y,762
|
|
254
|
+
datapipeline/utils/pickle_model.py,sha256=Uyd4AajInyTUpWfSJDDEGLinXeQkHjQUNnyla0owtA4,854
|
|
255
|
+
datapipeline/utils/placeholders.py,sha256=SAR8G37DFesznu4T87lD9Bvut5aCmriZBQNdfuiUK-o,854
|
|
256
|
+
datapipeline/utils/rich_compat.py,sha256=GwQGoHEXiX8wq-crYMXdDqbkKreHQnFK-6E8e8FYaCU,1129
|
|
257
|
+
datapipeline/utils/time.py,sha256=vOqa2arqwEqbDo-JWEhOFPMnI1E4Ib3i1L-Rt-cGH8c,1072
|
|
258
|
+
datapipeline/utils/window.py,sha256=g8hR_7IiLKVywlpPK2xhHx9QTOlNMiQuJrTL96Zs8gg,2540
|
|
259
|
+
jerry_thomas-2.0.1.dist-info/licenses/LICENSE,sha256=pkBMylAJF5yChHAkdxwFhEptLGx13i-XFEKh-Sh6DkM,1073
|
|
260
|
+
jerry_thomas-2.0.1.dist-info/METADATA,sha256=n9xxlQzghlj5OhtSTDhRNnBoCkFhOaePFm28DnvPZTU,13554
|
|
261
|
+
jerry_thomas-2.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
262
|
+
jerry_thomas-2.0.1.dist-info/entry_points.txt,sha256=BCvH4YYRe4ts9VSYjgm4x1-2CpKCkLLVKOD5ZU0AQzU,2020
|
|
263
|
+
jerry_thomas-2.0.1.dist-info/top_level.txt,sha256=N8aoNPdPyHefODO4YAm7tqTaUcw0e8LDcqycFTf8TbM,13
|
|
264
|
+
jerry_thomas-2.0.1.dist-info/RECORD,,
|
|
@@ -25,15 +25,19 @@ sequence = datapipeline.transforms.sequence:WindowTransformer
|
|
|
25
25
|
|
|
26
26
|
[datapipeline.transforms.record]
|
|
27
27
|
filter = datapipeline.transforms.filter:filter
|
|
28
|
-
floor_time = datapipeline.transforms.record.floor_time:
|
|
29
|
-
lag = datapipeline.transforms.record.lag:
|
|
28
|
+
floor_time = datapipeline.transforms.record.floor_time:FloorTimeRecordTransform
|
|
29
|
+
lag = datapipeline.transforms.record.lag:LagRecordTransform
|
|
30
30
|
|
|
31
31
|
[datapipeline.transforms.stream]
|
|
32
32
|
dedupe = datapipeline.transforms.stream.dedupe:FeatureDeduplicateTransform
|
|
33
|
-
ensure_cadence = datapipeline.transforms.stream.ensure_ticks:
|
|
33
|
+
ensure_cadence = datapipeline.transforms.stream.ensure_ticks:EnsureCadenceTransform
|
|
34
34
|
fill = datapipeline.transforms.stream.fill:FillTransformer
|
|
35
|
+
filter = datapipeline.transforms.stream.filter:FilterTransform
|
|
36
|
+
floor_time = datapipeline.transforms.stream.floor_time:FloorTimeTransform
|
|
35
37
|
granularity = datapipeline.transforms.stream.granularity:FeatureGranularityTransform
|
|
38
|
+
lag = datapipeline.transforms.stream.lag:LagTransform
|
|
36
39
|
lint = datapipeline.transforms.stream.lint:StreamLint
|
|
40
|
+
rolling = datapipeline.transforms.stream.rolling:RollingTransformer
|
|
37
41
|
|
|
38
42
|
[datapipeline.transforms.vector]
|
|
39
43
|
drop = datapipeline.transforms.vector:VectorDropTransform
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
from typing import Optional
|
|
3
|
-
import re
|
|
4
|
-
from ..constants import MAPPERS_GROUP
|
|
5
|
-
from ..paths import pkg_root, resolve_base_pkg_dir
|
|
6
|
-
from ..entrypoints import inject_ep
|
|
7
|
-
from .templates import render, camel
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def _slug(s: str) -> str:
|
|
11
|
-
s = s.strip().lower()
|
|
12
|
-
s = re.sub(r"[^a-z0-9]+", "_", s)
|
|
13
|
-
return s.strip("_")
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def attach_source_to_domain(*, domain: str, provider: str, dataset: str, root: Optional[Path]) -> None:
|
|
17
|
-
root_dir, name, pyproject = pkg_root(root)
|
|
18
|
-
base = resolve_base_pkg_dir(root_dir, name)
|
|
19
|
-
package_name = base.name
|
|
20
|
-
mappers_root = base / MAPPERS_GROUP
|
|
21
|
-
_ = _slug(provider)
|
|
22
|
-
ds = _slug(dataset)
|
|
23
|
-
dom = _slug(domain)
|
|
24
|
-
|
|
25
|
-
# Option B layout: mappers/{provider}/{dataset}/to_{domain}.py
|
|
26
|
-
pkg_dir = mappers_root / provider / dataset
|
|
27
|
-
pkg_dir.mkdir(parents=True, exist_ok=True)
|
|
28
|
-
(mappers_root / "__init__.py").touch(exist_ok=True)
|
|
29
|
-
(mappers_root / provider / "__init__.py").touch(exist_ok=True)
|
|
30
|
-
(mappers_root / provider / dataset / "__init__.py").touch(exist_ok=True)
|
|
31
|
-
|
|
32
|
-
module_name = f"to_{dom}"
|
|
33
|
-
path = pkg_dir / f"{module_name}.py"
|
|
34
|
-
if not path.exists():
|
|
35
|
-
function_name = "map"
|
|
36
|
-
path.write_text(render(
|
|
37
|
-
"mapper.py.j2",
|
|
38
|
-
PACKAGE_NAME=package_name,
|
|
39
|
-
ORIGIN=provider,
|
|
40
|
-
DATASET=dataset,
|
|
41
|
-
TARGET_DOMAIN=dom,
|
|
42
|
-
FUNCTION_NAME=function_name,
|
|
43
|
-
DomainConfig=f"{camel(domain)}Config",
|
|
44
|
-
DomainRecord=f"{camel(domain)}Record",
|
|
45
|
-
OriginDTO=f"{camel(provider)}{camel(dataset)}DTO",
|
|
46
|
-
time_aware=True,
|
|
47
|
-
))
|
|
48
|
-
print(f"[new] {path}")
|
|
49
|
-
|
|
50
|
-
# Register the mapper EP as domain.dataset
|
|
51
|
-
ep_key = f"{dom}.{ds}"
|
|
52
|
-
ep_target = f"{package_name}.mappers.{provider}.{dataset}.{module_name}:map"
|
|
53
|
-
toml = (root_dir / "pyproject.toml").read_text()
|
|
54
|
-
toml = inject_ep(toml, MAPPERS_GROUP, ep_key, ep_target)
|
|
55
|
-
(root_dir / "pyproject.toml").write_text(toml)
|
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
from ..constants import LOADERS_GROUP, PARSERS_GROUP
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
|
-
from datapipeline.services.scaffold.templates import camel, render
|
|
6
|
-
|
|
7
|
-
from ..constants import DEFAULT_IO_LOADER_EP
|
|
8
|
-
from ..entrypoints import inject_ep
|
|
9
|
-
from ..paths import pkg_root, resolve_base_pkg_dir
|
|
10
|
-
from datapipeline.services.project_paths import (
|
|
11
|
-
sources_dir as resolve_sources_dir,
|
|
12
|
-
ensure_project_scaffold,
|
|
13
|
-
resolve_project_yaml_path,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def _class_prefix(provider: str, dataset: str) -> str:
|
|
18
|
-
"""Single place to define class-prefix naming."""
|
|
19
|
-
return f"{camel(provider)}{camel(dataset)}"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def _source_alias(provider: str, dataset: str) -> str:
|
|
23
|
-
return f"{provider}.{dataset}"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _write_if_missing(path: Path, text: str) -> None:
|
|
27
|
-
"""Write file only if it does not exist; echo a friendly message."""
|
|
28
|
-
if not path.exists():
|
|
29
|
-
path.write_text(text)
|
|
30
|
-
print(f"[new] {path}")
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def _render_loader_stub(transport: str, loader_class: str,
|
|
34
|
-
*, fmt: Optional[str]) -> str | None:
|
|
35
|
-
"""Render loader stub from Jinja templates for supported transports."""
|
|
36
|
-
if transport == "synthetic":
|
|
37
|
-
return render("loader_synthetic.py.j2", CLASS_NAME=loader_class)
|
|
38
|
-
return None
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def _update_ep(toml_text: str, provider: str, dataset: str, pkg_name: str,
|
|
42
|
-
transport: str, parser_class: str, loader_class: str) -> tuple[str, str]:
|
|
43
|
-
"""
|
|
44
|
-
Inject parser EP (always). Returns (updated_toml, ep_key).
|
|
45
|
-
"""
|
|
46
|
-
ep_key = f"{provider}.{dataset}"
|
|
47
|
-
toml_text = inject_ep(
|
|
48
|
-
toml_text, PARSERS_GROUP, ep_key,
|
|
49
|
-
f"{pkg_name}.sources.{provider}.{dataset}.parser:{parser_class}"
|
|
50
|
-
)
|
|
51
|
-
if transport in {"synthetic"}:
|
|
52
|
-
toml_text = inject_ep(
|
|
53
|
-
toml_text, LOADERS_GROUP, ep_key,
|
|
54
|
-
f"{pkg_name}.sources.{provider}.{dataset}.loader:{loader_class}"
|
|
55
|
-
)
|
|
56
|
-
return toml_text, ep_key
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _loader_ep_and_args(transport: str, fmt: Optional[str], ep_key: Optional[str]) -> tuple[str, dict]:
|
|
60
|
-
"""Return (loader EP name, default args) for the YAML snippet."""
|
|
61
|
-
if transport == "fs":
|
|
62
|
-
args = {
|
|
63
|
-
"transport": "fs",
|
|
64
|
-
"format": fmt or "<FORMAT (csv|json|json-lines|pickle)>",
|
|
65
|
-
"path": "<PATH OR GLOB>",
|
|
66
|
-
"glob": False,
|
|
67
|
-
"encoding": "utf-8",
|
|
68
|
-
}
|
|
69
|
-
if fmt == "csv":
|
|
70
|
-
args["delimiter"] = ","
|
|
71
|
-
return DEFAULT_IO_LOADER_EP, args
|
|
72
|
-
if transport == "synthetic":
|
|
73
|
-
if ep_key is None:
|
|
74
|
-
raise ValueError("synthetic transport requires scaffolding a loader entrypoint")
|
|
75
|
-
return ep_key, {"start": "<ISO8601>", "end": "<ISO8601>", "frequency": "1h"}
|
|
76
|
-
if transport == "http":
|
|
77
|
-
args = {
|
|
78
|
-
"transport": "http",
|
|
79
|
-
"format": fmt or "<FORMAT (json|json-lines|csv)>",
|
|
80
|
-
"url": "<https://api.example.com/data.json>",
|
|
81
|
-
"headers": {},
|
|
82
|
-
"params": {},
|
|
83
|
-
"encoding": "utf-8",
|
|
84
|
-
}
|
|
85
|
-
if fmt == "csv":
|
|
86
|
-
args["delimiter"] = ","
|
|
87
|
-
return DEFAULT_IO_LOADER_EP, args
|
|
88
|
-
if ep_key is None:
|
|
89
|
-
raise ValueError(f"unsupported transport '{transport}' for identity scaffold")
|
|
90
|
-
return ep_key, {}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def create_source(
|
|
94
|
-
*,
|
|
95
|
-
provider: str,
|
|
96
|
-
dataset: str,
|
|
97
|
-
transport: str,
|
|
98
|
-
format: Optional[str],
|
|
99
|
-
root: Optional[Path],
|
|
100
|
-
identity: bool = False,
|
|
101
|
-
project_yaml: Optional[Path] = None,
|
|
102
|
-
) -> None:
|
|
103
|
-
root_dir, name, _ = pkg_root(root)
|
|
104
|
-
base = resolve_base_pkg_dir(root_dir, name)
|
|
105
|
-
package_name = base.name
|
|
106
|
-
|
|
107
|
-
alias = _source_alias(provider, dataset)
|
|
108
|
-
parser_ep: str
|
|
109
|
-
parser_args: dict
|
|
110
|
-
ep_key: Optional[str] = None
|
|
111
|
-
|
|
112
|
-
if identity:
|
|
113
|
-
if transport == "synthetic":
|
|
114
|
-
raise ValueError(
|
|
115
|
-
"identity parser scaffold is not supported for synthetic sources; "
|
|
116
|
-
"generate the standard parser instead."
|
|
117
|
-
)
|
|
118
|
-
parser_ep = "identity"
|
|
119
|
-
parser_args = {}
|
|
120
|
-
else:
|
|
121
|
-
src_pkg_dir = base / "sources" / provider / dataset
|
|
122
|
-
src_pkg_dir.mkdir(parents=True, exist_ok=True)
|
|
123
|
-
(src_pkg_dir / "__init__.py").touch(exist_ok=True)
|
|
124
|
-
|
|
125
|
-
class_prefix = _class_prefix(provider, dataset)
|
|
126
|
-
dto_class = f"{class_prefix}DTO"
|
|
127
|
-
parser_class = f"{class_prefix}Parser"
|
|
128
|
-
loader_class = f"{class_prefix}DataLoader"
|
|
129
|
-
|
|
130
|
-
# DTO
|
|
131
|
-
dto_path = src_pkg_dir / "dto.py"
|
|
132
|
-
_write_if_missing(dto_path, render(
|
|
133
|
-
"dto.py.j2",
|
|
134
|
-
PACKAGE_NAME=package_name, ORIGIN=provider, DOMAIN=dataset,
|
|
135
|
-
CLASS_NAME=dto_class, time_aware=True
|
|
136
|
-
))
|
|
137
|
-
|
|
138
|
-
# Parser
|
|
139
|
-
parser_path = src_pkg_dir / "parser.py"
|
|
140
|
-
_write_if_missing(parser_path, render(
|
|
141
|
-
"parser.py.j2",
|
|
142
|
-
PACKAGE_NAME=package_name, ORIGIN=provider, DOMAIN=dataset,
|
|
143
|
-
CLASS_NAME=parser_class, DTO_CLASS=dto_class, time_aware=True
|
|
144
|
-
))
|
|
145
|
-
|
|
146
|
-
# Optional loader stub: synthetic (http uses core IO loader by default)
|
|
147
|
-
if transport in {"synthetic"}:
|
|
148
|
-
loader_path = src_pkg_dir / "loader.py"
|
|
149
|
-
stub = _render_loader_stub(transport, loader_class, fmt=format)
|
|
150
|
-
if stub is not None:
|
|
151
|
-
_write_if_missing(loader_path, stub)
|
|
152
|
-
|
|
153
|
-
toml_path = root_dir / "pyproject.toml"
|
|
154
|
-
toml_text, ep_key = _update_ep(
|
|
155
|
-
toml_path.read_text(),
|
|
156
|
-
provider,
|
|
157
|
-
dataset,
|
|
158
|
-
package_name,
|
|
159
|
-
transport,
|
|
160
|
-
parser_class,
|
|
161
|
-
loader_class,
|
|
162
|
-
)
|
|
163
|
-
toml_path.write_text(toml_text)
|
|
164
|
-
|
|
165
|
-
parser_ep = ep_key
|
|
166
|
-
parser_args = {}
|
|
167
|
-
|
|
168
|
-
loader_ep, loader_args = _loader_ep_and_args(transport, format, ep_key)
|
|
169
|
-
|
|
170
|
-
# Resolve sources directory from a single dataset-scoped project config.
|
|
171
|
-
# If not present or invalid, let the exception bubble up to prompt the user
|
|
172
|
-
# to provide a valid project path.
|
|
173
|
-
proj_yaml = project_yaml.resolve() if project_yaml is not None else resolve_project_yaml_path(root_dir)
|
|
174
|
-
# Best-effort: create a minimal project scaffold if missing
|
|
175
|
-
ensure_project_scaffold(proj_yaml)
|
|
176
|
-
sources_dir = resolve_sources_dir(proj_yaml).resolve()
|
|
177
|
-
sources_dir.mkdir(parents=True, exist_ok=True)
|
|
178
|
-
src_cfg_path = sources_dir / f"{alias}.yaml"
|
|
179
|
-
if not src_cfg_path.exists():
|
|
180
|
-
src_cfg_path.write_text(render(
|
|
181
|
-
"source.yaml.j2",
|
|
182
|
-
id=alias,
|
|
183
|
-
parser_ep=parser_ep,
|
|
184
|
-
parser_args=parser_args,
|
|
185
|
-
loader_ep=loader_ep,
|
|
186
|
-
loader_args=loader_args,
|
|
187
|
-
default_io_loader_ep=DEFAULT_IO_LOADER_EP,
|
|
188
|
-
))
|
|
189
|
-
print(f"[new] {src_cfg_path.resolve()}")
|
|
190
|
-
elif identity:
|
|
191
|
-
print(f"[info] Source YAML already exists; skipped identity scaffold at {src_cfg_path.resolve()}")
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
kind: ingest
|
|
2
|
-
source: synthetic.ticks
|
|
3
|
-
id: time.ticks.hour_sin # format: domain.dataset.(variant)
|
|
4
|
-
|
|
5
|
-
# Fine-grained cadence for this stream. Defaults to the dataset group_by via project.globals.
|
|
6
|
-
cadence: ${group_by}
|
|
7
|
-
|
|
8
|
-
mapper:
|
|
9
|
-
entrypoint: encode_time
|
|
10
|
-
args: { mode: hour_sin }
|
|
11
|
-
|
|
12
|
-
# partition_by: field you want to partition
|
|
13
|
-
|
|
14
|
-
record:
|
|
15
|
-
- filter: { operator: ge, field: time, comparand: "${start_time}" }
|
|
16
|
-
- filter: { operator: le, field: time, comparand: "${end_time}" }
|
|
17
|
-
- floor_time: { cadence: "${cadence}" }
|
|
18
|
-
# - lag: { lag: "${cadence}" }
|
|
19
|
-
|
|
20
|
-
stream:
|
|
21
|
-
- dedupe: {}
|
|
22
|
-
- granularity: { mode: first }
|
|
23
|
-
- ensure_cadence: { cadence: "${cadence}" }
|
|
24
|
-
# Optional: fill gaps before downstream transforms:
|
|
25
|
-
# - fill: { statistic: median, window: 24, min_samples: 4 }
|
|
26
|
-
|
|
27
|
-
debug:
|
|
28
|
-
- lint: { mode: error, tick: "${cadence}" }
|
|
29
|
-
|
|
30
|
-
# sort_batch_size: 100000
|
|
31
|
-
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
kind: ingest
|
|
2
|
-
source: synthetic.ticks # raw source alias (see example/sources)
|
|
3
|
-
id: time.ticks.linear # canonical stream id (format: domain.dataset.(variant))
|
|
4
|
-
|
|
5
|
-
# Fine-grained cadence for this stream. Defaults to the dataset group_by via project.globals.
|
|
6
|
-
cadence: ${group_by}
|
|
7
|
-
|
|
8
|
-
mapper: # normalize/reshape DTO -> TemporalRecord
|
|
9
|
-
entrypoint: encode_time
|
|
10
|
-
args: { mode: linear }
|
|
11
|
-
# partition_by: station_id # optional: add partition suffixes to feature ids
|
|
12
|
-
|
|
13
|
-
record: # record-level transforms
|
|
14
|
-
- filter: { operator: ge, field: time, comparand: "${start_time}" }
|
|
15
|
-
- filter: { operator: le, field: time, comparand: "${end_time}" }
|
|
16
|
-
- floor_time: { cadence: "${cadence}" } # snap timestamps to cadence boundaries
|
|
17
|
-
# - lag: { lag: "${cadence}" } # optional: shift timestamps backwards
|
|
18
|
-
|
|
19
|
-
stream: # per-feature stream transforms (input sorted by id,time)
|
|
20
|
-
- dedupe: {} # drop exact-duplicate records per tick
|
|
21
|
-
- granularity: { mode: first } # aggregate duplicates within a tick
|
|
22
|
-
- ensure_cadence: { cadence: "${cadence}" } # insert missing ticks (value=None)
|
|
23
|
-
# Consider adding a fill transform to impute None values before sequence/windowing:
|
|
24
|
-
# - fill: { statistic: median, window: 6, min_samples: 1 }
|
|
25
|
-
|
|
26
|
-
debug: # optional validation-only transforms
|
|
27
|
-
- lint: { mode: error, tick: "${cadence}" } # strict cadence/order; value issues handled by downstream transforms
|
|
28
|
-
|
|
29
|
-
# sort_batch_size: 100000 # in-memory chunk size used by internal sorting
|
|
30
|
-
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
group_by: ${group_by}
|
|
2
|
-
|
|
3
|
-
features:
|
|
4
|
-
- id: time_linear
|
|
5
|
-
record_stream: time.ticks.linear
|
|
6
|
-
scale: true # optionally add with_mean/with_std overrides
|
|
7
|
-
# Sliding window over the regularized stream; cadence is enforced in the contract.
|
|
8
|
-
sequence: { size: 6, stride: 1 }
|
|
9
|
-
|
|
10
|
-
- id: time_hour_sin
|
|
11
|
-
record_stream: time.ticks.hour_sin
|
|
12
|
-
|
|
13
|
-
# - id: third_feature
|
|
14
|
-
# record_stream: anotherstream
|
|
15
|
-
# targets:
|
|
16
|
-
# - id: some_target
|
|
17
|
-
# record_stream: time.ticks.linear
|
|
18
|
-
|