jerry-thomas 1.0.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datapipeline/analysis/vector/collector.py +0 -1
- datapipeline/build/tasks/config.py +0 -2
- datapipeline/build/tasks/metadata.py +0 -2
- datapipeline/build/tasks/scaler.py +0 -2
- datapipeline/build/tasks/schema.py +0 -2
- datapipeline/build/tasks/utils.py +0 -2
- datapipeline/cli/app.py +201 -81
- datapipeline/cli/commands/contract.py +145 -283
- datapipeline/cli/commands/demo.py +13 -0
- datapipeline/cli/commands/domain.py +4 -4
- datapipeline/cli/commands/dto.py +11 -0
- datapipeline/cli/commands/filter.py +2 -2
- datapipeline/cli/commands/inspect.py +0 -68
- datapipeline/cli/commands/list_.py +30 -13
- datapipeline/cli/commands/loader.py +11 -0
- datapipeline/cli/commands/mapper.py +82 -0
- datapipeline/cli/commands/parser.py +45 -0
- datapipeline/cli/commands/run_config.py +1 -3
- datapipeline/cli/commands/serve_pipeline.py +5 -7
- datapipeline/cli/commands/source.py +106 -18
- datapipeline/cli/commands/stream.py +292 -0
- datapipeline/cli/visuals/common.py +0 -2
- datapipeline/cli/visuals/sections.py +0 -2
- datapipeline/cli/workspace_utils.py +0 -3
- datapipeline/config/context.py +0 -2
- datapipeline/config/dataset/feature.py +1 -0
- datapipeline/config/metadata.py +0 -2
- datapipeline/config/project.py +0 -2
- datapipeline/config/resolution.py +10 -2
- datapipeline/config/tasks.py +9 -9
- datapipeline/domain/feature.py +3 -0
- datapipeline/domain/record.py +7 -7
- datapipeline/domain/sample.py +0 -2
- datapipeline/domain/vector.py +6 -8
- datapipeline/integrations/ml/adapter.py +0 -2
- datapipeline/integrations/ml/pandas_support.py +0 -2
- datapipeline/integrations/ml/rows.py +0 -2
- datapipeline/integrations/ml/torch_support.py +0 -2
- datapipeline/io/output.py +0 -2
- datapipeline/io/serializers.py +26 -16
- datapipeline/mappers/synthetic/time.py +9 -2
- datapipeline/pipeline/artifacts.py +3 -5
- datapipeline/pipeline/observability.py +0 -2
- datapipeline/pipeline/pipelines.py +118 -34
- datapipeline/pipeline/stages.py +54 -18
- datapipeline/pipeline/utils/spool_cache.py +142 -0
- datapipeline/pipeline/utils/transform_utils.py +27 -2
- datapipeline/services/artifacts.py +1 -4
- datapipeline/services/constants.py +1 -0
- datapipeline/services/factories.py +4 -6
- datapipeline/services/paths.py +10 -1
- datapipeline/services/project_paths.py +0 -2
- datapipeline/services/runs.py +0 -2
- datapipeline/services/scaffold/contract_yaml.py +76 -0
- datapipeline/services/scaffold/demo.py +141 -0
- datapipeline/services/scaffold/discovery.py +115 -0
- datapipeline/services/scaffold/domain.py +21 -13
- datapipeline/services/scaffold/dto.py +31 -0
- datapipeline/services/scaffold/filter.py +2 -1
- datapipeline/services/scaffold/layout.py +96 -0
- datapipeline/services/scaffold/loader.py +61 -0
- datapipeline/services/scaffold/mapper.py +116 -0
- datapipeline/services/scaffold/parser.py +56 -0
- datapipeline/services/scaffold/plugin.py +14 -2
- datapipeline/services/scaffold/source_yaml.py +91 -0
- datapipeline/services/scaffold/stream_plan.py +129 -0
- datapipeline/services/scaffold/utils.py +187 -0
- datapipeline/sources/data_loader.py +0 -2
- datapipeline/sources/decoders.py +49 -8
- datapipeline/sources/factory.py +9 -6
- datapipeline/sources/foreach.py +18 -3
- datapipeline/sources/synthetic/time/parser.py +1 -1
- datapipeline/sources/transports.py +10 -4
- datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
- datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
- datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
- datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
- datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
- datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
- datapipeline/templates/plugin_skeleton/README.md +57 -136
- datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
- datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
- datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
- datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
- datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
- datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +9 -11
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +15 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
- datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
- datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
- datapipeline/templates/stubs/dto.py.j2 +2 -2
- datapipeline/templates/stubs/filter.py.j2 +1 -1
- datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
- datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
- datapipeline/templates/stubs/mappers/ingest.py.j2 +20 -0
- datapipeline/templates/stubs/parser.py.j2 +5 -1
- datapipeline/templates/stubs/record.py.j2 +1 -1
- datapipeline/templates/stubs/source.yaml.j2 +1 -1
- datapipeline/transforms/debug/identity.py +34 -16
- datapipeline/transforms/debug/lint.py +14 -11
- datapipeline/transforms/feature/scaler.py +5 -12
- datapipeline/transforms/filter.py +73 -17
- datapipeline/transforms/interfaces.py +58 -0
- datapipeline/transforms/record/floor_time.py +10 -7
- datapipeline/transforms/record/lag.py +8 -10
- datapipeline/transforms/sequence.py +2 -3
- datapipeline/transforms/stream/dedupe.py +5 -7
- datapipeline/transforms/stream/ensure_ticks.py +39 -24
- datapipeline/transforms/stream/fill.py +34 -25
- datapipeline/transforms/stream/filter.py +25 -0
- datapipeline/transforms/stream/floor_time.py +16 -0
- datapipeline/transforms/stream/granularity.py +52 -30
- datapipeline/transforms/stream/lag.py +17 -0
- datapipeline/transforms/stream/rolling.py +72 -0
- datapipeline/transforms/utils.py +42 -10
- datapipeline/transforms/vector/drop/horizontal.py +0 -3
- datapipeline/transforms/vector/drop/orchestrator.py +0 -3
- datapipeline/transforms/vector/drop/vertical.py +0 -2
- datapipeline/transforms/vector/ensure_schema.py +0 -2
- datapipeline/utils/paths.py +0 -2
- datapipeline/utils/placeholders.py +0 -2
- datapipeline/utils/rich_compat.py +0 -3
- datapipeline/utils/window.py +0 -2
- jerry_thomas-2.0.1.dist-info/METADATA +269 -0
- jerry_thomas-2.0.1.dist-info/RECORD +264 -0
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/WHEEL +1 -1
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/entry_points.txt +7 -3
- datapipeline/services/scaffold/mappers.py +0 -55
- datapipeline/services/scaffold/source.py +0 -191
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
- datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
- datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
- datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
- datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
- datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
- datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
- datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
- datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
- datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
- datapipeline/templates/stubs/mapper.py.j2 +0 -22
- jerry_thomas-1.0.3.dist-info/METADATA +0 -827
- jerry_thomas-1.0.3.dist-info/RECORD +0 -198
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/top_level.txt +0 -0
datapipeline/sources/decoders.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import Iterable, Iterator, Any, Optional
|
|
2
|
+
from typing import Iterable, Iterator, Any, Optional, Sequence
|
|
5
3
|
import codecs
|
|
6
4
|
import csv
|
|
7
5
|
import io
|
|
8
6
|
import json
|
|
9
7
|
import pickle
|
|
8
|
+
import itertools
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
class Decoder(ABC):
|
|
@@ -32,7 +31,7 @@ def _iter_text_lines(chunks: Iterable[bytes], encoding: str) -> Iterator[str]:
|
|
|
32
31
|
idx = buffer.find("\n")
|
|
33
32
|
if idx == -1:
|
|
34
33
|
break
|
|
35
|
-
line, buffer = buffer[:idx], buffer[idx + 1
|
|
34
|
+
line, buffer = buffer[:idx], buffer[idx + 1:]
|
|
36
35
|
if line.endswith("\r"):
|
|
37
36
|
line = line[:-1]
|
|
38
37
|
yield line
|
|
@@ -53,26 +52,58 @@ def _read_all_text(chunks: Iterable[bytes], encoding: str) -> str:
|
|
|
53
52
|
|
|
54
53
|
|
|
55
54
|
class CsvDecoder(Decoder):
|
|
56
|
-
def __init__(
|
|
55
|
+
def __init__(
|
|
56
|
+
self,
|
|
57
|
+
*,
|
|
58
|
+
delimiter: str = ";",
|
|
59
|
+
encoding: str = "utf-8",
|
|
60
|
+
error_prefixes: Optional[Sequence[str]] = None,
|
|
61
|
+
):
|
|
57
62
|
self.delimiter = delimiter
|
|
58
63
|
self.encoding = encoding
|
|
64
|
+
self._error_prefixes = [p.lower() for p in (error_prefixes or [])]
|
|
65
|
+
|
|
66
|
+
def _iter_lines(self, chunks: Iterable[bytes]) -> Iterator[str]:
|
|
67
|
+
lines = _iter_text_lines(chunks, self.encoding)
|
|
68
|
+
try:
|
|
69
|
+
first = next(lines)
|
|
70
|
+
except StopIteration:
|
|
71
|
+
return iter(())
|
|
72
|
+
if self._error_prefixes:
|
|
73
|
+
lowered = first.lstrip().lower()
|
|
74
|
+
if any(lowered.startswith(p) for p in self._error_prefixes):
|
|
75
|
+
raise ValueError(
|
|
76
|
+
f"csv response looks like error text: {first[:120]}")
|
|
77
|
+
return itertools.chain([first], lines)
|
|
59
78
|
|
|
60
79
|
def decode(self, chunks: Iterable[bytes]) -> Iterator[dict]:
|
|
61
|
-
reader = csv.DictReader(
|
|
80
|
+
reader = csv.DictReader(self._iter_lines(
|
|
81
|
+
chunks), delimiter=self.delimiter)
|
|
62
82
|
for row in reader:
|
|
63
83
|
yield row
|
|
64
84
|
|
|
65
85
|
def count(self, chunks: Iterable[bytes]) -> Optional[int]:
|
|
66
|
-
return sum(1 for _ in csv.DictReader(
|
|
86
|
+
return sum(1 for _ in csv.DictReader(self._iter_lines(chunks), delimiter=self.delimiter))
|
|
67
87
|
|
|
68
88
|
|
|
69
89
|
class JsonDecoder(Decoder):
|
|
70
|
-
def __init__(self, *, encoding: str = "utf-8"):
|
|
90
|
+
def __init__(self, *, encoding: str = "utf-8", array_field: Optional[str] = None):
|
|
71
91
|
self.encoding = encoding
|
|
92
|
+
self.array_field = array_field
|
|
72
93
|
|
|
73
94
|
def decode(self, chunks: Iterable[bytes]) -> Iterator[Any]:
|
|
74
95
|
text = _read_all_text(chunks, self.encoding)
|
|
75
96
|
data = json.loads(text)
|
|
97
|
+
if self.array_field:
|
|
98
|
+
if not isinstance(data, dict):
|
|
99
|
+
raise ValueError(
|
|
100
|
+
"json array_field requires a top-level object")
|
|
101
|
+
if self.array_field not in data:
|
|
102
|
+
raise ValueError(
|
|
103
|
+
f"json array_field missing: {self.array_field}")
|
|
104
|
+
data = data[self.array_field]
|
|
105
|
+
if data is None:
|
|
106
|
+
return # TODO MAYBE we NEED DO DO SOMETHING ABOUT THIS so we dont silence it
|
|
76
107
|
if isinstance(data, list):
|
|
77
108
|
for item in data:
|
|
78
109
|
yield item
|
|
@@ -83,6 +114,16 @@ class JsonDecoder(Decoder):
|
|
|
83
114
|
def count(self, chunks: Iterable[bytes]) -> Optional[int]:
|
|
84
115
|
text = _read_all_text(chunks, self.encoding)
|
|
85
116
|
data = json.loads(text)
|
|
117
|
+
if self.array_field:
|
|
118
|
+
if not isinstance(data, dict):
|
|
119
|
+
raise ValueError(
|
|
120
|
+
"json array_field requires a top-level object")
|
|
121
|
+
if self.array_field not in data:
|
|
122
|
+
raise ValueError(
|
|
123
|
+
f"json array_field missing: {self.array_field}")
|
|
124
|
+
data = data[self.array_field]
|
|
125
|
+
if data is None:
|
|
126
|
+
return 0
|
|
86
127
|
return len(data) if isinstance(data, list) else 1
|
|
87
128
|
|
|
88
129
|
|
datapipeline/sources/factory.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
from typing import Any, Dict
|
|
4
2
|
|
|
5
3
|
from datapipeline.sources.data_loader import DataLoader
|
|
@@ -19,7 +17,9 @@ def build_loader(*, transport: str, format: str | None = None, **kwargs: Any) ->
|
|
|
19
17
|
transport: "fs" | "http"
|
|
20
18
|
format: "csv" | "json" | "json-lines" | "pickle" (required for fs/http)
|
|
21
19
|
fs: path (str), glob (bool, optional), encoding (str, default utf-8), delimiter (csv only)
|
|
22
|
-
http: url (str), headers (dict, optional), params (dict, optional), encoding (str, default utf-8)
|
|
20
|
+
http: url (str), headers (dict, optional), params (dict, optional), encoding (str, default utf-8), timeout_seconds (float, optional)
|
|
21
|
+
csv: error_prefixes (list[str], optional)
|
|
22
|
+
json: array_field (str, optional)
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
t = (transport or "").lower()
|
|
@@ -40,16 +40,19 @@ def build_loader(*, transport: str, format: str | None = None, **kwargs: Any) ->
|
|
|
40
40
|
headers: Dict[str, str] = dict(kwargs.get("headers") or {})
|
|
41
41
|
params: Dict[str, Any] = dict(kwargs.get("params") or {})
|
|
42
42
|
encoding = kwargs.get("encoding", "utf-8")
|
|
43
|
-
|
|
43
|
+
timeout_seconds = kwargs.get("timeout_seconds")
|
|
44
|
+
source = HttpTransport(url, headers=headers, params=params, timeout_seconds=timeout_seconds)
|
|
44
45
|
else:
|
|
45
46
|
raise ValueError(f"unsupported transport: {transport}")
|
|
46
47
|
|
|
47
48
|
# Build decoder
|
|
48
49
|
if fmt == "csv":
|
|
49
50
|
delimiter = kwargs.get("delimiter", ";")
|
|
50
|
-
|
|
51
|
+
error_prefixes = kwargs.get("error_prefixes")
|
|
52
|
+
decoder = CsvDecoder(delimiter=delimiter, encoding=encoding, error_prefixes=error_prefixes)
|
|
51
53
|
elif fmt == "json":
|
|
52
|
-
|
|
54
|
+
array_field = kwargs.get("array_field")
|
|
55
|
+
decoder = JsonDecoder(encoding=encoding, array_field=array_field)
|
|
53
56
|
elif fmt == "json-lines":
|
|
54
57
|
decoder = JsonLinesDecoder(encoding=encoding)
|
|
55
58
|
elif fmt == "pickle":
|
datapipeline/sources/foreach.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
import re
|
|
2
|
+
import time
|
|
4
3
|
from typing import Any, Iterator, Mapping
|
|
5
4
|
|
|
6
5
|
from datapipeline.plugins import LOADERS_EP
|
|
@@ -49,11 +48,13 @@ class ForeachLoader(BaseDataLoader):
|
|
|
49
48
|
loader: Mapping[str, Any],
|
|
50
49
|
inject_field: str | None = None,
|
|
51
50
|
inject: Mapping[str, Any] | None = None,
|
|
51
|
+
throttle_seconds: float | None = None,
|
|
52
52
|
):
|
|
53
53
|
self._key, self._values = self._normalize_foreach(foreach)
|
|
54
54
|
self._loader_spec = self._normalize_loader_spec(loader)
|
|
55
55
|
self._inject_field = inject_field
|
|
56
56
|
self._inject = inject
|
|
57
|
+
self._throttle_seconds = self._normalize_throttle(throttle_seconds)
|
|
57
58
|
self._current_index: int | None = None
|
|
58
59
|
self._current_value: Any | None = None
|
|
59
60
|
self._current_args: dict[str, Any] | None = None
|
|
@@ -68,6 +69,8 @@ class ForeachLoader(BaseDataLoader):
|
|
|
68
69
|
|
|
69
70
|
def load(self) -> Iterator[Any]:
|
|
70
71
|
for i, value in enumerate(self._values, 1):
|
|
72
|
+
if self._throttle_seconds and i > 1:
|
|
73
|
+
time.sleep(self._throttle_seconds)
|
|
71
74
|
vars_ = {self._key: value}
|
|
72
75
|
loader_args = self._make_loader_args(vars_)
|
|
73
76
|
loader = self._build_loader(loader_args)
|
|
@@ -84,7 +87,9 @@ class ForeachLoader(BaseDataLoader):
|
|
|
84
87
|
|
|
85
88
|
def count(self):
|
|
86
89
|
total = 0
|
|
87
|
-
for value in self._values:
|
|
90
|
+
for i, value in enumerate(self._values, 1):
|
|
91
|
+
if self._throttle_seconds and i > 1:
|
|
92
|
+
time.sleep(self._throttle_seconds)
|
|
88
93
|
vars_ = {self._key: value}
|
|
89
94
|
loader_args = self._make_loader_args(vars_)
|
|
90
95
|
loader = self._build_loader(loader_args)
|
|
@@ -119,6 +124,16 @@ class ForeachLoader(BaseDataLoader):
|
|
|
119
124
|
raise TypeError("core.foreach loader.args must be a mapping when provided")
|
|
120
125
|
return dict(loader)
|
|
121
126
|
|
|
127
|
+
@staticmethod
|
|
128
|
+
def _normalize_throttle(throttle_seconds: float | None) -> float:
|
|
129
|
+
if throttle_seconds is None:
|
|
130
|
+
return 0.0
|
|
131
|
+
if not isinstance(throttle_seconds, (int, float)):
|
|
132
|
+
raise TypeError("core.foreach throttle_seconds must be a number")
|
|
133
|
+
if throttle_seconds < 0:
|
|
134
|
+
raise ValueError("core.foreach throttle_seconds must be >= 0")
|
|
135
|
+
return float(throttle_seconds)
|
|
136
|
+
|
|
122
137
|
def _make_loader_args(self, vars_: Mapping[str, Any]) -> dict[str, Any]:
|
|
123
138
|
args = self._loader_spec.get("args") or {}
|
|
124
139
|
interpolated = _interpolate(args, vars_)
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
from abc import ABC, abstractmethod
|
|
4
2
|
from typing import Iterable, Iterator, List, Dict, Optional, Any
|
|
5
3
|
from urllib.request import Request, urlopen
|
|
@@ -65,11 +63,19 @@ class FsGlobTransport(Transport):
|
|
|
65
63
|
|
|
66
64
|
|
|
67
65
|
class HttpTransport(Transport):
|
|
68
|
-
def __init__(
|
|
66
|
+
def __init__(
|
|
67
|
+
self,
|
|
68
|
+
url: str,
|
|
69
|
+
headers: Optional[Dict[str, str]] = None,
|
|
70
|
+
params: Optional[Dict[str, Any]] = None,
|
|
71
|
+
chunk_size: int = 64 * 1024,
|
|
72
|
+
timeout_seconds: Optional[float] = None,
|
|
73
|
+
):
|
|
69
74
|
self.url = url
|
|
70
75
|
self.headers = dict(headers or {})
|
|
71
76
|
self.params: Dict[str, Any] = dict(params or {})
|
|
72
77
|
self.chunk_size = chunk_size
|
|
78
|
+
self.timeout_seconds = timeout_seconds
|
|
73
79
|
|
|
74
80
|
def _build_url(self) -> str:
|
|
75
81
|
if not self.params:
|
|
@@ -88,7 +94,7 @@ class HttpTransport(Transport):
|
|
|
88
94
|
req = Request(req_url, headers=self.headers)
|
|
89
95
|
|
|
90
96
|
try:
|
|
91
|
-
resp = urlopen(req)
|
|
97
|
+
resp = urlopen(req, timeout=self.timeout_seconds)
|
|
92
98
|
except (URLError, HTTPError) as e:
|
|
93
99
|
raise RuntimeError(f"failed to fetch {self.url}: {e}") from e
|
|
94
100
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
kind: ingest
|
|
2
|
+
source: sandbox.ohlcv
|
|
3
|
+
id: equity.ohlcv # format: domain.dataset.(variant)
|
|
4
|
+
|
|
5
|
+
mapper:
|
|
6
|
+
entrypoint: map_sandbox_ohlcv_dto_to_equity
|
|
7
|
+
args: {}
|
|
8
|
+
|
|
9
|
+
cadence: ${group_by} # optional per-contract cadence
|
|
10
|
+
partition_by: ticker
|
|
11
|
+
# sort_batch_size: 100000 # in-memory sort chunk size
|
|
12
|
+
|
|
13
|
+
record: # record-level transforms
|
|
14
|
+
- filter: { field: time, operator: ge, comparand: "${start_time}" }
|
|
15
|
+
- filter: { field: time, operator: le, comparand: "${end_time}" }
|
|
16
|
+
- floor_time: { cadence: "${cadence}" }
|
|
17
|
+
# - lag: { lag: 10m }
|
|
18
|
+
|
|
19
|
+
stream: # per-stream transforms (input sorted by partition,time)
|
|
20
|
+
# - ensure_cadence: { field: close, to: close, cadence: "${cadence}" }
|
|
21
|
+
# - granularity: { field: close, to: close, mode: first }
|
|
22
|
+
- rolling: {
|
|
23
|
+
field: dollar_volume,
|
|
24
|
+
to: adv5,
|
|
25
|
+
window: 5,
|
|
26
|
+
statistic: mean,
|
|
27
|
+
min_samples: 3,
|
|
28
|
+
} # compute 5-day average dollar volume (ADV5)
|
|
29
|
+
- filter: { field: adv5, operator: ge, comparand: 1_000_000 } # filter out illiquid stocks
|
|
30
|
+
# - fill: { statistic: median, window: 6, min_samples: 1 }
|
|
31
|
+
|
|
32
|
+
debug: # optional validation-only checks
|
|
33
|
+
#- lint: { mode: warn, tick: "${cadence}" }
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# See ../../reference/reference/contracts/ingest.reference.yaml for full options.
|
|
2
|
+
kind: ingest
|
|
3
|
+
source: synthetic.ticks
|
|
4
|
+
id: time.ticks.hour_sin
|
|
5
|
+
cadence: ${group_by}
|
|
6
|
+
|
|
7
|
+
mapper:
|
|
8
|
+
entrypoint: encode_time
|
|
9
|
+
args: { mode: hour_sin }
|
|
10
|
+
|
|
11
|
+
record:
|
|
12
|
+
- filter: { field: time, operator: ge, comparand: "${start_time}" }
|
|
13
|
+
- filter: { field: time, operator: le, comparand: "${end_time}" }
|
|
14
|
+
- floor_time: { cadence: "${cadence}" }
|
|
15
|
+
|
|
16
|
+
stream:
|
|
17
|
+
- dedupe: {}
|
|
18
|
+
- granularity: { field: value, to: value, mode: first }
|
|
19
|
+
- ensure_cadence: { field: value, to: value, cadence: "${cadence}" }
|
|
20
|
+
|
|
21
|
+
debug:
|
|
22
|
+
- lint: { mode: error, tick: "${cadence}" }
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# See ../../reference/reference/contracts/ingest.reference.yaml for full options.
|
|
2
|
+
kind: ingest
|
|
3
|
+
source: synthetic.ticks
|
|
4
|
+
id: time.ticks.linear
|
|
5
|
+
cadence: ${group_by}
|
|
6
|
+
|
|
7
|
+
mapper:
|
|
8
|
+
entrypoint: encode_time
|
|
9
|
+
args: { mode: linear }
|
|
10
|
+
|
|
11
|
+
record:
|
|
12
|
+
- filter: { field: time, operator: ge, comparand: "${start_time}" }
|
|
13
|
+
- filter: { field: time, operator: le, comparand: "${end_time}" }
|
|
14
|
+
- floor_time: { cadence: "${cadence}" }
|
|
15
|
+
|
|
16
|
+
stream:
|
|
17
|
+
- dedupe: {}
|
|
18
|
+
- granularity: { field: value, to: value, mode: first }
|
|
19
|
+
- ensure_cadence: { field: value, to: value, cadence: "${cadence}" }
|
|
20
|
+
|
|
21
|
+
debug:
|
|
22
|
+
- lint: { mode: error, tick: "${cadence}" }
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{"time": "2021-01-04 05:00:00+00:00", "open": 129.99, "high": 130.06, "low": 123.47, "close": 126.15, "volume": 1549553.0, "symbol": "AAPL"}
|
|
2
|
+
{"time": "2021-01-05 05:00:00+00:00", "open": 125.55, "high": 128.25, "low": 125.06, "close": 127.5, "volume": 804637.0, "symbol": "AAPL"}
|
|
3
|
+
{"time": "2021-01-06 05:00:00+00:00", "open": 124.2, "high": 127.57, "low": 123.07, "close": 123.35, "volume": 2202534.0, "symbol": "AAPL"}
|
|
4
|
+
{"time": "2021-01-07 05:00:00+00:00", "open": 124.98, "high": 128.14, "low": 124.51, "close": 127.42, "volume": 1440239.0, "symbol": "AAPL"}
|
|
5
|
+
{"time": "2021-01-08 05:00:00+00:00", "open": 128.9, "high": 129.11, "low": 126.81, "close": 128.65, "volume": 1340001.0, "symbol": "AAPL"}
|
|
6
|
+
{"time": "2021-01-11 05:00:00+00:00", "open": 125.81, "high": 126.67, "low": 125.1, "close": 125.53, "volume": 1168071.0, "symbol": "AAPL"}
|
|
7
|
+
{"time": "2021-01-12 05:00:00+00:00", "open": 125.03, "high": 126.26, "low": 123.55, "close": 125.47, "volume": 810812.0, "symbol": "AAPL"}
|
|
8
|
+
{"time": "2021-01-13 05:00:00+00:00", "open": 125.29, "high": 127.96, "low": 125.1, "close": 127.56, "volume": 1341043.0, "symbol": "AAPL"}
|
|
9
|
+
{"time": "2021-01-14 05:00:00+00:00", "open": 127.31, "high": 127.53, "low": 125.39, "close": 125.69, "volume": 969718.0, "symbol": "AAPL"}
|
|
10
|
+
{"time": "2021-01-15 05:00:00+00:00", "open": 125.27, "high": 126.76, "low": 123.65, "close": 123.95, "volume": 1159284.0, "symbol": "AAPL"}
|
|
11
|
+
{"time": "2021-01-19 05:00:00+00:00", "open": 124.37, "high": 125.29, "low": 123.59, "close": 124.39, "volume": 1051940.0, "symbol": "AAPL"}
|
|
12
|
+
{"time": "2021-01-20 05:00:00+00:00", "open": 125.16, "high": 128.99, "low": 125.16, "close": 128.52, "volume": 957072.0, "symbol": "AAPL"}
|
|
13
|
+
{"time": "2021-01-21 05:00:00+00:00", "open": 130.25, "high": 133.7, "low": 130.06, "close": 133.19, "volume": 1994077.0, "symbol": "AAPL"}
|
|
14
|
+
{"time": "2021-01-22 05:00:00+00:00", "open": 132.68, "high": 136.15, "low": 131.45, "close": 135.37, "volume": 1820717.0, "symbol": "AAPL"}
|
|
15
|
+
{"time": "2021-01-25 05:00:00+00:00", "open": 139.22, "high": 141.2, "low": 132.94, "close": 139.14, "volume": 1957404.0, "symbol": "AAPL"}
|
|
16
|
+
{"time": "2021-01-26 05:00:00+00:00", "open": 139.74, "high": 140.48, "low": 137.68, "close": 139.51, "volume": 1242288.0, "symbol": "AAPL"}
|
|
17
|
+
{"time": "2021-01-27 05:00:00+00:00", "open": 139.62, "high": 140.44, "low": 136.79, "close": 138.26, "volume": 1965025.0, "symbol": "AAPL"}
|
|
18
|
+
{"time": "2021-01-28 05:00:00+00:00", "open": 135.82, "high": 138.23, "low": 133.1, "close": 133.1, "volume": 2645618.0, "symbol": "AAPL"}
|
|
19
|
+
{"time": "2021-01-29 05:00:00+00:00", "open": 132.29, "high": 133.09, "low": 126.79, "close": 128.3, "volume": 2609717.0, "symbol": "AAPL"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{"time": "2021-01-04 05:00:00+00:00", "open": 213.55, "high": 213.95, "low": 206.18, "close": 209.1, "volume": 678049.0, "symbol": "MSFT"}
|
|
2
|
+
{"time": "2021-01-05 05:00:00+00:00", "open": 208.76, "high": 209.64, "low": 207.24, "close": 209.29, "volume": 483132.0, "symbol": "MSFT"}
|
|
3
|
+
{"time": "2021-01-06 05:00:00+00:00", "open": 203.82, "high": 207.69, "low": 203.41, "close": 203.5, "volume": 881552.0, "symbol": "MSFT"}
|
|
4
|
+
{"time": "2021-01-07 05:00:00+00:00", "open": 205.33, "high": 210.43, "low": 205.06, "close": 209.52, "volume": 621610.0, "symbol": "MSFT"}
|
|
5
|
+
{"time": "2021-01-08 05:00:00+00:00", "open": 209.81, "high": 211.59, "low": 208.24, "close": 210.74, "volume": 656869.0, "symbol": "MSFT"}
|
|
6
|
+
{"time": "2021-01-11 05:00:00+00:00", "open": 209.6, "high": 210.02, "low": 207.96, "close": 208.67, "volume": 519302.0, "symbol": "MSFT"}
|
|
7
|
+
{"time": "2021-01-12 05:00:00+00:00", "open": 207.77, "high": 208.27, "low": 204.68, "close": 206.44, "volume": 705831.0, "symbol": "MSFT"}
|
|
8
|
+
{"time": "2021-01-13 05:00:00+00:00", "open": 205.3, "high": 207.96, "low": 205.3, "close": 207.83, "volume": 635639.0, "symbol": "MSFT"}
|
|
9
|
+
{"time": "2021-01-14 05:00:00+00:00", "open": 207.37, "high": 208.6, "low": 204.12, "close": 204.77, "volume": 573145.0, "symbol": "MSFT"}
|
|
10
|
+
{"time": "2021-01-15 05:00:00+00:00", "open": 204.57, "high": 205.82, "low": 203.48, "close": 204.15, "volume": 669016.0, "symbol": "MSFT"}
|
|
11
|
+
{"time": "2021-01-19 05:00:00+00:00", "open": 205.09, "high": 208.19, "low": 204.04, "close": 207.69, "volume": 688794.0, "symbol": "MSFT"}
|
|
12
|
+
{"time": "2021-01-20 05:00:00+00:00", "open": 209.06, "high": 216.63, "low": 208.55, "close": 215.25, "volume": 1221985.0, "symbol": "MSFT"}
|
|
13
|
+
{"time": "2021-01-21 05:00:00+00:00", "open": 215.56, "high": 217.12, "low": 213.43, "close": 215.79, "volume": 1226767.0, "symbol": "MSFT"}
|
|
14
|
+
{"time": "2021-01-22 05:00:00+00:00", "open": 217.95, "high": 220.71, "low": 216.89, "close": 216.89, "volume": 1457906.0, "symbol": "MSFT"}
|
|
15
|
+
{"time": "2021-01-25 05:00:00+00:00", "open": 219.82, "high": 220.46, "low": 215.26, "close": 220.09, "volume": 976700.0, "symbol": "MSFT"}
|
|
16
|
+
{"time": "2021-01-26 05:00:00+00:00", "open": 222.66, "high": 224.68, "low": 220.77, "close": 223.1, "volume": 1510093.0, "symbol": "MSFT"}
|
|
17
|
+
{"time": "2021-01-27 05:00:00+00:00", "open": 228.35, "high": 230.69, "low": 220.83, "close": 223.45, "volume": 2063324.0, "symbol": "MSFT"}
|
|
18
|
+
{"time": "2021-01-28 05:00:00+00:00", "open": 226.06, "high": 232.79, "low": 225.58, "close": 229.35, "volume": 1701928.0, "symbol": "MSFT"}
|
|
19
|
+
{"time": "2021-01-29 05:00:00+00:00", "open": 225.83, "high": 228.36, "low": 222.07, "close": 222.31, "volume": 1490894.0, "symbol": "MSFT"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# See ../reference/reference/dataset.yaml for full options.
|
|
2
|
+
group_by: ${group_by}
|
|
3
|
+
|
|
4
|
+
features:
|
|
5
|
+
- id: linear_time
|
|
6
|
+
record_stream: time.ticks.linear
|
|
7
|
+
field: value
|
|
8
|
+
scale: true
|
|
9
|
+
sequence: { size: 2, stride: 1 }
|
|
10
|
+
|
|
11
|
+
- id: closing_price
|
|
12
|
+
record_stream: equity.ohlcv
|
|
13
|
+
field: close
|
|
14
|
+
scale: true
|
|
15
|
+
|
|
16
|
+
- id: opening_price
|
|
17
|
+
record_stream: equity.ohlcv
|
|
18
|
+
field: open
|
|
19
|
+
scale: true
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# See ../reference/reference/postprocess.yaml for full options.
|
|
2
|
+
# - drop: # no targets so no effect but included here for demonstration
|
|
3
|
+
# axis: vertical
|
|
4
|
+
# payload: targets
|
|
5
|
+
# threshold: 0.9
|
|
6
|
+
|
|
7
|
+
- drop: # effectively drops features with >50% missing values. 0 drops in the demo, but included here for demonstration
|
|
8
|
+
axis: vertical
|
|
9
|
+
payload: features
|
|
10
|
+
threshold: 0.5
|
|
11
|
+
|
|
12
|
+
- drop: # this actually drops some vectors
|
|
13
|
+
axis: horizontal
|
|
14
|
+
payload: features
|
|
15
|
+
threshold: 1
|
|
16
|
+
# - drop: # no targets so no effect but included here for demonstration
|
|
17
|
+
# axis: horizontal
|
|
18
|
+
# payload: targets
|
|
19
|
+
# threshold: 1
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# See ../reference/reference/project.yaml for full options.
|
|
2
|
+
version: 1
|
|
3
|
+
name: demo
|
|
4
|
+
paths:
|
|
5
|
+
streams: ./contracts
|
|
6
|
+
sources: ./sources
|
|
7
|
+
dataset: dataset.yaml
|
|
8
|
+
postprocess: postprocess.yaml
|
|
9
|
+
artifacts: ../artifacts/${project_name}/v${version}
|
|
10
|
+
tasks: ./tasks
|
|
11
|
+
globals:
|
|
12
|
+
group_by: 1d
|
|
13
|
+
start_time: 2021-01-01T00:00:00Z
|
|
14
|
+
end_time: 2021-02-01T00:00:00Z
|
|
15
|
+
split:
|
|
16
|
+
mode: hash
|
|
17
|
+
key: group
|
|
18
|
+
seed: 42
|
|
19
|
+
ratios: { train: 0.8, val: 0.1, test: 0.1 }
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Required identifier for this raw source. Contracts reference it under `source:`.
|
|
2
|
+
id: "sandbox.ohlcv" # suggested format: provider.dataset
|
|
3
|
+
|
|
4
|
+
# parser.entrypoint: registered parser name (not a file path)
|
|
5
|
+
parser:
|
|
6
|
+
entrypoint: "sandbox_ohlcv_dto_parser"
|
|
7
|
+
args: {}
|
|
8
|
+
|
|
9
|
+
# loader.entrypoint: registered loader name (not a file path)
|
|
10
|
+
loader:
|
|
11
|
+
entrypoint: "core.io"
|
|
12
|
+
args:
|
|
13
|
+
transport: fs
|
|
14
|
+
format: json-lines
|
|
15
|
+
path: demo/data/*.jsonl
|
|
16
|
+
glob: true
|
|
17
|
+
encoding: utf-8
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from datapipeline.integrations import dataframe_from_vectors
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def main() -> None:
    """Materialize the demo project's vectors as a pandas DataFrame and preview it."""
    # The project config lives next to this script.
    config_path = Path(__file__).resolve().parent / "project.yaml"

    # No row limit; attach the group key as a mapping column and flatten
    # sequence features into scalar columns.
    frame = dataframe_from_vectors(
        config_path,
        limit=None,
        include_group=True,
        group_format="mapping",
        flatten_sequences=True,
    )

    print("DataFrame shape:", frame.shape)
    print(frame.head())


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
from torch.utils.data import DataLoader
|
|
5
|
+
|
|
6
|
+
from datapipeline.integrations import torch_dataset
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def main() -> None:
    """Build a torch dataset from the demo project and print one batch's shape."""
    # The project config lives next to this script.
    config_path = Path(__file__).resolve().parent / "project.yaml"

    # Cap at 256 vectors so the demo stays fast; flatten sequence features
    # so each sample is a flat float32 tensor.
    dataset = torch_dataset(
        config_path,
        limit=256,
        dtype=torch.float32,
        flatten_sequences=True,
    )

    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    first_batch = next(iter(loader))
    print("Feature batch shape:", first_batch.shape)


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from datapipeline.domain.record import TemporalRecord
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class EquityRecord(TemporalRecord):
    """Domain record for the 'equity' stream.

    Extends TemporalRecord — which presumably supplies the `time` field the
    demo mapper sets for grouping/ordering (confirm in TemporalRecord) — with
    raw OHLCV fields plus two values derived at mapping time.
    """
    open: float           # opening price of the bar
    high: float           # highest price within the bar
    low: float            # lowest price within the bar
    close: float          # closing price of the bar
    volume: float         # traded volume for the bar
    dollar_volume: float  # derived in the demo mapper as close * volume
    hl_range: float       # derived in the demo mapper as high - low
    ticker: str  # equity ticker symbol (mapped from the DTO's `symbol`)
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class SandboxOhlcvDTO:
    """Data Transfer Object (DTO) for sandbox OHLCV records.

    Field names mirror the keys of the demo JSONL rows
    (time / open / high / low / close / volume / symbol).
    """
    time: datetime  # bar timestamp (timezone-aware in the demo data)
    open: float     # opening price
    high: float     # highest price
    low: float      # lowest price
    close: float    # closing price
    volume: float   # traded volume
    symbol: str     # instrument symbol, e.g. "MSFT" in the demo data
|
|
File without changes
|
datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import Any, Iterator
|
|
2
|
+
|
|
3
|
+
from {{PACKAGE_NAME}}.domains.equity.model import EquityRecord
|
|
4
|
+
from {{PACKAGE_NAME}}.dtos.sandbox_ohlcv_dto import SandboxOhlcvDTO
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def map_sandbox_ohlcv_dto_to_equity(
    stream: Iterator[SandboxOhlcvDTO],
    **params: Any,
) -> Iterator[EquityRecord]:
    """Translate raw SandboxOhlcvDTO items into domain-level EquityRecord items."""
    for dto in stream:
        equity = EquityRecord(
            # `time` is required for correct grouping and ordering downstream.
            time=dto.time,

            # Raw, filterable OHLCV fields copied straight from the DTO.
            open=dto.open,
            high=dto.high,
            low=dto.low,
            close=dto.close,
            volume=dto.volume,

            # Values derived at mapping time.
            dollar_volume=dto.close * dto.volume,
            hl_range=dto.high - dto.low,

            ticker=dto.symbol,
        )
        yield equity
|
|
File without changes
|