jerry-thomas 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. datapipeline/analysis/vector/collector.py +120 -17
  2. datapipeline/analysis/vector/matrix.py +33 -8
  3. datapipeline/analysis/vector/report.py +162 -32
  4. datapipeline/build/tasks/__init__.py +11 -0
  5. datapipeline/build/tasks/config.py +74 -0
  6. datapipeline/build/tasks/metadata.py +170 -0
  7. datapipeline/build/tasks/scaler.py +73 -0
  8. datapipeline/build/tasks/schema.py +60 -0
  9. datapipeline/build/tasks/utils.py +169 -0
  10. datapipeline/cli/app.py +304 -127
  11. datapipeline/cli/commands/build.py +240 -16
  12. datapipeline/cli/commands/contract.py +367 -0
  13. datapipeline/cli/commands/domain.py +8 -3
  14. datapipeline/cli/commands/inspect.py +401 -149
  15. datapipeline/cli/commands/list_.py +30 -7
  16. datapipeline/cli/commands/plugin.py +5 -1
  17. datapipeline/cli/commands/run.py +227 -241
  18. datapipeline/cli/commands/run_config.py +101 -0
  19. datapipeline/cli/commands/serve_pipeline.py +156 -0
  20. datapipeline/cli/commands/source.py +44 -8
  21. datapipeline/cli/visuals/__init__.py +4 -2
  22. datapipeline/cli/visuals/common.py +239 -0
  23. datapipeline/cli/visuals/labels.py +15 -15
  24. datapipeline/cli/visuals/runner.py +66 -0
  25. datapipeline/cli/visuals/sections.py +20 -0
  26. datapipeline/cli/visuals/sources.py +132 -119
  27. datapipeline/cli/visuals/sources_basic.py +260 -0
  28. datapipeline/cli/visuals/sources_off.py +76 -0
  29. datapipeline/cli/visuals/sources_rich.py +414 -0
  30. datapipeline/config/catalog.py +37 -3
  31. datapipeline/config/context.py +214 -0
  32. datapipeline/config/dataset/loader.py +21 -4
  33. datapipeline/config/dataset/normalize.py +4 -4
  34. datapipeline/config/metadata.py +43 -0
  35. datapipeline/config/postprocess.py +2 -2
  36. datapipeline/config/project.py +3 -2
  37. datapipeline/config/resolution.py +129 -0
  38. datapipeline/config/tasks.py +309 -0
  39. datapipeline/config/workspace.py +155 -0
  40. datapipeline/domain/__init__.py +12 -0
  41. datapipeline/domain/record.py +11 -0
  42. datapipeline/domain/sample.py +54 -0
  43. datapipeline/integrations/ml/adapter.py +34 -20
  44. datapipeline/integrations/ml/pandas_support.py +0 -2
  45. datapipeline/integrations/ml/rows.py +1 -6
  46. datapipeline/integrations/ml/torch_support.py +1 -3
  47. datapipeline/io/factory.py +112 -0
  48. datapipeline/io/output.py +132 -0
  49. datapipeline/io/protocols.py +21 -0
  50. datapipeline/io/serializers.py +219 -0
  51. datapipeline/io/sinks/__init__.py +23 -0
  52. datapipeline/io/sinks/base.py +2 -0
  53. datapipeline/io/sinks/files.py +79 -0
  54. datapipeline/io/sinks/rich.py +57 -0
  55. datapipeline/io/sinks/stdout.py +18 -0
  56. datapipeline/io/writers/__init__.py +14 -0
  57. datapipeline/io/writers/base.py +28 -0
  58. datapipeline/io/writers/csv_writer.py +25 -0
  59. datapipeline/io/writers/jsonl.py +52 -0
  60. datapipeline/io/writers/pickle_writer.py +30 -0
  61. datapipeline/pipeline/artifacts.py +58 -0
  62. datapipeline/pipeline/context.py +66 -7
  63. datapipeline/pipeline/observability.py +65 -0
  64. datapipeline/pipeline/pipelines.py +65 -13
  65. datapipeline/pipeline/split.py +11 -10
  66. datapipeline/pipeline/stages.py +127 -16
  67. datapipeline/pipeline/utils/keygen.py +20 -7
  68. datapipeline/pipeline/utils/memory_sort.py +22 -10
  69. datapipeline/pipeline/utils/transform_utils.py +22 -0
  70. datapipeline/runtime.py +5 -2
  71. datapipeline/services/artifacts.py +12 -6
  72. datapipeline/services/bootstrap/config.py +25 -0
  73. datapipeline/services/bootstrap/core.py +52 -37
  74. datapipeline/services/constants.py +6 -5
  75. datapipeline/services/factories.py +123 -1
  76. datapipeline/services/project_paths.py +43 -16
  77. datapipeline/services/runs.py +208 -0
  78. datapipeline/services/scaffold/domain.py +3 -2
  79. datapipeline/services/scaffold/filter.py +3 -2
  80. datapipeline/services/scaffold/mappers.py +9 -6
  81. datapipeline/services/scaffold/plugin.py +54 -10
  82. datapipeline/services/scaffold/source.py +93 -56
  83. datapipeline/sources/{composed_loader.py → data_loader.py} +9 -9
  84. datapipeline/sources/decoders.py +83 -18
  85. datapipeline/sources/factory.py +26 -16
  86. datapipeline/sources/models/__init__.py +2 -2
  87. datapipeline/sources/models/generator.py +0 -7
  88. datapipeline/sources/models/loader.py +3 -3
  89. datapipeline/sources/models/parsing_error.py +24 -0
  90. datapipeline/sources/models/source.py +6 -6
  91. datapipeline/sources/synthetic/time/loader.py +14 -2
  92. datapipeline/sources/transports.py +74 -37
  93. datapipeline/templates/plugin_skeleton/README.md +76 -30
  94. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +31 -0
  95. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +30 -0
  96. datapipeline/templates/plugin_skeleton/example/dataset.yaml +18 -0
  97. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +29 -0
  98. datapipeline/templates/plugin_skeleton/{config/datasets/default → example}/project.yaml +11 -8
  99. datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml +12 -0
  100. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +3 -0
  101. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +9 -0
  102. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +2 -0
  103. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +4 -0
  104. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +28 -0
  105. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +4 -0
  106. datapipeline/templates/plugin_skeleton/jerry.yaml +34 -0
  107. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +31 -0
  108. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +30 -0
  109. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +18 -0
  110. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +29 -0
  111. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +22 -0
  112. datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +12 -0
  113. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +3 -0
  114. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +9 -0
  115. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +2 -0
  116. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +4 -0
  117. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +28 -0
  118. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +4 -0
  119. datapipeline/templates/stubs/dto.py.j2 +2 -0
  120. datapipeline/templates/stubs/mapper.py.j2 +5 -4
  121. datapipeline/templates/stubs/parser.py.j2 +2 -0
  122. datapipeline/templates/stubs/record.py.j2 +2 -0
  123. datapipeline/templates/stubs/source.yaml.j2 +2 -3
  124. datapipeline/transforms/debug/lint.py +26 -41
  125. datapipeline/transforms/feature/scaler.py +89 -13
  126. datapipeline/transforms/record/floor_time.py +4 -4
  127. datapipeline/transforms/sequence.py +2 -35
  128. datapipeline/transforms/stream/dedupe.py +24 -0
  129. datapipeline/transforms/stream/ensure_ticks.py +7 -6
  130. datapipeline/transforms/vector/__init__.py +5 -0
  131. datapipeline/transforms/vector/common.py +98 -0
  132. datapipeline/transforms/vector/drop/__init__.py +4 -0
  133. datapipeline/transforms/vector/drop/horizontal.py +79 -0
  134. datapipeline/transforms/vector/drop/orchestrator.py +59 -0
  135. datapipeline/transforms/vector/drop/vertical.py +182 -0
  136. datapipeline/transforms/vector/ensure_schema.py +184 -0
  137. datapipeline/transforms/vector/fill.py +87 -0
  138. datapipeline/transforms/vector/replace.py +62 -0
  139. datapipeline/utils/load.py +24 -3
  140. datapipeline/utils/rich_compat.py +38 -0
  141. datapipeline/utils/window.py +76 -0
  142. jerry_thomas-1.0.1.dist-info/METADATA +825 -0
  143. jerry_thomas-1.0.1.dist-info/RECORD +199 -0
  144. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/entry_points.txt +9 -8
  145. datapipeline/build/tasks.py +0 -186
  146. datapipeline/cli/commands/link.py +0 -128
  147. datapipeline/cli/commands/writers.py +0 -138
  148. datapipeline/config/build.py +0 -64
  149. datapipeline/config/run.py +0 -116
  150. datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.synthetic.yaml +0 -24
  151. datapipeline/templates/plugin_skeleton/config/contracts/time_linear.synthetic.yaml +0 -23
  152. datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml +0 -9
  153. datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +0 -14
  154. datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml +0 -13
  155. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml +0 -10
  156. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml +0 -10
  157. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml +0 -10
  158. datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml +0 -11
  159. datapipeline/transforms/vector.py +0 -210
  160. jerry_thomas-0.3.0.dist-info/METADATA +0 -502
  161. jerry_thomas-0.3.0.dist-info/RECORD +0 -139
  162. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/WHEEL +0 -0
  163. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/licenses/LICENSE +0 -0
  164. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/top_level.txt +0 -0
datapipeline/cli/visuals/sources.py
@@ -1,138 +1,151 @@
-from typing import Iterator, Any, Optional
 from contextlib import contextmanager
-from itertools import cycle
 import logging
-import threading
-import time
+import sys
+from typing import Optional, Tuple
 
-from .labels import progress_meta_for_loader
 from datapipeline.runtime import Runtime
-from datapipeline.sources.models.source import Source
-from tqdm import tqdm
-
-
-class VisualSourceProxy(Source):
-    """Proxy wrapping Source.stream() with CLI feedback scaled by logging level."""
-
-    def __init__(self, inner: Source, alias: str, verbosity: int):
-        self._inner = inner
-        self._alias = alias
-        self._verbosity = max(0, min(verbosity, 2))
-
-    @staticmethod
-    def _start_spinner(label: str):
-        """Start a background spinner tqdm progress bar."""
-        bar = tqdm(
-            total=0,
-            desc="",
-            bar_format="{desc}",
-            dynamic_ncols=True,
-            leave=False,
-        )
-        bar.set_description_str(label)
-        bar.refresh()
-
-        stop_event = threading.Event()
-
-        def _spin():
-            frames = cycle((" |", " /", " -", " \\"))
-            while not stop_event.is_set():
-                bar.set_description_str(f"{label}{next(frames)}")
-                bar.refresh()
-                time.sleep(0.1)
-            bar.set_description_str(label)
-            bar.refresh()
-
-        worker = threading.Thread(target=_spin, daemon=True)
-        worker.start()
-        return stop_event, worker, bar
-
-    @staticmethod
-    def _stop_spinner(stop_event, worker, bar):
-        stop_event.set()
-        worker.join()
-        try:
-            bar.close()
-        finally:
-            fp = getattr(bar, "fp", None)
-            try:
-                if getattr(bar, "disable", False):
-                    return
-                if fp and hasattr(fp, "write"):
-                    fp.write("\n")
-                    fp.flush()
-                else:
-                    print()
-            except Exception:
-                pass
 
-    def _count_with_indicator(self, label: str) -> Optional[int]:
-        try:
-            stop_event, worker, bar = self._start_spinner(label)
-        except Exception:
-            # If spinner setup fails, silently fall back to raw count
-            return self._safe_count()
+logger = logging.getLogger(__name__)
 
-        try:
-            return self._safe_count()
-        finally:
-            self._stop_spinner(stop_event, worker, bar)
 
-    def _safe_count(self) -> Optional[int]:
+def _is_tty() -> bool:
+    try:
+        return hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
+    except Exception:
+        return False
+
+
+class VisualsBackend:
+    """Interface for visuals backends.
+
+    - on_build_start/on_job_start return True if the backend handled the headline, False to let caller log it.
+    - wrap_sources returns a contextmanager that enables streaming visuals.
+    """
+
+    def on_build_start(self, path) -> bool:  # Path-like
+        return False
+
+    def on_job_start(self, sections: Tuple[str, ...], label: str, idx: int, total: int) -> bool:
+        return False
+
+    def on_streams_complete(self) -> bool:
+        """Return True if backend surfaced a final completion line visually."""
+        return False
+
+    def requires_logging_redirect(self) -> bool:
+        """Return True when console logging should be routed via tqdm."""
+        return True
+
+    def wrap_sources(self, runtime: Runtime, log_level: int, progress_style: str):  # contextmanager
+        @contextmanager
+        def _noop():
+            yield
+
+        return _noop()
+
+
+class _BasicBackend(VisualsBackend):
+    def wrap_sources(self, runtime: Runtime, log_level: int, progress_style: str):
+        from .sources_basic import visual_sources as basic
+        return basic(runtime, log_level, progress_style)
+
+
+class _RichBackend(VisualsBackend):
+    def _render_sections(self, console, sections: tuple[str, ...]) -> None:
+        if not sections:
+            return
+        from rich.rule import Rule as _Rule
+        console.print(_Rule(sections[0].title(), style="bold white"))
+        if len(sections) > 1:
+            for level, name in enumerate(sections[1:], start=1):
+                indent = " " * level
+                console.print(f"{indent}[cyan]{name}[/cyan]")
+        console.print()
+
+    def on_job_start(self, sections: tuple[str, ...], label: str, idx: int, total: int) -> bool:
         try:
-            return self._inner.count()
+            from rich.console import Console as _Console
+            import sys as _sys
+            console = _Console(file=_sys.stderr, markup=True)
+            self._render_sections(console, sections)
+            indent = " " * max(len(sections), 1)
+            console.print(f"{indent}── {label} ({idx}/{total}) ──")
+            console.print()
+            return True
         except Exception:
-            return None
-
-    def stream(self) -> Iterator[Any]:
-        desc, unit = progress_meta_for_loader(self._inner.loader)
-        progress_desc = f"{desc} [{self._alias}]"
-        label = f"Preparing data stream for [{self._alias}]"
-
-        if self._verbosity >= 2:
-            total = self._count_with_indicator(label)
-            yield from tqdm(
-                self._inner.stream(),
-                total=total,
-                desc=progress_desc,
-                unit=unit,
-                dynamic_ncols=True,
-                mininterval=0.0,
-                miniters=1,
-                leave=True,
-            )
-            return
+            return False
 
+    def on_build_start(self, path) -> bool:
         try:
-            stop_event, worker, bar = self._start_spinner(progress_desc)
+            from rich.console import Console as _Console
+            from rich.rule import Rule as _Rule
+            import sys as _sys
+            from pathlib import Path as _Path
+            import os as _os
+            console = _Console(file=_sys.stderr, markup=True)
+            console.print(_Rule("Info", style="bold white"))
+            # Subheader with compact path to project.yaml
+            p = _Path(path)
+            try:
+                cwd = _Path(_os.getcwd())
+                rel = p.relative_to(cwd)
+                parts = [part for part in rel.as_posix().split("/") if part]
+            except Exception:
+                parts = [part for part in p.as_posix().split("/") if part]
+            if len(parts) > 3:
+                parts = ["..."] + parts[-3:]
+            compact = "/".join(parts) if parts else p.name
+            console.print(f"[cyan]project:[/cyan] {compact}")
+            console.print()  # spacer
+            return True
         except Exception:
-            # Spinner isn't critical; fall back to raw stream
-            yield from self._inner.stream()
-            return
+            return False
 
-        try:
-            for item in self._inner.stream():
-                yield item
-        finally:
-            self._stop_spinner(stop_event, worker, bar)
+    def wrap_sources(self, runtime: Runtime, log_level: int, progress_style: str):
+        from .sources_rich import visual_sources as rich_vs
+        return rich_vs(runtime, log_level, progress_style)
 
+    def on_streams_complete(self) -> bool:
+        # Rich backend manages its own persistent final line; signal handled
+        return True
 
-@contextmanager
-def visual_sources(runtime: Runtime, log_level: int):
-    """Temporarily wrap stream sources with logging-level-driven feedback."""
-    if log_level is None or log_level > logging.INFO:
-        yield
-        return
+    def requires_logging_redirect(self) -> bool:
+        return False
+
+
+class _OffBackend(VisualsBackend):
+    def requires_logging_redirect(self) -> bool:
+        return False
 
-    verbosity = 2 if log_level <= logging.DEBUG else 1
+    def wrap_sources(self, runtime: Runtime, log_level: int, progress_style: str):
+        from .sources_off import visual_sources as off_vs
+        return off_vs(runtime, log_level, progress_style)
 
-    reg = runtime.registries.stream_sources
-    originals = dict(reg.items())
+
+def _rich_available() -> bool:
     try:
-        for alias, src in originals.items():
-            reg.register(alias, VisualSourceProxy(src, alias, verbosity))
+        import rich  # noqa: F401
+        return True
+    except Exception:
+        return False
+
+
+def get_visuals_backend(provider: Optional[str]) -> VisualsBackend:
+    mode = (provider or "auto").lower()
+    if mode == "off":
+        return _OffBackend()
+    if mode == "tqdm":
+        return _BasicBackend()
+    if mode == "rich":
+        return _RichBackend() if _rich_available() else _BasicBackend()
+    # auto
+    if _rich_available() and _is_tty():
+        return _RichBackend()
+    return _BasicBackend()
+
+
+@contextmanager
+def visual_sources(runtime: Runtime, log_level: int, provider: Optional[str] = None, progress_style: str = "auto"):
+    backend = get_visuals_backend(provider)
+    with backend.wrap_sources(runtime, log_level, progress_style):
         yield
-    finally:
-        # Restore original sources
-        for alias, src in originals.items():
-            reg.register(alias, src)
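
Note: the rewritten sources.py above turns visual_sources into a thin dispatcher over pluggable backends. The sketch below is illustrative only; run_with_visuals and its arguments are hypothetical (the actual wiring presumably lives in cli/app.py and cli/commands/run.py, which also change in this release), but get_visuals_backend and visual_sources are the names defined in the diff above.

# Sketch only: assumes a Runtime instance and a project.yaml path supplied by the CLI layer.
import logging

from datapipeline.cli.visuals.sources import get_visuals_backend, visual_sources


def run_with_visuals(runtime, project_path, provider=None):
    backend = get_visuals_backend(provider)        # "off" | "tqdm" | "rich" | None -> auto
    if not backend.on_build_start(project_path):   # False -> backend printed nothing, log a plain headline
        logging.getLogger(__name__).info("project: %s", project_path)

    # Swap visual proxies in for the duration of the run, then restore the originals.
    with visual_sources(runtime, logging.INFO, provider=provider, progress_style="auto"):
        ...  # execute the pipeline; registered stream sources now report progress
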
datapipeline/cli/visuals/sources_basic.py
@@ -0,0 +1,260 @@
+from typing import Iterator, Any, Optional
+from contextlib import contextmanager
+from itertools import cycle
+from pathlib import Path
+import logging
+import os
+import threading
+import time
+
+from .labels import progress_meta_for_loader
+from datapipeline.runtime import Runtime
+from datapipeline.sources.models.source import Source
+from datapipeline.sources.transports import FsGlobTransport
+from tqdm import tqdm
+from .common import (
+    compute_glob_root,
+    current_transport_label,
+    log_combined_stream,
+    log_transport_details,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class VisualSourceProxy(Source):
+    """Proxy wrapping Source.stream() with CLI feedback scaled by logging level."""
+
+    def __init__(self, inner: Source, alias: str, verbosity: int):
+        self._inner = inner
+        self._alias = alias
+        self._verbosity = max(0, min(verbosity, 2))
+
+    @staticmethod
+    def _start_spinner(label: str):
+        """Start a background spinner tqdm progress bar."""
+        bar = tqdm(
+            total=0,
+            desc="",
+            bar_format="{desc}",
+            dynamic_ncols=True,
+            leave=False,
+        )
+        state = {"base": label}
+        bar.set_description_str(label)
+        bar.refresh()
+
+        stop_event = threading.Event()
+
+        def _spin():
+            frames = cycle((" |", " /", " -", " \\"))
+            while not stop_event.is_set():
+                bar.set_description_str(f"{state['base']}{next(frames)}")
+                bar.refresh()
+                time.sleep(0.1)
+            bar.set_description_str(state["base"])
+            bar.refresh()
+
+        worker = threading.Thread(target=_spin, daemon=True)
+        worker.start()
+        return state, stop_event, worker, bar
+
+    @staticmethod
+    def _stop_spinner(stop_event, worker, bar):
+        stop_event.set()
+        worker.join()
+        try:
+            bar.close()
+        finally:
+            fp = getattr(bar, "fp", None)
+            try:
+                if getattr(bar, "disable", False):
+                    return
+                if fp and hasattr(fp, "write"):
+                    fp.write("\n")
+                    fp.flush()
+                else:
+                    print()
+            except Exception:
+                pass
+
+    def _count_with_indicator(self, label: str) -> Optional[int]:
+        try:
+            _, stop_event, worker, bar = self._start_spinner(label)
+        except Exception:
+            # If spinner setup fails, silently fall back to raw count
+            return self._safe_count()
+
+        try:
+            return self._safe_count()
+        finally:
+            self._stop_spinner(stop_event, worker, bar)
+
+    def _safe_count(self) -> Optional[int]:
+        try:
+            return self._inner.count()
+        except Exception:
+            return None
+
+    def _log_source_details(self, transport, root: Optional[Path]) -> None:
+        # Use visuals-agnostic helper so behavior matches rich/basic
+        log_transport_details(transport, self._alias)
+
+    def stream(self) -> Iterator[Any]:
+        loader = getattr(self._inner, "loader", None)
+        desc, unit = progress_meta_for_loader(loader)
+        prefix, sep, suffix = desc.partition(": ")
+        header = f"{prefix}:" if sep else desc
+        tail = suffix if sep else None
+        label = f"[{self._alias}] Preparing data stream"
+
+        transport = getattr(loader, "transport", None)
+
+        glob_root: Optional[Path] = None
+        if isinstance(transport, FsGlobTransport):
+            glob_root = compute_glob_root(transport.files)
+
+        last_path_label: Optional[str] = None
+
+        def compose_desc(name: Optional[str]) -> str:
+            if name:
+                base = header if sep else desc
+                return f"[{self._alias}] {base} {name}".rstrip()
+            if tail:
+                return f"[{self._alias}] {header} {tail}".rstrip()
+            return f"[{self._alias}] {desc}"
+
+        def maybe_update_label(apply_label):
+            nonlocal last_path_label
+            current_label = current_transport_label(transport, glob_root=glob_root)
+            if not current_label or current_label == last_path_label:
+                return
+            last_path_label = current_label
+            apply_label(current_label)
+
+        emitted = 0
+        if self._verbosity >= 2:
+            total = self._count_with_indicator(label)
+
+            bar = tqdm(
+                total=total,
+                desc=compose_desc(None),
+                unit=unit,
+                dynamic_ncols=True,
+                mininterval=0.0,
+                miniters=1,
+                leave=True,
+            )
+
+            started = False
+
+            def update_progress(name: str) -> None:
+                bar.set_description_str(compose_desc(name))
+                bar.refresh()
+
+            try:
+                for item in self._inner.stream():
+                    if not started:
+                        # Emit transport details on first item for correct ordering (DEBUG verbosity)
+                        self._log_source_details(transport, glob_root)
+                        started = True
+                    maybe_update_label(update_progress)
+                    bar.update()
+                    emitted += 1
+                    yield item
+            finally:
+                bar.close()
+                if logger.isEnabledFor(logging.INFO):
+                    try:
+                        unit_label = f" {unit}" if unit else ""
+                        logger.info("[%s] Stream complete (%d%s) ✔",
+                                    self._alias, emitted, unit_label)
+                    except Exception:
+                        pass
+            return
+
+        try:
+            state, stop_event, worker, bar = self._start_spinner(
+                compose_desc(None))
+        except Exception:
+            # Spinner isn't critical; fall back to raw stream
+            yield from self._inner.stream()
+            return
+
+        def update_spinner(name: str) -> None:
+            state["base"] = compose_desc(name)
+            bar.set_description_str(state["base"])
+            bar.refresh()
+
+        started = False
+        try:
+            for item in self._inner.stream():
+                if not started:
+                    # Emit transport details at the start for correct grouping
+                    self._log_source_details(transport, glob_root)
+                    started = True
+                maybe_update_label(update_spinner)
+                emitted += 1
+                yield item
+        finally:
+            self._stop_spinner(stop_event, worker, bar)
+            if logger.isEnabledFor(logging.INFO):
+                try:
+                    unit_label = f" {unit}" if unit else ""
+                    logger.info("[%s] Stream complete (%d%s) ✔",
+                                self._alias, emitted, unit_label)
+                except Exception:
+                    pass
+
+
+def _style_mode(progress_style: str, log_level: int | None) -> str:
+    mode = (progress_style or "auto").lower()
+    if mode == "auto":
+        level = log_level if log_level is not None else logging.INFO
+        return "bars" if level <= logging.DEBUG else "spinner"
+    return mode
+
+
+@contextmanager
+def visual_sources(runtime: Runtime, log_level: int | None, progress_style: str = "auto"):
+    """Temporarily wrap stream sources with logging-level-driven feedback."""
+    level = log_level if log_level is not None else logging.INFO
+    style_mode = _style_mode(progress_style, log_level)
+    if style_mode == "off" or level > logging.INFO:
+        yield
+        return
+
+    verbosity = 2 if style_mode == "bars" else 1
+
+    reg = runtime.registries.stream_sources
+    originals = dict(reg.items())
+    try:
+        # Lightweight proxy that only prints a composed header when actually streamed
+        class _ComposedHeaderProxy:
+            def __init__(self, inner, alias: str):
+                self._inner = inner
+                self._alias = alias
+
+            def stream(self):  # Iterator[Any]
+                detail_entries: Optional[list[str]] = None
+                try:
+                    spec = getattr(self._inner, "_spec", None)
+                    inputs = getattr(spec, "inputs", None)
+                    if isinstance(inputs, (list, tuple)) and inputs:
+                        detail_entries = [str(item) for item in inputs]
+                except Exception:
+                    detail_entries = None
+                log_combined_stream(self._alias, detail_entries)
+                yield from self._inner.stream()
+
+        for alias, src in originals.items():
+            # Wrap composed/virtual sources with a header-only proxy; others with visuals
+            if getattr(src, "loader", None) is None:
+                reg.register(alias, _ComposedHeaderProxy(src, alias))
+            else:
+                reg.register(alias, VisualSourceProxy(src, alias, verbosity))
+        yield
+    finally:
+        # Restore original sources
+        for alias, src in originals.items():
+            reg.register(alias, src)
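
Note: in sources_basic.py the requested progress style and the logging level together decide the rendering mode; an explicit style always wins, and "auto" picks per-item bars at DEBUG and a spinner otherwise. A minimal, self-contained sketch of that mapping follows (the logic is copied from _style_mode above; the standalone function name is just for illustration).

# Mapping reproduced from _style_mode above; runnable on its own.
import logging


def style_mode(progress_style: str, log_level: int | None) -> str:
    mode = (progress_style or "auto").lower()
    if mode == "auto":
        level = log_level if log_level is not None else logging.INFO
        return "bars" if level <= logging.DEBUG else "spinner"
    return mode


assert style_mode("auto", logging.DEBUG) == "bars"     # DEBUG -> per-item tqdm bars (verbosity 2)
assert style_mode("auto", logging.INFO) == "spinner"   # INFO -> lightweight spinner (verbosity 1)
assert style_mode("auto", None) == "spinner"           # missing level behaves like INFO
assert style_mode("OFF", logging.DEBUG) == "off"       # an explicit style always wins
# Levels above INFO skip wrapping entirely; visual_sources yields without proxying.
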
datapipeline/cli/visuals/sources_off.py
@@ -0,0 +1,76 @@
+from contextlib import contextmanager
+from typing import Iterator, Any, Optional
+import logging
+
+from datapipeline.runtime import Runtime
+from datapipeline.sources.models.source import Source
+
+from .labels import progress_meta_for_loader
+from .common import log_transport_details, log_combined_stream
+
+logger = logging.getLogger(__name__)
+
+
+class _OffSourceProxy(Source):
+    def __init__(self, inner: Source, alias: str):
+        self._inner = inner
+        self._alias = alias
+
+    def stream(self) -> Iterator[Any]:
+        loader = getattr(self._inner, "loader", None)
+        transport = getattr(loader, "transport", None)
+        _, unit = progress_meta_for_loader(loader)
+        emitted = 0
+        started = False
+        try:
+            for item in self._inner.stream():
+                if not started:
+                    try:
+                        log_transport_details(transport, self._alias)
+                    except Exception:
+                        pass
+                    started = True
+                emitted += 1
+                yield item
+        finally:
+            if logger.isEnabledFor(logging.INFO):
+                unit_label = f" {unit}" if unit else ""
+                logger.info("[%s] Stream complete (%d%s) ✔", self._alias, emitted, unit_label)
+
+
+@contextmanager
+def visual_sources(runtime: Runtime, log_level: int | None, progress_style: str = "auto"):
+    if log_level is None or log_level > logging.INFO:
+        yield
+        return
+
+    reg = runtime.registries.stream_sources
+    originals = dict(reg.items())
+
+    try:
+        class _ComposedHeaderProxy:
+            def __init__(self, inner, alias: str):
+                self._inner = inner
+                self._alias = alias
+
+            def stream(self):
+                detail_entries: Optional[list[str]] = None
+                try:
+                    spec = getattr(self._inner, "_spec", None)
+                    inputs = getattr(spec, "inputs", None)
+                    if isinstance(inputs, (list, tuple)) and inputs:
+                        detail_entries = [str(item) for item in inputs]
+                except Exception:
+                    detail_entries = None
+                log_combined_stream(self._alias, detail_entries)
+                yield from self._inner.stream()
+
+        for alias, src in originals.items():
+            if getattr(src, "loader", None) is None:
+                reg.register(alias, _ComposedHeaderProxy(src, alias))
+            else:
+                reg.register(alias, _OffSourceProxy(src, alias))
+        yield
+    finally:
+        for alias, src in originals.items():
+            reg.register(alias, src)
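
Note: all three visual_sources implementations share the same swap-and-restore shape: proxies are registered over the original sources inside the try block, composed/virtual sources (those without a loader) get a header-only proxy, and the originals are always re-registered in finally even if the pipeline fails. The sketch below isolates that pattern with hypothetical stand-ins (FakeRegistry and the lambda proxy are not part of the package; the real registry is runtime.registries.stream_sources).

# Hypothetical stand-ins illustrating the wrap-and-restore pattern used above.
from contextlib import contextmanager


class FakeRegistry:
    def __init__(self):
        self._sources = {}

    def register(self, alias, src):
        self._sources[alias] = src

    def items(self):
        return self._sources.items()


@contextmanager
def wrapped(reg, make_proxy):
    originals = dict(reg.items())
    try:
        for alias, src in originals.items():
            reg.register(alias, make_proxy(src, alias))  # swap proxies in
        yield
    finally:
        for alias, src in originals.items():
            reg.register(alias, src)  # always restore originals, even on error


reg = FakeRegistry()
reg.register("ticks", object())
with wrapped(reg, lambda src, alias: ("proxy", src, alias)):
    assert isinstance(dict(reg.items())["ticks"], tuple)   # proxied inside the block
assert not isinstance(dict(reg.items())["ticks"], tuple)   # original restored on exit
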