fournex-0.1.1.tar.gz

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (49)
  1. fournex-0.1.1/PKG-INFO +91 -0
  2. fournex-0.1.1/README.md +65 -0
  3. fournex-0.1.1/fournex/__init__.py +168 -0
  4. fournex-0.1.1/fournex/__main__.py +7 -0
  5. fournex-0.1.1/fournex/_native.py +9 -0
  6. fournex-0.1.1/fournex/analysis.py +825 -0
  7. fournex-0.1.1/fournex/autopilot/__init__.py +4 -0
  8. fournex-0.1.1/fournex/autopilot/actions.py +134 -0
  9. fournex-0.1.1/fournex/autopilot/benchmark.py +164 -0
  10. fournex-0.1.1/fournex/autopilot/comparison.py +196 -0
  11. fournex-0.1.1/fournex/autopilot/guards.py +63 -0
  12. fournex-0.1.1/fournex/autopilot/local_executor.py +356 -0
  13. fournex-0.1.1/fournex/autopilot/quality.py +122 -0
  14. fournex-0.1.1/fournex/autopilot/report.py +154 -0
  15. fournex-0.1.1/fournex/autopilot/runner.py +519 -0
  16. fournex-0.1.1/fournex/autopilot/safety.py +122 -0
  17. fournex-0.1.1/fournex/autopilot/tuners/__init__.py +165 -0
  18. fournex-0.1.1/fournex/autopilot/tuners/batch_size.py +63 -0
  19. fournex-0.1.1/fournex/autopilot/tuners/dataloader.py +113 -0
  20. fournex-0.1.1/fournex/autopilot/tuners/memory.py +43 -0
  21. fournex-0.1.1/fournex/autopilot/tuners/mixed_precision.py +86 -0
  22. fournex-0.1.1/fournex/autopilot/tuners/runtime.py +82 -0
  23. fournex-0.1.1/fournex/cli.py +1273 -0
  24. fournex-0.1.1/fournex/common_ir.py +254 -0
  25. fournex-0.1.1/fournex/common_ir_analysis.py +241 -0
  26. fournex-0.1.1/fournex/common_ir_validators.py +44 -0
  27. fournex-0.1.1/fournex/cuda_timers.py +191 -0
  28. fournex-0.1.1/fournex/data_pipeline_ir.py +144 -0
  29. fournex-0.1.1/fournex/dataloader.py +115 -0
  30. fournex-0.1.1/fournex/distributed_ir.py +165 -0
  31. fournex-0.1.1/fournex/nvml_ir.py +157 -0
  32. fournex-0.1.1/fournex/profiler.py +199 -0
  33. fournex-0.1.1/fournex/pytorch_profiler_ir.py +256 -0
  34. fournex-0.1.1/fournex/recommendations/__init__.py +4 -0
  35. fournex-0.1.1/fournex/recommendations/engine.py +342 -0
  36. fournex-0.1.1/fournex/recommendations/signals.py +90 -0
  37. fournex-0.1.1/fournex/sdk.py +242 -0
  38. fournex-0.1.1/fournex/shapes.py +91 -0
  39. fournex-0.1.1/fournex/step_context.py +145 -0
  40. fournex-0.1.1/fournex/storage.py +82 -0
  41. fournex-0.1.1/fournex.egg-info/PKG-INFO +91 -0
  42. fournex-0.1.1/fournex.egg-info/SOURCES.txt +47 -0
  43. fournex-0.1.1/fournex.egg-info/dependency_links.txt +1 -0
  44. fournex-0.1.1/fournex.egg-info/entry_points.txt +3 -0
  45. fournex-0.1.1/fournex.egg-info/requires.txt +1 -0
  46. fournex-0.1.1/fournex.egg-info/top_level.txt +2 -0
  47. fournex-0.1.1/pyproject.toml +50 -0
  48. fournex-0.1.1/setup.cfg +4 -0
  49. fournex-0.1.1/setup.py +45 -0
fournex-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,91 @@
+ Metadata-Version: 2.4
+ Name: fournex
+ Version: 0.1.1
+ Summary: Open-source GPU performance profiler and bottleneck analyzer for PyTorch.
+ Author-email: Fournex <hello@fournex.com>
+ License: MIT
+ Project-URL: Homepage, https://fournex.com
+ Project-URL: Repository, https://github.com/jorgevee/fournex
+ Project-URL: Documentation, https://fournex.com/docs
+ Project-URL: Bug Tracker, https://github.com/jorgevee/fournex/issues
+ Keywords: pytorch,gpu,profiling,cuda,performance,mlops,bottleneck,optimization,training
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: System :: Monitoring
+ Classifier: Environment :: GPU :: NVIDIA CUDA
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: PyYAML>=6.0
+
+ # Fournex
+
+ **Open-source GPU performance profiler and bottleneck analyzer for PyTorch.**
+
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/jorgevee/fournex/blob/main/LICENSE)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/)
+
+ Fournex wraps your training script, collects GPU telemetry, and tells you exactly what is slowing it down — with ranked, actionable recommendations.
+
+ ## Install
+
+ ```bash
+ pip install fournex
+ ```
+
+ ## Quick start
+
+ ```bash
+ # Profile your workload
+ frx collect --name my-run -- python train.py
+
+ # Analyze and get recommendations
+ frx analyze runs/run-<id>
+
+ # Check your environment
+ frx doctor
+
+ # Validate the pipeline end-to-end
+ frx smoke-test
+ ```
+
+ ## Detected bottleneck types
+
+ | Label | Signal |
+ |---|---|
+ | `input_bound` | DataLoader wait ≥ 20% of step time |
+ | `copy_bound` | H2D transfer ≥ 15% of step time |
+ | `sync_bound` | Sync wait ≥ 10% of step time |
+ | `underutilized_gpu` | GPU utilization < 35% |
+ | `memory_pressure` | Peak memory ratio ≥ 90% |
+ | `shape_instability` | Shape volatility ≥ 30% |
+ | `launch_bound` | Low utilization + profiler windows, no dominant stall |
+ | `insufficient_telemetry` | No timing or GPU utilization data |
+
+ ## Safe config benchmarking
+
+ ```bash
+ frx tune --safe --max-trials 12 -- python train.py
+ ```
+
+ Fournex sweeps DataLoader and runtime configs, benchmarks each one, and recommends the fastest safe candidate — without changing your code.
+
+ Interrupted or repeated tune runs can reuse completed trial artifacts:
+
+ ```bash
+ frx tune --resume runs/tune-<id> -- python train.py
+ ```
+
+ `--resume` reuses a trial only when the saved `config.yaml`, `benchmark_window.json`, and `metrics.json` match the current workload command and benchmark settings.
+
+ ## Links
+
+ - [GitHub](https://github.com/jorgevee/fournex)
+ - [Documentation](https://fournex.com/docs)
+ - [Website](https://fournex.com)
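
As a rough mental model, each label in the table above is a threshold check over per-step telemetry ratios. A minimal sketch of that mapping, assuming illustrative metric names (the package's real logic lives in `fournex.analysis.classify_bottlenecks` and also covers the `launch_bound` and `insufficient_telemetry` cases, which need profiler-window and data-availability context):

```python
# Illustrative only: the metric keys are assumptions, not fournex's actual field names.
def classify_step(metrics: dict[str, float]) -> list[str]:
    """Map one step's telemetry (fractions of step time) to bottleneck labels."""
    labels: list[str] = []
    if metrics.get("dataloader_wait_ratio", 0.0) >= 0.20:
        labels.append("input_bound")
    if metrics.get("h2d_copy_ratio", 0.0) >= 0.15:
        labels.append("copy_bound")
    if metrics.get("sync_wait_ratio", 0.0) >= 0.10:
        labels.append("sync_bound")
    if metrics.get("gpu_utilization", 1.0) < 0.35:
        labels.append("underutilized_gpu")
    if metrics.get("peak_memory_ratio", 0.0) >= 0.90:
        labels.append("memory_pressure")
    if metrics.get("shape_volatility", 0.0) >= 0.30:
        labels.append("shape_instability")
    return labels
```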
fournex-0.1.1/README.md ADDED
@@ -0,0 +1,65 @@
+ # Fournex
+
+ **Open-source GPU performance profiler and bottleneck analyzer for PyTorch.**
+
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/jorgevee/fournex/blob/main/LICENSE)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/)
+
+ Fournex wraps your training script, collects GPU telemetry, and tells you exactly what is slowing it down — with ranked, actionable recommendations.
+
+ ## Install
+
+ ```bash
+ pip install fournex
+ ```
+
+ ## Quick start
+
+ ```bash
+ # Profile your workload
+ frx collect --name my-run -- python train.py
+
+ # Analyze and get recommendations
+ frx analyze runs/run-<id>
+
+ # Check your environment
+ frx doctor
+
+ # Validate the pipeline end-to-end
+ frx smoke-test
+ ```
+
+ ## Detected bottleneck types
+
+ | Label | Signal |
+ |---|---|
+ | `input_bound` | DataLoader wait ≥ 20% of step time |
+ | `copy_bound` | H2D transfer ≥ 15% of step time |
+ | `sync_bound` | Sync wait ≥ 10% of step time |
+ | `underutilized_gpu` | GPU utilization < 35% |
+ | `memory_pressure` | Peak memory ratio ≥ 90% |
+ | `shape_instability` | Shape volatility ≥ 30% |
+ | `launch_bound` | Low utilization + profiler windows, no dominant stall |
+ | `insufficient_telemetry` | No timing or GPU utilization data |
+
+ ## Safe config benchmarking
+
+ ```bash
+ frx tune --safe --max-trials 12 -- python train.py
+ ```
+
+ Fournex sweeps DataLoader and runtime configs, benchmarks each one, and recommends the fastest safe candidate — without changing your code.
+
+ Interrupted or repeated tune runs can reuse completed trial artifacts:
+
+ ```bash
+ frx tune --resume runs/tune-<id> -- python train.py
+ ```
+
+ `--resume` reuses a trial only when the saved `config.yaml`, `benchmark_window.json`, and `metrics.json` match the current workload command and benchmark settings.
+
+ ## Links
+
+ - [GitHub](https://github.com/jorgevee/fournex)
+ - [Documentation](https://fournex.com/docs)
+ - [Website](https://fournex.com)
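
The `--resume` rule above is essentially an artifact-fingerprint check per trial directory. A minimal sketch of the idea, assuming the artifact file names from the README (the keys inside `config.yaml` and the exact matching rules are assumptions, not the package's implementation):

```python
# Illustrative resume check; fournex's actual matching logic may differ.
import json
from pathlib import Path

import yaml  # PyYAML is the package's one declared dependency


def trial_is_reusable(trial_dir: Path, command: list[str], window: dict) -> bool:
    """Reuse a completed trial only if its saved artifacts match the current run."""
    config_path = trial_dir / "config.yaml"
    window_path = trial_dir / "benchmark_window.json"
    metrics_path = trial_dir / "metrics.json"
    if not (config_path.exists() and window_path.exists() and metrics_path.exists()):
        return False  # incomplete trial: metrics.json marks a finished benchmark
    saved_config = yaml.safe_load(config_path.read_text())
    saved_window = json.loads(window_path.read_text())
    return saved_config.get("command") == command and saved_window == window
```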
fournex-0.1.1/fournex/__init__.py ADDED
@@ -0,0 +1,168 @@
+ from .sdk import (
+     EVENT_LEVELS,
+     EVENT_SOURCES,
+     EVENT_TYPES,
+     SCHEMA_VERSION,
+     begin_span,
+     build_runtime_event,
+     clear_local_events,
+     emit_event,
+     end_span,
+     flush,
+     get_local_events,
+     get_runtime_config,
+     init,
+     make_event,
+     shutdown,
+ )
+ from .dataloader import InstrumentedDataLoader, instrument_dataloader
+ from .analysis import (
+     build_diagnosis_result,
+     classify_bottlenecks,
+     derive_run_summary,
+     derive_step_metrics,
+     select_steady_state_step_ids,
+     summarize_run,
+     summarize_run_with_steady_state,
+     summarize_steady_state,
+     summarize_step_scope,
+ )
+ from .common_ir_analysis import (
+     derive_ir_bottleneck_annotations,
+     derive_ir_run_summary,
+     derive_ir_step_summaries,
+     summarize_ir_run,
+ )
+ from .common_ir import (
+     AnnotationRecord,
+     BOTTLENECK_CLASSES,
+     EVENT_FAMILIES,
+     EventRecord,
+     JobInfo,
+     MEMORY_OPS,
+     MODEL_FAMILIES,
+     MetricRecord,
+     RunRecord,
+     WORKLOAD_CLASSES,
+     WorkloadInfo,
+     validate_run_dict,
+ )
+ from .common_ir_validators import (
+     semantic_warnings_for_run,
+     validate_annotation_record,
+     validate_event_record,
+     validate_metric_record,
+     validate_run_payload,
+     validate_run_record,
+ )
+ from .cuda_timers import time_memcpy, time_phase, time_region
+ from .storage import (
+     persist_local_trace,
+     persist_run_artifacts,
+     persist_run_summary,
+     persist_run_with_steady_state_summary,
+ )
+ from .profiler import (
+     ProfilerSchedule,
+     configure_sampled_profiler,
+     get_profiler_controller,
+     profiler_step_end,
+     profiler_step_start,
+     profiler_window,
+ )
+ from .pytorch_profiler_ir import (
+     PytorchProfilerTrace,
+     PytorchProfilerTraceEvent,
+     map_pytorch_profiler_to_ir,
+ )
+ from .nvml_ir import NvmlSampleRecord, map_nvml_sample_to_ir
+ from .distributed_ir import DistributedCommRecord, map_distributed_record_to_ir
+ from .data_pipeline_ir import DataPipelineRecord, map_data_pipeline_record_to_ir
+ from .step_context import phase, step_context
+ from .shapes import (
+     describe_batch,
+     extract_dtypes,
+     extract_shapes,
+     infer_batch_size,
+     infer_sequence_length,
+ )
+
+ __all__ = [
+     "AnnotationRecord",
+     "BOTTLENECK_CLASSES",
+     "DataPipelineRecord",
+     "DistributedCommRecord",
+     "EVENT_LEVELS",
+     "EVENT_FAMILIES",
+     "EVENT_SOURCES",
+     "EVENT_TYPES",
+     "EventRecord",
+     "InstrumentedDataLoader",
+     "JobInfo",
+     "MEMORY_OPS",
+     "MODEL_FAMILIES",
+     "MetricRecord",
+     "NvmlSampleRecord",
+     "ProfilerSchedule",
+     "PytorchProfilerTrace",
+     "PytorchProfilerTraceEvent",
+     "RunRecord",
+     "SCHEMA_VERSION",
+     "WORKLOAD_CLASSES",
+     "WorkloadInfo",
+     "begin_span",
+     "build_diagnosis_result",
+     "build_runtime_event",
+     "classify_bottlenecks",
+     "clear_local_events",
+     "derive_ir_bottleneck_annotations",
+     "derive_ir_run_summary",
+     "derive_ir_step_summaries",
+     "describe_batch",
+     "derive_run_summary",
+     "derive_step_metrics",
+     "emit_event",
+     "end_span",
+     "extract_dtypes",
+     "flush",
+     "get_local_events",
+     "get_profiler_controller",
+     "get_runtime_config",
+     "init",
+     "make_event",
+     "configure_sampled_profiler",
+     "profiler_step_end",
+     "profiler_step_start",
+     "profiler_window",
+     "select_steady_state_step_ids",
+     "shutdown",
+     "phase",
+     "map_pytorch_profiler_to_ir",
+     "map_nvml_sample_to_ir",
+     "map_distributed_record_to_ir",
+     "map_data_pipeline_record_to_ir",
+     "persist_local_trace",
+     "persist_run_artifacts",
+     "persist_run_summary",
+     "persist_run_with_steady_state_summary",
+     "step_context",
+     "summarize_run",
+     "summarize_run_with_steady_state",
+     "summarize_steady_state",
+     "summarize_step_scope",
+     "summarize_ir_run",
+     "extract_shapes",
+     "infer_batch_size",
+     "infer_sequence_length",
+     "instrument_dataloader",
+     "time_memcpy",
+     "time_phase",
+     "time_region",
+     "validate_run_dict",
+     "validate_annotation_record",
+     "validate_event_record",
+     "validate_metric_record",
+     "validate_run_payload",
+     "validate_run_record",
+     "semantic_warnings_for_run",
+ ]
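
The exports above form the in-process SDK surface: lifecycle (`init`, `flush`, `shutdown`), step and phase scoping (`step_context`, `phase`), and DataLoader instrumentation (`instrument_dataloader`). A minimal usage sketch; the argument shapes of `init`, `step_context`, and `phase` here are guesses from the exported names, not documented API:

```python
# Usage sketch only: the fournex call signatures below are assumptions.
import torch
import fournex

model = torch.nn.Linear(16, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.01)
dataset = torch.utils.data.TensorDataset(torch.randn(256, 16), torch.randn(256, 1))
loader = torch.utils.data.DataLoader(dataset, batch_size=32)

fournex.init()                                  # assumed: no-arg default config
loader = fournex.instrument_dataloader(loader)  # wrap to record fetch-wait time

for step, (x, y) in enumerate(loader):
    with fournex.step_context(step):            # assumed: takes a step id
        with fournex.phase("forward"):          # assumed: takes a phase name
            loss = torch.nn.functional.mse_loss(model(x), y)
        with fournex.phase("backward"):
            opt.zero_grad()
            loss.backward()
            opt.step()

fournex.flush()
fournex.shutdown()
```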
fournex-0.1.1/fournex/__main__.py ADDED
@@ -0,0 +1,7 @@
+ from __future__ import annotations
+
+ from .cli import main
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
fournex-0.1.1/fournex/_native.py ADDED
@@ -0,0 +1,9 @@
+ from __future__ import annotations
+
+ try:
+     from . import _fournex_native as native
+ except ImportError:
+     native = None
+
+
+ HAS_NATIVE = native is not None
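
The optional-import pattern above gives the rest of the package one flag, `HAS_NATIVE`, to branch on. A sketch of a typical consumer, with a hypothetical accelerated function (nothing named `checksum` exists in the package; it only illustrates the gating):

```python
# Hypothetical consumer of HAS_NATIVE; `native.checksum` is invented for illustration.
from fournex._native import HAS_NATIVE, native


def checksum(buf: bytes) -> int:
    if HAS_NATIVE:
        return native.checksum(buf)   # fast path via the compiled extension
    return sum(buf) % 65536           # pure-Python fallback
```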