pirate-frb 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pirate_frb-0.0.1/Makefile +288 -0
- pirate_frb-0.0.1/PKG-INFO +7 -0
- pirate_frb-0.0.1/include/pirate/DedispersionConfig.hpp +86 -0
- pirate_frb-0.0.1/include/pirate/DedispersionPlan.hpp +100 -0
- pirate_frb-0.0.1/include/pirate/avx256/downsample.hpp +571 -0
- pirate_frb-0.0.1/include/pirate/avx256/m128_outbuf.hpp +55 -0
- pirate_frb-0.0.1/include/pirate/avx256/m64_outbuf.hpp +103 -0
- pirate_frb-0.0.1/include/pirate/constants.hpp +38 -0
- pirate_frb-0.0.1/include/pirate/gpu/DownsampleKernel.hpp +157 -0
- pirate_frb-0.0.1/include/pirate/gpu/TransposeKernel.hpp +64 -0
- pirate_frb-0.0.1/include/pirate/gpu/reduce2.hpp +64 -0
- pirate_frb-0.0.1/include/pirate/internals/Directory.hpp +49 -0
- pirate_frb-0.0.1/include/pirate/internals/Epoll.hpp +74 -0
- pirate_frb-0.0.1/include/pirate/internals/FakeCorrelator.hpp +58 -0
- pirate_frb-0.0.1/include/pirate/internals/FakeServer.hpp +139 -0
- pirate_frb-0.0.1/include/pirate/internals/File.hpp +39 -0
- pirate_frb-0.0.1/include/pirate/internals/GpuDedispersionKernel.hpp +105 -0
- pirate_frb-0.0.1/include/pirate/internals/ReferenceDedisperser.hpp +96 -0
- pirate_frb-0.0.1/include/pirate/internals/ReferenceDedispersionKernel.hpp +51 -0
- pirate_frb-0.0.1/include/pirate/internals/ReferenceLagbuf.hpp +37 -0
- pirate_frb-0.0.1/include/pirate/internals/ReferenceLaggedDownsamplingKernel.hpp +64 -0
- pirate_frb-0.0.1/include/pirate/internals/ReferenceTree.hpp +63 -0
- pirate_frb-0.0.1/include/pirate/internals/Socket.hpp +86 -0
- pirate_frb-0.0.1/include/pirate/internals/UntypedArray.hpp +38 -0
- pirate_frb-0.0.1/include/pirate/internals/YamlFile.hpp +196 -0
- pirate_frb-0.0.1/include/pirate/internals/bitvec.hpp +104 -0
- pirate_frb-0.0.1/include/pirate/internals/cpu_downsample.hpp +20 -0
- pirate_frb-0.0.1/include/pirate/internals/dedispersion_inbufs.hpp +139 -0
- pirate_frb-0.0.1/include/pirate/internals/dedispersion_kernel_implementation.hpp +2346 -0
- pirate_frb-0.0.1/include/pirate/internals/dedispersion_outbufs.hpp +127 -0
- pirate_frb-0.0.1/include/pirate/internals/file_utils.hpp +33 -0
- pirate_frb-0.0.1/include/pirate/internals/gpu_downsample.hpp +19 -0
- pirate_frb-0.0.1/include/pirate/internals/gpu_transpose.hpp +17 -0
- pirate_frb-0.0.1/include/pirate/internals/inlines.hpp +164 -0
- pirate_frb-0.0.1/include/pirate/internals/system_utils.hpp +39 -0
- pirate_frb-0.0.1/include/pirate/internals/utils.hpp +78 -0
- pirate_frb-0.0.1/makefile_helper.py +72 -0
- pirate_frb-0.0.1/pirate_frb/__init__.py +6 -0
- pirate_frb-0.0.1/pyproject.toml +15 -0
- pirate_frb-0.0.1/src_bin/fake_correlator.cu +21 -0
- pirate_frb-0.0.1/src_bin/fake_server.cu +152 -0
- pirate_frb-0.0.1/src_bin/scratch.cu +26 -0
- pirate_frb-0.0.1/src_bin/show_dedispersion_plan.cu +30 -0
- pirate_frb-0.0.1/src_bin/test-avx256-m64-outbuf.cu +116 -0
- pirate_frb-0.0.1/src_bin/test-cpu-downsampler.cu +75 -0
- pirate_frb-0.0.1/src_bin/test-gpu-dedispersion-kernels.cu +337 -0
- pirate_frb-0.0.1/src_bin/test-gpu-downsample.cu +108 -0
- pirate_frb-0.0.1/src_bin/test-gpu-lagged-downsampler.cu +286 -0
- pirate_frb-0.0.1/src_bin/test-gpu-reduce2.cu +70 -0
- pirate_frb-0.0.1/src_bin/test-gpu-transpose.cu +53 -0
- pirate_frb-0.0.1/src_bin/test-reference-dedisperser.cu +113 -0
- pirate_frb-0.0.1/src_bin/test-reference-tree.cu +371 -0
- pirate_frb-0.0.1/src_bin/time-cpu-downsample.cu +179 -0
- pirate_frb-0.0.1/src_bin/time-gpu-dedispersion-kernels.cu +90 -0
- pirate_frb-0.0.1/src_bin/time-gpu-downsample.cu +63 -0
- pirate_frb-0.0.1/src_bin/time-gpu-lagged-downsampler.cu +108 -0
- pirate_frb-0.0.1/src_bin/time-gpu-transpose.cu +41 -0
- pirate_frb-0.0.1/src_lib/DedispersionConfig.cu +320 -0
- pirate_frb-0.0.1/src_lib/DedispersionPlan.cu +293 -0
- pirate_frb-0.0.1/src_lib/Directory.cu +44 -0
- pirate_frb-0.0.1/src_lib/Epoll.cu +110 -0
- pirate_frb-0.0.1/src_lib/FakeCorrelator.cu +112 -0
- pirate_frb-0.0.1/src_lib/FakeServer.cu +1003 -0
- pirate_frb-0.0.1/src_lib/File.cu +75 -0
- pirate_frb-0.0.1/src_lib/GpuDedispersionKernel.cu +613 -0
- pirate_frb-0.0.1/src_lib/GpuLaggedDownsamplingKernel.cu +975 -0
- pirate_frb-0.0.1/src_lib/ReferenceDedisperser.cu +637 -0
- pirate_frb-0.0.1/src_lib/ReferenceDedispersionKernel.cu +206 -0
- pirate_frb-0.0.1/src_lib/ReferenceLagbuf.cu +116 -0
- pirate_frb-0.0.1/src_lib/ReferenceLaggedDownsamplingKernel.cu +153 -0
- pirate_frb-0.0.1/src_lib/ReferenceTree.cu +162 -0
- pirate_frb-0.0.1/src_lib/Socket.cu +308 -0
- pirate_frb-0.0.1/src_lib/UntypedArray.cu +144 -0
- pirate_frb-0.0.1/src_lib/YamlFile.cu +133 -0
- pirate_frb-0.0.1/src_lib/cpu_downsample.cu +76 -0
- pirate_frb-0.0.1/src_lib/file_utils.cu +134 -0
- pirate_frb-0.0.1/src_lib/gpu_downsample.cu +87 -0
- pirate_frb-0.0.1/src_lib/gpu_transpose.cu +45 -0
- pirate_frb-0.0.1/src_lib/system_utils.cu +119 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_float16.cu +11 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_float32.cu +11 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_nolag_float16.cu +11 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_nolag_float32.cu +11 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage0_float16.cu +11 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage0_float32.cu +11 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage1_float16.cu +11 -0
- pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage1_float32.cu +11 -0
- pirate_frb-0.0.1/src_lib/utils.cu +269 -0
- pirate_frb-0.0.1/src_pybind11/pirate_pybind11.cu +30 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# This Makefile will be invoked by the python build system (e.g. via 'pip install'),
|
|
2
|
+
# but you can also build individual targets by invoking 'make' directly.
|
|
3
|
+
|
|
4
|
+
# Disable built-in rules and variables (must be first).
|
|
5
|
+
MAKEFLAGS += --no-builtin-rules
|
|
6
|
+
MAKEFLAGS += --no-builtin-variables
|
|
7
|
+
|
|
8
|
+
# Default target 'all' must be first target in Makefile.
|
|
9
|
+
# The 'bin' target builds a bunch of binaries in bin/...
|
|
10
|
+
# The 'lib' target builds the C++ library lib/libpirate.so, and the python extension pirate_frb/pirate_pybind11...so.
|
|
11
|
+
# The 'build_wheel' and 'build_sdist' targets are invoked by 'pip' (or 'make all').
|
|
12
|
+
all: bin lib build_wheel build_sdist
|
|
13
|
+
|
|
14
|
+
.PHONY: all bin lib build_wheel build_sdist clean
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
####################################################################################################
|
|
18
|
+
#
|
|
19
|
+
# Variables encoding configuration: PYTHON, NVCC, NVCC_ARCH, NVCC_DEPFLAGS.
|
|
20
|
+
#
|
|
21
|
+
# FIXME some day I'll define a configure-script mechanism for setting these variables.
|
|
22
|
+
# For now, if you want to change the defaults, just edit the Makefile.
|
|
23
|
+
|
|
24
|
+
PYTHON ?= python3
|
|
25
|
+
NVCC ?= nvcc -std=c++17 -m64 -O3 --compiler-options -Wall,-fPIC,-march=x86-64-v3
|
|
26
|
+
|
|
27
|
+
# Extra nvcc flags needed to build Makefile dependencies
|
|
28
|
+
# -MMD create dep file, omitting "system" headers
|
|
29
|
+
# -MP add phony target for each header in dep file (makes error reporting less confusing)
|
|
30
|
+
# Note: we don't need "-MT $@", since we use in-tree object filenames (x.cu -> x.o).
|
|
31
|
+
# Note: we don't need "-MT $*.d", since we use in-tree depfile names (x.cu -> x.d).
|
|
32
|
+
NVCC_DEPFLAGS ?= -MMD -MP
|
|
33
|
+
|
|
34
|
+
# NVIDIA architecture.
|
|
35
|
+
DEFAULT_NVCC_ARCH = -gencode arch=compute_80,code=sm_80
|
|
36
|
+
DEFAULT_NVCC_ARCH += -gencode arch=compute_86,code=sm_86
|
|
37
|
+
DEFAULT_NVCC_ARCH += -gencode arch=compute_89,code=sm_89
|
|
38
|
+
# DEFAULT_NVCC_ARCH += -gencode arch=compute_90,code=sm_90
|
|
39
|
+
NVCC_ARCH ?= $(DEFAULT_NVCC_ARCH)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
####################################################################################################
|
|
43
|
+
#
|
|
44
|
+
# "Derived" config variables: PYTHON_INCDIR, NUMPY_INCDIR, PYBIND11_INCDIR, PYEXT_SUFFIX, KSGPU_DIR.
|
|
45
|
+
#
|
|
46
|
+
# These are autogenerated by makefile_helper.py, and cached in makefile_helper.out.
|
|
47
|
+
# PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
ifneq ($(MAKECMDGOALS),clean)
|
|
51
|
+
include makefile_helper.out
|
|
52
|
+
endif
|
|
53
|
+
|
|
54
|
+
makefile_helper.out: makefile_helper.py Makefile
|
|
55
|
+
$(PYTHON) makefile_helper.py
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
####################################################################################################
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# The main output of the build process is these two libraries.
|
|
62
|
+
# Reminder: PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.
|
|
63
|
+
PIRATE_LIB := lib/libpirate.so
|
|
64
|
+
PIRATE_PYEXT := pirate_frb/pirate_pybind11$(PYEXT_SUFFIX)
|
|
65
|
+
|
|
66
|
+
# These get compiled into lib/libpirate.so.
|
|
67
|
+
LIB_SRCFILES = \
|
|
68
|
+
src_lib/cpu_downsample.cu \
|
|
69
|
+
src_lib/file_utils.cu \
|
|
70
|
+
src_lib/gpu_downsample.cu \
|
|
71
|
+
src_lib/gpu_transpose.cu \
|
|
72
|
+
src_lib/system_utils.cu \
|
|
73
|
+
src_lib/utils.cu \
|
|
74
|
+
src_lib/DedispersionConfig.cu \
|
|
75
|
+
src_lib/DedispersionPlan.cu \
|
|
76
|
+
src_lib/Directory.cu \
|
|
77
|
+
src_lib/Epoll.cu \
|
|
78
|
+
src_lib/FakeCorrelator.cu \
|
|
79
|
+
src_lib/FakeServer.cu \
|
|
80
|
+
src_lib/File.cu \
|
|
81
|
+
src_lib/GpuDedispersionKernel.cu \
|
|
82
|
+
src_lib/GpuLaggedDownsamplingKernel.cu \
|
|
83
|
+
src_lib/ReferenceDedisperser.cu \
|
|
84
|
+
src_lib/ReferenceDedispersionKernel.cu \
|
|
85
|
+
src_lib/ReferenceLagbuf.cu \
|
|
86
|
+
src_lib/ReferenceLaggedDownsamplingKernel.cu \
|
|
87
|
+
src_lib/ReferenceTree.cu \
|
|
88
|
+
src_lib/Socket.cu \
|
|
89
|
+
src_lib/UntypedArray.cu \
|
|
90
|
+
src_lib/YamlFile.cu \
|
|
91
|
+
src_lib/template_instantiations/dedisp_simple_float16.cu \
|
|
92
|
+
src_lib/template_instantiations/dedisp_simple_float32.cu \
|
|
93
|
+
src_lib/template_instantiations/dedisp_simple_nolag_float16.cu \
|
|
94
|
+
src_lib/template_instantiations/dedisp_simple_nolag_float32.cu \
|
|
95
|
+
src_lib/template_instantiations/dedisp_stage0_float16.cu \
|
|
96
|
+
src_lib/template_instantiations/dedisp_stage0_float32.cu \
|
|
97
|
+
src_lib/template_instantiations/dedisp_stage1_float16.cu \
|
|
98
|
+
src_lib/template_instantiations/dedisp_stage1_float32.cu
|
|
99
|
+
|
|
100
|
+
# These get compiled into pirate_frb/pirate_pybind11....so.
|
|
101
|
+
PYEXT_SRCFILES = \
|
|
102
|
+
src_pybind11/pirate_pybind11.cu
|
|
103
|
+
|
|
104
|
+
# Must list all python source files here.
|
|
105
|
+
# (Otherwise they won't show up in 'pip install' or pypi.)
|
|
106
|
+
PYFILES = \
|
|
107
|
+
pirate_frb/__init__.py
|
|
108
|
+
|
|
109
|
+
# These are in 1-1 correspondence with executables in bin/
|
|
110
|
+
# For example, 'src_bin/fake_correlator.cu' gets compiled to 'bin/fake_correlator'.
|
|
111
|
+
BIN_SRCFILES = \
|
|
112
|
+
src_bin/fake_correlator.cu \
|
|
113
|
+
src_bin/fake_server.cu \
|
|
114
|
+
src_bin/scratch.cu \
|
|
115
|
+
src_bin/show_dedispersion_plan.cu \
|
|
116
|
+
src_bin/test-avx256-m64-outbuf.cu \
|
|
117
|
+
src_bin/test-cpu-downsampler.cu \
|
|
118
|
+
src_bin/test-gpu-dedispersion-kernels.cu \
|
|
119
|
+
src_bin/test-gpu-downsample.cu \
|
|
120
|
+
src_bin/test-gpu-lagged-downsampler.cu \
|
|
121
|
+
src_bin/test-gpu-reduce2.cu \
|
|
122
|
+
src_bin/test-gpu-transpose.cu \
|
|
123
|
+
src_bin/test-reference-dedisperser.cu \
|
|
124
|
+
src_bin/test-reference-tree.cu \
|
|
125
|
+
src_bin/time-cpu-downsample.cu \
|
|
126
|
+
src_bin/time-gpu-dedispersion-kernels.cu \
|
|
127
|
+
src_bin/time-gpu-downsample.cu \
|
|
128
|
+
src_bin/time-gpu-lagged-downsampler.cu \
|
|
129
|
+
src_bin/time-gpu-transpose.cu
|
|
130
|
+
|
|
131
|
+
# Must list all header files here.
|
|
132
|
+
# (Otherwise they won't show up in 'pip install' or pypi.)
|
|
133
|
+
HFILES = \
|
|
134
|
+
include/pirate/constants.hpp \
|
|
135
|
+
include/pirate/DedispersionConfig.hpp \
|
|
136
|
+
include/pirate/DedispersionPlan.hpp \
|
|
137
|
+
include/pirate/avx256/downsample.hpp \
|
|
138
|
+
include/pirate/avx256/m64_outbuf.hpp \
|
|
139
|
+
include/pirate/avx256/m128_outbuf.hpp \
|
|
140
|
+
include/pirate/gpu/reduce2.hpp \
|
|
141
|
+
include/pirate/gpu/DownsampleKernel.hpp \
|
|
142
|
+
include/pirate/gpu/TransposeKernel.hpp \
|
|
143
|
+
include/pirate/internals/bitvec.hpp \
|
|
144
|
+
include/pirate/internals/cpu_downsample.hpp \
|
|
145
|
+
include/pirate/internals/dedispersion_kernel_implementation.hpp \
|
|
146
|
+
include/pirate/internals/dedispersion_inbufs.hpp \
|
|
147
|
+
include/pirate/internals/dedispersion_outbufs.hpp \
|
|
148
|
+
include/pirate/internals/gpu_downsample.hpp \
|
|
149
|
+
include/pirate/internals/gpu_transpose.hpp \
|
|
150
|
+
include/pirate/internals/file_utils.hpp \
|
|
151
|
+
include/pirate/internals/inlines.hpp \
|
|
152
|
+
include/pirate/internals/system_utils.hpp \
|
|
153
|
+
include/pirate/internals/utils.hpp \
|
|
154
|
+
include/pirate/internals/Directory.hpp \
|
|
155
|
+
include/pirate/internals/Epoll.hpp \
|
|
156
|
+
include/pirate/internals/FakeCorrelator.hpp \
|
|
157
|
+
include/pirate/internals/FakeServer.hpp \
|
|
158
|
+
include/pirate/internals/File.hpp \
|
|
159
|
+
include/pirate/internals/GpuDedispersionKernel.hpp \
|
|
160
|
+
include/pirate/internals/ReferenceDedisperser.hpp \
|
|
161
|
+
include/pirate/internals/ReferenceDedispersionKernel.hpp \
|
|
162
|
+
include/pirate/internals/ReferenceLagbuf.hpp \
|
|
163
|
+
include/pirate/internals/ReferenceLaggedDownsamplingKernel.hpp \
|
|
164
|
+
include/pirate/internals/ReferenceTree.hpp \
|
|
165
|
+
include/pirate/internals/Socket.hpp \
|
|
166
|
+
include/pirate/internals/UntypedArray.hpp \
|
|
167
|
+
include/pirate/internals/YamlFile.hpp
|
|
168
|
+
|
|
169
|
+
# 'make clean' deletes {*~, *.o, *.d, *.so, *.pyc} from these dirs.
|
|
170
|
+
# Note: 'src_pybind11' and 'pirate_frb' must be listed here. Otherwise 'make clean' leaves behind
# src_pybind11/*.o, src_pybind11/*.d, and the python extension pirate_frb/pirate_pybind11...so.
# (The extension can't be named in CLEAN_FILES, since PYEXT_SUFFIX is undefined during 'make clean'.)
CLEAN_DIRS := . lib src_bin src_lib src_lib/template_instantiations src_pybind11 pirate_frb pirate_frb/__pycache__ include include/pirate include/pirate/avx256 include/pirate/gpu include/pirate/internals
|
|
171
|
+
|
|
172
|
+
# Extra files to be deleted by 'make clean'.
|
|
173
|
+
# Note that 'pirate_frb/include' and 'pirate_frb/lib' are symlinks, so we put them in CLEAN_FILES, not CLEAN_RMDIRS
|
|
174
|
+
CLEAN_FILES := sdist_files.txt wheel_files.txt makefile_helper.out pirate_frb/include pirate_frb/lib
|
|
175
|
+
|
|
176
|
+
# Directories that should be empty at the end of 'make clean', and can be deleted.
|
|
177
|
+
CLEAN_RMDIRS := bin lib pirate_frb/__pycache__
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
####################################################################################################
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
LIB_OFILES := $(LIB_SRCFILES:%.cu=%.o)
|
|
184
|
+
PYEXT_OFILES := $(PYEXT_SRCFILES:%.cu=%.o)
|
|
185
|
+
BIN_XFILES := $(BIN_SRCFILES:src_bin/%.cu=bin/%)
|
|
186
|
+
|
|
187
|
+
# Must include all .d files, or build will break!
|
|
188
|
+
ALL_SRCFILES := $(LIB_SRCFILES) $(PYEXT_SRCFILES) $(BIN_SRCFILES)
|
|
189
|
+
DEPFILES := $(ALL_SRCFILES:%.cu=%.d)
|
|
190
|
+
|
|
191
|
+
SDIST_FILES := pyproject.toml Makefile makefile_helper.py
|
|
192
|
+
SDIST_FILES += $(PYFILES) $(ALL_SRCFILES) $(HFILES)
|
|
193
|
+
|
|
194
|
+
# Some symlinks for the wheel:
|
|
195
|
+
# - header file include/%.hpp gets symlinked to pirate_frb/include/%.hpp
|
|
196
|
+
# - library lib/libpirate.so gets symlinked to pirate_frb/lib/libpirate.so
|
|
197
|
+
# - python extension pirate_frb/pirate_pybind11...so does not need to be symlinked/renamed.
|
|
198
|
+
WHEEL_FILES := $(PYFILES) $(PIRATE_PYEXT) pirate_frb/$(PIRATE_LIB)
|
|
199
|
+
WHEEL_FILES += $(HFILES:%=pirate_frb/%)
|
|
200
|
+
|
|
201
|
+
# Phony targets. The special targets 'build_wheel' and 'build_sdist' are needed by pip/pipmake.
|
|
202
|
+
lib: $(PIRATE_LIB) $(PIRATE_PYEXT)
|
|
203
|
+
bin: $(BIN_XFILES)
|
|
204
|
+
build_wheel: wheel_files.txt $(PIRATE_LIB) $(PIRATE_PYEXT)
|
|
205
|
+
build_sdist: sdist_files.txt
|
|
206
|
+
|
|
207
|
+
# Symlink {include,lib} into python directory 'pirate_frb'.
|
|
208
|
+
pirate_frb/include:
|
|
209
|
+
ln -s ../include $@
|
|
210
|
+
pirate_frb/lib:
|
|
211
|
+
ln -s ../lib $@
|
|
212
|
+
|
|
213
|
+
# Build object files in src_lib/, src_bin/, and src_lib/template_instantiations/ with default flags.
|
|
214
|
+
%.o: %.cu %.d
|
|
215
|
+
$(NVCC) $(NVCC_ARCH) $(NVCC_DEPFLAGS) -I$(KSGPU_DIR)/include -c -o $@ $<
|
|
216
|
+
|
|
217
|
+
# Build object files in src_pybind11/ with special flags.
|
|
218
|
+
src_pybind11/%.o: src_pybind11/%.cu src_pybind11/%.d
|
|
219
|
+
$(NVCC) $(NVCC_ARCH) $(NVCC_DEPFLAGS) -I$(KSGPU_DIR)/include -I$(PYTHON_INCDIR) -I$(NUMPY_INCDIR) -I$(PYBIND11_INCDIR) -c -o $@ $<
|
|
220
|
+
|
|
221
|
+
# Build the C++ library (lib/libpirate.so)
|
|
222
|
+
# We want it to automatically pull in the C++ library $(KSGPU_DIR)/lib/libksgpu.so.
|
|
223
|
+
#
|
|
224
|
+
# The C++ library has been built correctly if 'objdump -x' shows the following:
|
|
225
|
+
# NEEDED libksgpu.so
|
|
226
|
+
# RUNPATH $(KSGPU_DIR)/lib # where Makefile var $(KSGPU_DIR) is read from makefile_helper.out
|
|
227
|
+
#
|
|
228
|
+
# The quoting can be understood by working backwards as follows:
|
|
229
|
+
# - g++ command line should look like: g++ -Wl,-rpath="$(KSGPU_DIR)/lib"
|
|
230
|
+
# - nvcc command line should look like: nvcc -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'
|
|
231
|
+
|
|
232
|
+
$(PIRATE_LIB): $(LIB_OFILES)
|
|
233
|
+
@mkdir -p lib
|
|
234
|
+
$(NVCC) $(NVCC_ARCH) -shared -o $@ $^ -lksgpu -lyaml-cpp -L$(KSGPU_DIR)/lib -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'
|
|
235
|
+
|
|
236
|
+
# Build C++ binaries (bin/*)
|
|
237
|
+
# Link flags are similar to previous rule -- see comments above.
|
|
238
|
+
bin/%: src_bin/%.o $(PIRATE_LIB)
|
|
239
|
+
@mkdir -p bin/
|
|
240
|
+
$(NVCC) $(NVCC_ARCH) -o $@ $^ -lksgpu -lyaml-cpp -L$(KSGPU_DIR)/lib -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'
|
|
241
|
+
|
|
242
|
+
# Build the python extension (pirate_frb/pirate_pybind11...so)
|
|
243
|
+
# We want it to automatically pull in the C++ library pirate_frb/lib/libpirate.so.
|
|
244
|
+
#
|
|
245
|
+
# The python extension has been built correctly if 'objdump -x' shows the following:
|
|
246
|
+
# NEEDED libpirate.so
|
|
247
|
+
# RUNPATH $ORIGIN/lib
|
|
248
|
+
#
|
|
249
|
+
# The quoting can be understood by working backwards as follows:
|
|
250
|
+
# - g++ command line should look like: g++ -Wl,-rpath="\$ORIGIN/lib"
|
|
251
|
+
# - nvcc command line should look like: nvcc -Xcompiler '"-Wl,-rpath=\\$ORIGIN/lib"'
|
|
252
|
+
# - Makefile line should look like: nvcc -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'
|
|
253
|
+
#
|
|
254
|
+
# Note that we don't link to libksgpu.so or ksgpu_pybind11...so in this step.
|
|
255
|
+
# These libraries end up getting imported as follows:
|
|
256
|
+
#
|
|
257
|
+
# 1. When 'pirate_frb' is imported, we do 'import ksgpu' (in pirate_frb/__init__.py)
|
|
258
|
+
# before 'import pirate_pybind11'.
|
|
259
|
+
#
|
|
260
|
+
# 2. When 'ksgpu' is imported, we use the "ctypes trick" (see comment in ksgpu/__init__.py)
|
|
261
|
+
# to load the libraries libksgpu.so and ksgpu_pybind11...so with globally visible symbols.
|
|
262
|
+
|
|
263
|
+
$(PIRATE_PYEXT): $(PYEXT_OFILES) $(PIRATE_LIB) pirate_frb/lib
|
|
264
|
+
$(NVCC) $(NVCC_ARCH) -shared -o $@ $(PYEXT_OFILES) -lpirate -Lpirate_frb/lib -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'
|
|
265
|
+
|
|
266
|
+
# Needed by pip/pipmake: list of all files that go into the (non-editable) wheel.
|
|
267
|
+
wheel_files.txt: Makefile pirate_frb/include pirate_frb/lib
|
|
268
|
+
rm -f $@
|
|
269
|
+
for f in $(WHEEL_FILES); do echo $$f; done >>$@
|
|
270
|
+
|
|
271
|
+
# Needed by pip/pipmake: list of all files that go into the sdist.
|
|
272
|
+
sdist_files.txt: Makefile
|
|
273
|
+
rm -f $@
|
|
274
|
+
for f in $(SDIST_FILES); do echo $$f; done >>$@
|
|
275
|
+
|
|
276
|
+
clean:
|
|
277
|
+
@for f in $(foreach d,$(CLEAN_DIRS),$(wildcard $d/*~ $d/*.o $d/*.d $d/*.so $d/*.pyc)); do echo rm $$f; rm $$f; done
|
|
278
|
+
@for f in $(wildcard $(CLEAN_FILES) $(BIN_XFILES)); do echo rm $$f; rm $$f; done
|
|
279
|
+
@for d in $(wildcard $(CLEAN_RMDIRS)); do echo rmdir $$d; rmdir $$d; done
|
|
280
|
+
|
|
281
|
+
# Specifying .SECONDARY with no prerequisites disables auto-deletion of intermediate files.
|
|
282
|
+
.SECONDARY:
|
|
283
|
+
|
|
284
|
+
# If a depfile is absent, build can still proceed.
|
|
285
|
+
$(DEPFILES):
|
|
286
|
+
|
|
287
|
+
# Include any depfiles which are present.
|
|
288
|
+
include $(wildcard $(DEPFILES))
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#ifndef _PIRATE_DEDISPERSION_CONFIG_HPP
|
|
2
|
+
#define _PIRATE_DEDISPERSION_CONFIG_HPP
|
|
3
|
+
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <iostream>
|
|
7
|
+
|
|
8
|
+
namespace YAML { class Emitter; } // #include <yaml-cpp/yaml.h>
|
|
9
|
+
namespace pirate { struct YamlFile; } // #include <pirate/internals/YamlFile.hpp>
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
namespace pirate {
|
|
13
|
+
#if 0
|
|
14
|
+
} // editor auto-indent
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
struct DedispersionConfig
|
|
19
|
+
{
|
|
20
|
+
// Core dedispersion parameters.
|
|
21
|
+
ssize_t tree_rank = -1;
|
|
22
|
+
ssize_t num_downsampling_levels = -1;
|
|
23
|
+
ssize_t time_samples_per_chunk = 0;
|
|
24
|
+
|
|
25
|
+
// For now, there is only one dtype, which can be either "float32" or "float16".
|
|
26
|
+
// Later, I might split this into "compute" and "ringbuf" dtypes, and allow compressed
|
|
27
|
+
// dtypes (e.g. float8, int7).
|
|
28
|
+
|
|
29
|
+
std::string dtype; // "float32" or "float16"
|
|
30
|
+
|
|
31
|
+
struct EarlyTrigger
|
|
32
|
+
{
|
|
33
|
+
ssize_t ds_level = -1;
|
|
34
|
+
ssize_t tree_rank = 0;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
// Sorted (by ds_level first, then tree_rank).
|
|
38
|
+
std::vector<EarlyTrigger> early_triggers;
|
|
39
|
+
|
|
40
|
+
// GPU configuration.
|
|
41
|
+
ssize_t beams_per_gpu = 0;
|
|
42
|
+
ssize_t beams_per_batch = 0;
|
|
43
|
+
ssize_t num_active_batches = 0;
|
|
44
|
+
ssize_t gmem_nbytes_per_gpu = 0;
|
|
45
|
+
|
|
46
|
+
void validate() const;
|
|
47
|
+
|
|
48
|
+
// Write in informal text format (e.g. for log files)
|
|
49
|
+
// FIXME I might phase this out, in favor of yaml everywhere.
|
|
50
|
+
void print(std::ostream &os = std::cout, int indent=0) const;
|
|
51
|
+
|
|
52
|
+
// Write in YAML format.
|
|
53
|
+
void to_yaml(YAML::Emitter &emitter) const;
|
|
54
|
+
void to_yaml(const std::string &filename) const;
|
|
55
|
+
std::string to_yaml_string() const;
|
|
56
|
+
|
|
57
|
+
// Construct from YAML file.
|
|
58
|
+
// The 'verbosity' argument has the following meaning:
|
|
59
|
+
// 0 = quiet
|
|
60
|
+
// 1 = announce default values for all unspecified parameters
|
|
61
|
+
// 2 = announce all parameters
|
|
62
|
+
|
|
63
|
+
static DedispersionConfig from_yaml(const std::string &filename, int verbosity=0);
|
|
64
|
+
static DedispersionConfig from_yaml(const YamlFile &file);
|
|
65
|
+
|
|
66
|
+
// Helper functions for constructing DedispersionConfig instances.
|
|
67
|
+
// Add early triggers, while maintaining invariant that 'early_triggers' is sorted.
|
|
68
|
+
void add_early_trigger(ssize_t ds_level, ssize_t tree_rank);
|
|
69
|
+
void add_early_triggers(ssize_t ds_level, std::initializer_list<ssize_t> tree_ranks);
|
|
70
|
+
|
|
71
|
+
// Note: rather than calling this function directly, you probably want the
|
|
72
|
+
// DedispersionPlan (not DedispersionConfig) member 'nelts_per_segment'.
|
|
73
|
+
int get_nelts_per_segment() const;
|
|
74
|
+
|
|
75
|
+
// make_random(): used for unit tests.
|
|
76
|
+
static DedispersionConfig make_random();
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
extern bool operator==(const DedispersionConfig::EarlyTrigger &x, const DedispersionConfig::EarlyTrigger &y);
|
|
80
|
+
extern bool operator>(const DedispersionConfig::EarlyTrigger &x, const DedispersionConfig::EarlyTrigger &y);
|
|
81
|
+
extern std::ostream &operator<<(std::ostream &os, const DedispersionConfig::EarlyTrigger &et);
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
} // namespace pirate
|
|
85
|
+
|
|
86
|
+
#endif // _PIRATE_DEDISPERSION_CONFIG_HPP
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#ifndef _PIRATE_DEDISPERSION_PLAN_HPP
|
|
2
|
+
#define _PIRATE_DEDISPERSION_PLAN_HPP
|
|
3
|
+
|
|
4
|
+
#include "DedispersionConfig.hpp"
|
|
5
|
+
|
|
6
|
+
#include <vector>
|
|
7
|
+
#include <memory> // shared_ptr
|
|
8
|
+
#include <ksgpu/Array.hpp>
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
namespace pirate {
|
|
12
|
+
#if 0
|
|
13
|
+
} // editor auto-indent
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
struct DedispersionPlan
|
|
18
|
+
{
|
|
19
|
+
DedispersionPlan(const DedispersionConfig &config);
|
|
20
|
+
|
|
21
|
+
void print(std::ostream &os=std::cout, int indent=0) const;
|
|
22
|
+
|
|
23
|
+
// -------------------- Helper classes --------------------
|
|
24
|
+
|
|
25
|
+
struct Stage0Tree
|
|
26
|
+
{
|
|
27
|
+
// Note: total tree rank (rank0 + rank1) is equal to (config.tree_rank - (ds_level ? 1 : 0)).
|
|
28
|
+
|
|
29
|
+
int ds_level = -1; // downsampling level (downsampling "factor" is 2^level)
|
|
30
|
+
int rank0 = 0; // rank of Stage0Tree
|
|
31
|
+
int rank1 = 0; // rank of subsequent Stage1Tree (if no early trigger)
|
|
32
|
+
int nt_ds = 0; // downsampled time samples per chunk (= config.time_samples_per_chunk / pow2(ds_level))
|
|
33
|
+
|
|
34
|
+
int segments_per_beam = 0; // equal to pow2(rank0+rank1) * (nt_ds / nelts_per_segment)
|
|
35
|
+
int base_segment = 0; // cumulative (over all Stage0Trees) segment count
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
struct Stage1Tree
|
|
39
|
+
{
|
|
40
|
+
int ds_level = -1; // Same as Stage0Tree::ds_level
|
|
41
|
+
int rank0 = 0; // Same as Stage0Tree::rank0
|
|
42
|
+
int rank1_ambient = 0; // Same as Stage0Tree::rank1
|
|
43
|
+
int rank1_trigger = 0; // Can be smaller than rank1_ambient, for early trigger
|
|
44
|
+
int nt_ds = 0; // Same as Stage0Tree::nt_ds
|
|
45
|
+
|
|
46
|
+
int segments_per_beam = 0; // equal to pow2(rank0 + rank1_trigger) * (nt_ds / nelts_per_segment)
|
|
47
|
+
int base_segment = 0; // cumulative (over all Stage1Trees) segment count
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
struct Ringbuf
|
|
51
|
+
{
|
|
52
|
+
long rb_len = 0; // number of (time chunk, beam) pairs
|
|
53
|
+
long nseg_per_beam = 0;
|
|
54
|
+
long base_segment = 0;
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
// -------------------- Members --------------------
|
|
58
|
+
|
|
59
|
+
const DedispersionConfig config;
|
|
60
|
+
|
|
61
|
+
int nelts_per_segment = 0; // currently always constants::bytes_per_gpu_cache_line / (sizeof config dtype)
|
|
62
|
+
int nbytes_per_segment = 0; // currently always constants::bytes_per_gpu_cache_line
|
|
63
|
+
|
|
64
|
+
std::vector<Stage0Tree> stage0_trees;
|
|
65
|
+
std::vector<Stage1Tree> stage1_trees;
|
|
66
|
+
|
|
67
|
+
ssize_t stage0_total_segments_per_beam = 0;
|
|
68
|
+
ssize_t stage1_total_segments_per_beam = 0;
|
|
69
|
+
|
|
70
|
+
int max_clag = 0;
|
|
71
|
+
long gmem_ringbuf_nseg = 0; // includes gmem + g2h + h2g
|
|
72
|
+
|
|
73
|
+
// All vector<Ringbuf> objects have length (max_clag + 1).
|
|
74
|
+
// T = total beams, A = active beams, B = beams per batch.
|
|
75
|
+
|
|
76
|
+
std::vector<Ringbuf> gmem_ringbufs; // rb_size = (clag*T + A), on GPU
|
|
77
|
+
std::vector<Ringbuf> g2h_ringbufs; // rb_size = min(A+B, T), on GPU
|
|
78
|
+
std::vector<Ringbuf> h2g_ringbufs; // rb_size = min(A+B, T), on GPU
|
|
79
|
+
std::vector<Ringbuf> h2h_ringbufs; // rb_size = (clag*T + B), on host
|
|
80
|
+
|
|
81
|
+
// stage0_output_rb_locs, stage1_input_rb_locs.
|
|
82
|
+
//
|
|
83
|
+
// These arrays contain GPU ringbuf locations, represented as 4 uint32s:
|
|
84
|
+
// uint rb_offset; // in segments, not bytes
|
|
85
|
+
// uint rb_phase; // index of (time chunk, beam) pair, relative to current pair
|
|
86
|
+
// uint rb_len; // number of (time chunk, beam) pairs in ringbuf (same as Ringbuf::rb_len)
|
|
87
|
+
// uint rb_nseg; // number of segments per (time chunk, beam) (same as Ringbuf::nseg_per_beam)
|
|
88
|
+
//
|
|
89
|
+
// The arrays are indexed by:
|
|
90
|
+
// iseg0 -> (time/nelts_per_segment, 2^rank1, 2^rank0)
|
|
91
|
+
// iseg1 -> (time/nelts_per_segment, 2^rank0, 2^rank1) note transpose
|
|
92
|
+
|
|
93
|
+
ksgpu::Array<uint> stage0_rb_locs; // shape (stage0_total_segments_per_beam, 4)
|
|
94
|
+
ksgpu::Array<uint> stage1_rb_locs; // shape (stage1_total_segments_per_beam, 4)
|
|
95
|
+
};
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
} // namespace pirate
|
|
99
|
+
|
|
100
|
+
#endif // _PIRATE_DEDISPERSION_PLAN_HPP
|