pirate-frb 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. pirate_frb-0.0.1/Makefile +288 -0
  2. pirate_frb-0.0.1/PKG-INFO +7 -0
  3. pirate_frb-0.0.1/include/pirate/DedispersionConfig.hpp +86 -0
  4. pirate_frb-0.0.1/include/pirate/DedispersionPlan.hpp +100 -0
  5. pirate_frb-0.0.1/include/pirate/avx256/downsample.hpp +571 -0
  6. pirate_frb-0.0.1/include/pirate/avx256/m128_outbuf.hpp +55 -0
  7. pirate_frb-0.0.1/include/pirate/avx256/m64_outbuf.hpp +103 -0
  8. pirate_frb-0.0.1/include/pirate/constants.hpp +38 -0
  9. pirate_frb-0.0.1/include/pirate/gpu/DownsampleKernel.hpp +157 -0
  10. pirate_frb-0.0.1/include/pirate/gpu/TransposeKernel.hpp +64 -0
  11. pirate_frb-0.0.1/include/pirate/gpu/reduce2.hpp +64 -0
  12. pirate_frb-0.0.1/include/pirate/internals/Directory.hpp +49 -0
  13. pirate_frb-0.0.1/include/pirate/internals/Epoll.hpp +74 -0
  14. pirate_frb-0.0.1/include/pirate/internals/FakeCorrelator.hpp +58 -0
  15. pirate_frb-0.0.1/include/pirate/internals/FakeServer.hpp +139 -0
  16. pirate_frb-0.0.1/include/pirate/internals/File.hpp +39 -0
  17. pirate_frb-0.0.1/include/pirate/internals/GpuDedispersionKernel.hpp +105 -0
  18. pirate_frb-0.0.1/include/pirate/internals/ReferenceDedisperser.hpp +96 -0
  19. pirate_frb-0.0.1/include/pirate/internals/ReferenceDedispersionKernel.hpp +51 -0
  20. pirate_frb-0.0.1/include/pirate/internals/ReferenceLagbuf.hpp +37 -0
  21. pirate_frb-0.0.1/include/pirate/internals/ReferenceLaggedDownsamplingKernel.hpp +64 -0
  22. pirate_frb-0.0.1/include/pirate/internals/ReferenceTree.hpp +63 -0
  23. pirate_frb-0.0.1/include/pirate/internals/Socket.hpp +86 -0
  24. pirate_frb-0.0.1/include/pirate/internals/UntypedArray.hpp +38 -0
  25. pirate_frb-0.0.1/include/pirate/internals/YamlFile.hpp +196 -0
  26. pirate_frb-0.0.1/include/pirate/internals/bitvec.hpp +104 -0
  27. pirate_frb-0.0.1/include/pirate/internals/cpu_downsample.hpp +20 -0
  28. pirate_frb-0.0.1/include/pirate/internals/dedispersion_inbufs.hpp +139 -0
  29. pirate_frb-0.0.1/include/pirate/internals/dedispersion_kernel_implementation.hpp +2346 -0
  30. pirate_frb-0.0.1/include/pirate/internals/dedispersion_outbufs.hpp +127 -0
  31. pirate_frb-0.0.1/include/pirate/internals/file_utils.hpp +33 -0
  32. pirate_frb-0.0.1/include/pirate/internals/gpu_downsample.hpp +19 -0
  33. pirate_frb-0.0.1/include/pirate/internals/gpu_transpose.hpp +17 -0
  34. pirate_frb-0.0.1/include/pirate/internals/inlines.hpp +164 -0
  35. pirate_frb-0.0.1/include/pirate/internals/system_utils.hpp +39 -0
  36. pirate_frb-0.0.1/include/pirate/internals/utils.hpp +78 -0
  37. pirate_frb-0.0.1/makefile_helper.py +72 -0
  38. pirate_frb-0.0.1/pirate_frb/__init__.py +6 -0
  39. pirate_frb-0.0.1/pyproject.toml +15 -0
  40. pirate_frb-0.0.1/src_bin/fake_correlator.cu +21 -0
  41. pirate_frb-0.0.1/src_bin/fake_server.cu +152 -0
  42. pirate_frb-0.0.1/src_bin/scratch.cu +26 -0
  43. pirate_frb-0.0.1/src_bin/show_dedispersion_plan.cu +30 -0
  44. pirate_frb-0.0.1/src_bin/test-avx256-m64-outbuf.cu +116 -0
  45. pirate_frb-0.0.1/src_bin/test-cpu-downsampler.cu +75 -0
  46. pirate_frb-0.0.1/src_bin/test-gpu-dedispersion-kernels.cu +337 -0
  47. pirate_frb-0.0.1/src_bin/test-gpu-downsample.cu +108 -0
  48. pirate_frb-0.0.1/src_bin/test-gpu-lagged-downsampler.cu +286 -0
  49. pirate_frb-0.0.1/src_bin/test-gpu-reduce2.cu +70 -0
  50. pirate_frb-0.0.1/src_bin/test-gpu-transpose.cu +53 -0
  51. pirate_frb-0.0.1/src_bin/test-reference-dedisperser.cu +113 -0
  52. pirate_frb-0.0.1/src_bin/test-reference-tree.cu +371 -0
  53. pirate_frb-0.0.1/src_bin/time-cpu-downsample.cu +179 -0
  54. pirate_frb-0.0.1/src_bin/time-gpu-dedispersion-kernels.cu +90 -0
  55. pirate_frb-0.0.1/src_bin/time-gpu-downsample.cu +63 -0
  56. pirate_frb-0.0.1/src_bin/time-gpu-lagged-downsampler.cu +108 -0
  57. pirate_frb-0.0.1/src_bin/time-gpu-transpose.cu +41 -0
  58. pirate_frb-0.0.1/src_lib/DedispersionConfig.cu +320 -0
  59. pirate_frb-0.0.1/src_lib/DedispersionPlan.cu +293 -0
  60. pirate_frb-0.0.1/src_lib/Directory.cu +44 -0
  61. pirate_frb-0.0.1/src_lib/Epoll.cu +110 -0
  62. pirate_frb-0.0.1/src_lib/FakeCorrelator.cu +112 -0
  63. pirate_frb-0.0.1/src_lib/FakeServer.cu +1003 -0
  64. pirate_frb-0.0.1/src_lib/File.cu +75 -0
  65. pirate_frb-0.0.1/src_lib/GpuDedispersionKernel.cu +613 -0
  66. pirate_frb-0.0.1/src_lib/GpuLaggedDownsamplingKernel.cu +975 -0
  67. pirate_frb-0.0.1/src_lib/ReferenceDedisperser.cu +637 -0
  68. pirate_frb-0.0.1/src_lib/ReferenceDedispersionKernel.cu +206 -0
  69. pirate_frb-0.0.1/src_lib/ReferenceLagbuf.cu +116 -0
  70. pirate_frb-0.0.1/src_lib/ReferenceLaggedDownsamplingKernel.cu +153 -0
  71. pirate_frb-0.0.1/src_lib/ReferenceTree.cu +162 -0
  72. pirate_frb-0.0.1/src_lib/Socket.cu +308 -0
  73. pirate_frb-0.0.1/src_lib/UntypedArray.cu +144 -0
  74. pirate_frb-0.0.1/src_lib/YamlFile.cu +133 -0
  75. pirate_frb-0.0.1/src_lib/cpu_downsample.cu +76 -0
  76. pirate_frb-0.0.1/src_lib/file_utils.cu +134 -0
  77. pirate_frb-0.0.1/src_lib/gpu_downsample.cu +87 -0
  78. pirate_frb-0.0.1/src_lib/gpu_transpose.cu +45 -0
  79. pirate_frb-0.0.1/src_lib/system_utils.cu +119 -0
  80. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_float16.cu +11 -0
  81. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_float32.cu +11 -0
  82. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_nolag_float16.cu +11 -0
  83. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_simple_nolag_float32.cu +11 -0
  84. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage0_float16.cu +11 -0
  85. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage0_float32.cu +11 -0
  86. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage1_float16.cu +11 -0
  87. pirate_frb-0.0.1/src_lib/template_instantiations/dedisp_stage1_float32.cu +11 -0
  88. pirate_frb-0.0.1/src_lib/utils.cu +269 -0
  89. pirate_frb-0.0.1/src_pybind11/pirate_pybind11.cu +30 -0
@@ -0,0 +1,288 @@
1
+ # This Makefile will be invoked by the python build system (e.g. via 'pip install'),
2
+ # but you can also build individual targets by invoking 'make' directly.
3
+
4
+ # Disable built-in rules and variables (must be first).
5
+ MAKEFLAGS += --no-builtin-rules
6
+ MAKEFLAGS += --no-builtin-variables
7
+
8
+ # Default target 'all' must be first target in Makefile.
9
+ # The 'bin' target builds a bunch of binaries in bin/...
10
+ # The 'lib' target builds the C++ library lib/libpirate.so, and the python extension pirate_frb/pirate_pybind11...so.
11
+ # The 'build_wheel' and 'build_sdist' targets are invoked by 'pip' (or 'make all').
12
+ all: bin lib build_wheel build_sdist
13
+
14
+ .PHONY: all bin lib build_wheel build_sdist clean
15
+
16
+
17
+ ####################################################################################################
18
+ #
19
+ # Variables encoding configuration: PYTHON, NVCC, NVCC_ARCH, NVCC_DEPFLAGS.
20
+ #
21
+ # FIXME some day I'll define a configure-script mechanism for setting these variables.
22
+ # For now, if you want to change the defaults, just edit the Makefile.
23
+
24
+ PYTHON ?= python3
25
+ NVCC ?= nvcc -std=c++17 -m64 -O3 --compiler-options -Wall,-fPIC,-march=x86-64-v3
26
+
27
+ # Extra nvcc flags needed to build Makefile dependencies
28
+ # -MMD create dep file, omitting "system" headers
29
+ # -MP add phony target for each header in dep file (makes error reporting less confusing)
30
+ # Note: we don't need "-MT $@", since we use in-tree object filenames (x.cu -> x.o).
31
+ # Note: we don't need "-MT $*.d", since we use in-tree depfile names (x.cu -> x.d).
32
+ NVCC_DEPFLAGS ?= -MMD -MP
33
+
34
+ # NVIDIA architecture.
35
+ DEFAULT_NVCC_ARCH = -gencode arch=compute_80,code=sm_80
36
+ DEFAULT_NVCC_ARCH += -gencode arch=compute_86,code=sm_86
37
+ DEFAULT_NVCC_ARCH += -gencode arch=compute_89,code=sm_89
38
+ # DEFAULT_NVCC_ARCH += -gencode arch=compute_90,code=sm_90
39
+ NVCC_ARCH ?= $(DEFAULT_NVCC_ARCH)
40
+
41
+
42
+ ####################################################################################################
43
+ #
44
+ # "Derived" config variables: PYTHON_INCDIR, NUMPY_INCDIR, PYBIND11_INCDIR, PYEXT_SUFFIX, KSGPU_DIR.
45
+ #
46
+ # These are autogenerated by makefile_helper.py, and cached in makefile_helper.out.
47
+ # PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.
48
+
49
+
50
+ ifneq ($(MAKECMDGOALS),clean)
51
+ include makefile_helper.out
52
+ endif
53
+
54
+ makefile_helper.out: makefile_helper.py Makefile
55
+ $(PYTHON) makefile_helper.py
56
+
57
+
58
+ ####################################################################################################
59
+
60
+
61
+ # The main output of the build process is these two libraries.
62
+ # Reminder: PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.
63
+ PIRATE_LIB := lib/libpirate.so
64
+ PIRATE_PYEXT := pirate_frb/pirate_pybind11$(PYEXT_SUFFIX)
65
+
66
+ # These get compiled into lib/libpirate.so.
67
+ LIB_SRCFILES = \
68
+ src_lib/cpu_downsample.cu \
69
+ src_lib/file_utils.cu \
70
+ src_lib/gpu_downsample.cu \
71
+ src_lib/gpu_transpose.cu \
72
+ src_lib/system_utils.cu \
73
+ src_lib/utils.cu \
74
+ src_lib/DedispersionConfig.cu \
75
+ src_lib/DedispersionPlan.cu \
76
+ src_lib/Directory.cu \
77
+ src_lib/Epoll.cu \
78
+ src_lib/FakeCorrelator.cu \
79
+ src_lib/FakeServer.cu \
80
+ src_lib/File.cu \
81
+ src_lib/GpuDedispersionKernel.cu \
82
+ src_lib/GpuLaggedDownsamplingKernel.cu \
83
+ src_lib/ReferenceDedisperser.cu \
84
+ src_lib/ReferenceDedispersionKernel.cu \
85
+ src_lib/ReferenceLagbuf.cu \
86
+ src_lib/ReferenceLaggedDownsamplingKernel.cu \
87
+ src_lib/ReferenceTree.cu \
88
+ src_lib/Socket.cu \
89
+ src_lib/UntypedArray.cu \
90
+ src_lib/YamlFile.cu \
91
+ src_lib/template_instantiations/dedisp_simple_float16.cu \
92
+ src_lib/template_instantiations/dedisp_simple_float32.cu \
93
+ src_lib/template_instantiations/dedisp_simple_nolag_float16.cu \
94
+ src_lib/template_instantiations/dedisp_simple_nolag_float32.cu \
95
+ src_lib/template_instantiations/dedisp_stage0_float16.cu \
96
+ src_lib/template_instantiations/dedisp_stage0_float32.cu \
97
+ src_lib/template_instantiations/dedisp_stage1_float16.cu \
98
+ src_lib/template_instantiations/dedisp_stage1_float32.cu
99
+
100
+ # These get compiled into pirate_frb/pirate_pybind11....so.
101
+ PYEXT_SRCFILES = \
102
+ src_pybind11/pirate_pybind11.cu
103
+
104
+ # Must list all python source files here.
105
+ # (Otherwise they won't show up in 'pip install' or pypi.)
106
+ PYFILES = \
107
+ pirate_frb/__init__.py
108
+
109
+ # These are in 1-1 correspondence with executables in bin/
110
+ # For example, 'src_bin/fake_correlator.cu' gets compiled to 'bin/fake_correlator'.
111
+ BIN_SRCFILES = \
112
+ src_bin/fake_correlator.cu \
113
+ src_bin/fake_server.cu \
114
+ src_bin/scratch.cu \
115
+ src_bin/show_dedispersion_plan.cu \
116
+ src_bin/test-avx256-m64-outbuf.cu \
117
+ src_bin/test-cpu-downsampler.cu \
118
+ src_bin/test-gpu-dedispersion-kernels.cu \
119
+ src_bin/test-gpu-downsample.cu \
120
+ src_bin/test-gpu-lagged-downsampler.cu \
121
+ src_bin/test-gpu-reduce2.cu \
122
+ src_bin/test-gpu-transpose.cu \
123
+ src_bin/test-reference-dedisperser.cu \
124
+ src_bin/test-reference-tree.cu \
125
+ src_bin/time-cpu-downsample.cu \
126
+ src_bin/time-gpu-dedispersion-kernels.cu \
127
+ src_bin/time-gpu-downsample.cu \
128
+ src_bin/time-gpu-lagged-downsampler.cu \
129
+ src_bin/time-gpu-transpose.cu
130
+
131
+ # Must list all header files here.
132
+ # (Otherwise they won't show up in 'pip install' or pypi.)
133
+ HFILES = \
134
+ include/pirate/constants.hpp \
135
+ include/pirate/DedispersionConfig.hpp \
136
+ include/pirate/DedispersionPlan.hpp \
137
+ include/pirate/avx256/downsample.hpp \
138
+ include/pirate/avx256/m64_outbuf.hpp \
139
+ include/pirate/avx256/m128_outbuf.hpp \
140
+ include/pirate/gpu/reduce2.hpp \
141
+ include/pirate/gpu/DownsampleKernel.hpp \
142
+ include/pirate/gpu/TransposeKernel.hpp \
143
+ include/pirate/internals/bitvec.hpp \
144
+ include/pirate/internals/cpu_downsample.hpp \
145
+ include/pirate/internals/dedispersion_kernel_implementation.hpp \
146
+ include/pirate/internals/dedispersion_inbufs.hpp \
147
+ include/pirate/internals/dedispersion_outbufs.hpp \
148
+ include/pirate/internals/gpu_downsample.hpp \
149
+ include/pirate/internals/gpu_transpose.hpp \
150
+ include/pirate/internals/file_utils.hpp \
151
+ include/pirate/internals/inlines.hpp \
152
+ include/pirate/internals/system_utils.hpp \
153
+ include/pirate/internals/utils.hpp \
154
+ include/pirate/internals/Directory.hpp \
155
+ include/pirate/internals/Epoll.hpp \
156
+ include/pirate/internals/FakeCorrelator.hpp \
157
+ include/pirate/internals/FakeServer.hpp \
158
+ include/pirate/internals/File.hpp \
159
+ include/pirate/internals/GpuDedispersionKernel.hpp \
160
+ include/pirate/internals/ReferenceDedisperser.hpp \
161
+ include/pirate/internals/ReferenceDedispersionKernel.hpp \
162
+ include/pirate/internals/ReferenceLagbuf.hpp \
163
+ include/pirate/internals/ReferenceLaggedDownsamplingKernel.hpp \
164
+ include/pirate/internals/ReferenceTree.hpp \
165
+ include/pirate/internals/Socket.hpp \
166
+ include/pirate/internals/UntypedArray.hpp \
167
+ include/pirate/internals/YamlFile.hpp
168
+
169
+ # 'make clean' deletes {*~, *.o, *.d, *.so, *.pyc} from these dirs.
170
+ CLEAN_DIRS := . lib src_bin src_lib src_lib/template_instantiations pirate_frb/__pycache__ include include/pirate include/pirate/avx256 include/pirate/gpu include/pirate/internals
171
+
172
+ # Extra files to be deleted by 'make clean'.
173
+ # Note that 'pirate_frb/include' and 'pirate_frb/lib' are symlinks, so we put them in CLEAN_FILES, not CLEAN_RMDIRS
174
+ CLEAN_FILES := sdist_files.txt wheel_files.txt makefile_helper.out pirate_frb/include pirate_frb/lib
175
+
176
+ # Directories that should be empty at the end of 'make clean', and can be deleted.
177
+ CLEAN_RMDIRS := bin lib pirate_frb/__pycache__
178
+
179
+
180
+ ####################################################################################################
181
+
182
+
183
+ LIB_OFILES := $(LIB_SRCFILES:%.cu=%.o)
184
+ PYEXT_OFILES := $(PYEXT_SRCFILES:%.cu=%.o)
185
+ BIN_XFILES := $(BIN_SRCFILES:src_bin/%.cu=bin/%)
186
+
187
+ # Must include all .d files, or build will break!
188
+ ALL_SRCFILES := $(LIB_SRCFILES) $(PYEXT_SRCFILES) $(BIN_SRCFILES)
189
+ DEPFILES := $(ALL_SRCFILES:%.cu=%.d)
190
+
191
+ SDIST_FILES := pyproject.toml Makefile makefile_helper.py
192
+ SDIST_FILES += $(PYFILES) $(ALL_SRCFILES) $(HFILES)
193
+
194
+ # Some symlinks for the wheel:
195
+ # - header file include/%.hpp gets symlinked to pirate_frb/include/%.hpp
196
+ # - library lib/libpirate.so gets symlinked to pirate_frb/lib/libpirate.so
197
+ # - python extension pirate_frb/pirate_pybind11...so does not need to be symlinked/renamed.
198
+ WHEEL_FILES := $(PYFILES) $(PIRATE_PYEXT) pirate_frb/$(PIRATE_LIB)
199
+ WHEEL_FILES += $(HFILES:%=pirate_frb/%)
200
+
201
+ # Phony targets. The special targets 'build_wheel' and 'build_sdist' are needed by pip/pipmake.
202
+ lib: $(PIRATE_LIB) $(PIRATE_PYEXT)
203
+ bin: $(BIN_XFILES)
204
+ build_wheel: wheel_files.txt $(PIRATE_LIB) $(PIRATE_PYEXT)
205
+ build_sdist: sdist_files.txt
206
+
207
+ # Symlink {include,lib} into python directory 'pirate_frb'.
208
+ pirate_frb/include:
209
+ ln -s ../include $@
210
+ pirate_frb/lib:
211
+ ln -s ../lib $@
212
+
213
+ # Build object files in src_lib/, src_bin/, and src_lib/template_instantiations/ with default flags.
214
+ %.o: %.cu %.d
215
+ $(NVCC) $(NVCC_ARCH) $(NVCC_DEPFLAGS) -I$(KSGPU_DIR)/include -c -o $@ $<
216
+
217
+ # Build object files in src_pybind11/ with special flags.
218
+ src_pybind11/%.o: src_pybind11/%.cu src_pybind11/%.d
219
+ $(NVCC) $(NVCC_ARCH) $(NVCC_DEPFLAGS) -I$(KSGPU_DIR)/include -I$(PYTHON_INCDIR) -I$(NUMPY_INCDIR) -I$(PYBIND11_INCDIR) -c -o $@ $<
220
+
221
+ # Build the C++ library (lib/libpirate.so)
222
+ # We want it to automatically pull in the C++ library $(KSGPU_DIR)/lib/libksgpu.so.
223
+ #
224
+ # The C++ library has been built correctly if 'objdump -x' shows the following:
225
+ # NEEDED libksgpu.so
226
+ # RUNPATH $(KSGPU_DIR)/lib # where Makefile var $(KSGPU_DIR) is read from makefile_helper.out
227
+ #
228
+ # The quoting can be understood by working backwards as follows:
229
+ # - g++ command line should look like: g++ -Wl,-rpath="$(KSGPU_DIR)/lib"
230
+ # - nvcc command line should look like: nvcc -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'
231
+
232
+ $(PIRATE_LIB): $(LIB_OFILES)
233
+ @mkdir -p lib
234
+ $(NVCC) $(NVCC_ARCH) -shared -o $@ $^ -lksgpu -lyaml-cpp -L$(KSGPU_DIR)/lib -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'
235
+
236
+ # Build C++ binaries (bin/*)
237
+ # Link flags are similar to previous rule -- see comments above.
238
+ bin/%: src_bin/%.o $(PIRATE_LIB)
239
+ @mkdir -p bin/
240
+ $(NVCC) $(NVCC_ARCH) -o $@ $^ -lksgpu -lyaml-cpp -L$(KSGPU_DIR)/lib -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'
241
+
242
+ # Build the python extension (pirate_frb/pirate_pybind11...so)
243
+ # We want it to automatically pull in the C++ library pirate_frb/lib/libpirate.so.
244
+ #
245
+ # The python extension has been built correctly if 'objdump -x' shows the following:
246
+ # NEEDED libpirate.so
247
+ # RUNPATH $ORIGIN/lib
248
+ #
249
+ # The quoting can be understood by working backwards as follows:
250
+ # - g++ command line should look like: g++ -Wl,-rpath="\$ORIGIN/lib"
251
+ # - nvcc command line should look like: nvcc -Xcompiler '"-Wl,-rpath=\\$ORIGIN/lib"'
252
+ # - Makefile line should look like: nvcc -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'
253
+ #
254
+ # Note that we don't link to libksgpu.so or ksgpu_pybind11...so in this step.
255
+ # These libraries end up getting imported as follows:
256
+ #
257
+ # 1. When 'pirate_frb' is imported, we do 'import ksgpu' (in pirate_frb/__init__.py)
258
+ # before 'import pirate_pybind11'.
259
+ #
260
+ # 2. When 'ksgpu' is imported, we use the "ctypes trick" (see comment in ksgpu/__init__.py)
261
+ # to load the libraries libksgpu.so and ksgpu_pybind11...so with globally visible symbols.
262
+
263
+ $(PIRATE_PYEXT): $(PYEXT_OFILES) $(PIRATE_LIB) pirate_frb/lib
264
+ $(NVCC) $(NVCC_ARCH) -shared -o $@ $(PYEXT_OFILES) -lpirate -Lpirate_frb/lib -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'
265
+
266
+ # Needed by pip/pipmake: list of all files that go into the (non-editable) wheel.
267
+ wheel_files.txt: Makefile pirate_frb/include pirate_frb/lib
268
+ rm -f $@
269
+ for f in $(WHEEL_FILES); do echo $$f; done >>$@
270
+
271
+ # Needed by pip/pipmake: list of all files that go into the sdist.
272
+ sdist_files.txt: Makefile
273
+ rm -f $@
274
+ for f in $(SDIST_FILES); do echo $$f; done >>$@
275
+
276
+ clean:
277
+ @for f in $(foreach d,$(CLEAN_DIRS),$(wildcard $d/*~ $d/*.o $d/*.d $d/*.so $d/*.pyc)); do echo rm $$f; rm $$f; done
278
+ @for f in $(wildcard $(CLEAN_FILES) $(BIN_XFILES)); do echo rm $$f; rm $$f; done
279
+ @for d in $(wildcard $(CLEAN_RMDIRS)); do echo rmdir $$d; rmdir $$d; done
280
+
281
+ # Specifying .SECONDARY with no prerequisites disables auto-deletion of intermediate files.
282
+ .SECONDARY:
283
+
284
+ # If a depfile is absent, build can still proceed.
285
+ $(DEPFILES):
286
+
287
+ # Include any depfiles which are present.
288
+ include $(wildcard $(DEPFILES))
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.2
2
+ Name: pirate_frb
3
+ Version: 0.0.1
4
+ Requires-Python: >=3.8
5
+ Requires-Dist: pybind11
6
+ Requires-Dist: numpy
7
+ Requires-Dist: ksgpu >= 1.0.3
@@ -0,0 +1,86 @@
1
+ #ifndef _PIRATE_DEDISPERSION_CONFIG_HPP
2
+ #define _PIRATE_DEDISPERSION_CONFIG_HPP
3
+
4
+ #include <vector>
5
+ #include <string>
6
+ #include <iostream>
7
+
8
+ namespace YAML { class Emitter; } // #include <yaml-cpp/yaml.h>
9
+ namespace pirate { struct YamlFile; } // #include <pirate/internals/YamlFile.hpp>
10
+
11
+
12
+ namespace pirate {
13
+ #if 0
14
+ } // editor auto-indent
15
+ #endif
16
+
17
+
18
+ struct DedispersionConfig
19
+ {
20
+ // Core dedispersion parameters.
21
+ ssize_t tree_rank = -1;
22
+ ssize_t num_downsampling_levels = -1;
23
+ ssize_t time_samples_per_chunk = 0;
24
+
25
+ // For now, there is only one dtype, which can be either "float32" or "float16".
26
+ // Later, I might split this into "compute" and "ringbuf" dtypes, and allow compressed
27
+ // dtypes (e.g. float8, int7).
28
+
29
+ std::string dtype; // "float32" or "float16"
30
+
31
+ struct EarlyTrigger
32
+ {
33
+ ssize_t ds_level = -1;
34
+ ssize_t tree_rank = 0;
35
+ };
36
+
37
+ // Sorted (by ds_level first, then tree_rank).
38
+ std::vector<EarlyTrigger> early_triggers;
39
+
40
+ // GPU configuration.
41
+ ssize_t beams_per_gpu = 0;
42
+ ssize_t beams_per_batch = 0;
43
+ ssize_t num_active_batches = 0;
44
+ ssize_t gmem_nbytes_per_gpu = 0;
45
+
46
+ void validate() const;
47
+
48
+ // Write in informal text format (e.g. for log files)
49
+ // FIXME I might phase this out, in favor of yaml everywhere.
50
+ void print(std::ostream &os = std::cout, int indent=0) const;
51
+
52
+ // Write in YAML format.
53
+ void to_yaml(YAML::Emitter &emitter) const;
54
+ void to_yaml(const std::string &filename) const;
55
+ std::string to_yaml_string() const;
56
+
57
+ // Construct from YAML file.
58
+ // The 'verbosity' argument has the following meaning:
59
+ // 0 = quiet
60
+ // 1 = announce default values for all unspecified parameters
61
+ // 2 = announce all parameters
62
+
63
+ static DedispersionConfig from_yaml(const std::string &filename, int verbosity=0);
64
+ static DedispersionConfig from_yaml(const YamlFile &file);
65
+
66
+ // Helper functions for constructing DedispersionConfig instances.
67
+ // Add early triggers, while maintaining invariant that 'early_triggers' is sorted.
68
+ void add_early_trigger(ssize_t ds_level, ssize_t tree_rank);
69
+ void add_early_triggers(ssize_t ds_level, std::initializer_list<ssize_t> tree_ranks);
70
+
71
+ // Note: rather than calling this function directly, you probably want the
72
+ // DedispersionPlan (not DedispersionConfig) member 'nelts_per_segment'.
73
+ int get_nelts_per_segment() const;
74
+
75
+ // make_random(): used for unit tests.
76
+ static DedispersionConfig make_random();
77
+ };
78
+
79
+ extern bool operator==(const DedispersionConfig::EarlyTrigger &x, const DedispersionConfig::EarlyTrigger &y);
80
+ extern bool operator>(const DedispersionConfig::EarlyTrigger &x, const DedispersionConfig::EarlyTrigger &y);
81
+ extern std::ostream &operator<<(std::ostream &os, const DedispersionConfig::EarlyTrigger &et);
82
+
83
+
84
+ } // namespace pirate
85
+
86
+ #endif // _PIRATE_DEDISPERSION_CONFIG_HPP
@@ -0,0 +1,100 @@
1
+ #ifndef _PIRATE_DEDISPERSION_PLAN_HPP
2
+ #define _PIRATE_DEDISPERSION_PLAN_HPP
3
+
4
+ #include "DedispersionConfig.hpp"
5
+
6
+ #include <vector>
7
+ #include <memory> // shared_ptr
8
+ #include <ksgpu/Array.hpp>
9
+
10
+
11
+ namespace pirate {
12
+ #if 0
13
+ } // editor auto-indent
14
+ #endif
15
+
16
+
17
+ struct DedispersionPlan
18
+ {
19
+ DedispersionPlan(const DedispersionConfig &config);
20
+
21
+ void print(std::ostream &os=std::cout, int indent=0) const;
22
+
23
+ // -------------------- Helper classes --------------------
24
+
25
+ struct Stage0Tree
26
+ {
27
+ // Note: total tree rank (rank0 + rank1) is equal to (config.tree_rank - (ds_level ? 1 : 0)).
28
+
29
+ int ds_level = -1; // downsampling level (downsampling "factor" is 2^level)
30
+ int rank0 = 0; // rank of Stage0Tree
31
+ int rank1 = 0; // rank of subsequent Stage1Tree (if no early trigger)
32
+ int nt_ds = 0; // downsampled time samples per chunk (= config.time_samples_per_chunk / pow2(ds_level))
33
+
34
+ int segments_per_beam = 0; // equal to pow2(rank0+rank1) * (nt_ds / nelts_per_segment)
35
+ int base_segment = 0; // cumulative (over all Stage0Trees) segment count
36
+ };
37
+
38
+ struct Stage1Tree
39
+ {
40
+ int ds_level = -1; // Same as Stage0Tree::ds_level
41
+ int rank0 = 0; // Same as Stage0Tree::rank0
42
+ int rank1_ambient = 0; // Same as Stage0Tree::rank1
43
+ int rank1_trigger = 0; // Can be smaller than rank1_ambient, for early trigger
44
+ int nt_ds = 0; // Same as Stage0Tree::nt_ds
45
+
46
+ int segments_per_beam = 0; // equal to pow2(rank0 + rank1_trigger) * (nt_ds / nelts_per_segment)
47
+ int base_segment = 0; // cumulative (over all Stage1Trees) segment count
48
+ };
49
+
50
+ struct Ringbuf
51
+ {
52
+ long rb_len = 0; // number of (time chunk, beam) pairs
53
+ long nseg_per_beam = 0;
54
+ long base_segment = 0;
55
+ };
56
+
57
+ // -------------------- Members --------------------
58
+
59
+ const DedispersionConfig config;
60
+
61
+ int nelts_per_segment = 0; // currently always constants::bytes_per_gpu_cache_line / (sizeof config dtype)
62
+ int nbytes_per_segment = 0; // currently always constants::bytes_per_gpu_cache_line
63
+
64
+ std::vector<Stage0Tree> stage0_trees;
65
+ std::vector<Stage1Tree> stage1_trees;
66
+
67
+ ssize_t stage0_total_segments_per_beam = 0;
68
+ ssize_t stage1_total_segments_per_beam = 0;
69
+
70
+ int max_clag = 0;
71
+ long gmem_ringbuf_nseg = 0; // includes gmem + g2h + h2g
72
+
73
+ // All vector<Ringbuf> objects have length (max_clag + 1).
74
+ // T = total beams, A = active beams, B = beams per batch.
75
+
76
+ std::vector<Ringbuf> gmem_ringbufs; // rb_size = (clag*T + A), on GPU
77
+ std::vector<Ringbuf> g2h_ringbufs; // rb_size = min(A+B, T), on GPU
78
+ std::vector<Ringbuf> h2g_ringbufs; // rb_size = min(A+B, T), on GPU
79
+ std::vector<Ringbuf> h2h_ringbufs; // rb_size = (clag*T + B), on host
80
+
81
+ // stage0_output_rb_locs, stage1_input_rb_locs.
82
+ //
83
+ // These arrays contain GPU ringbuf locations, represented as 4 uint32s:
84
+ // uint rb_offset; // in segments, not bytes
85
+ // uint rb_phase; // index of (time chunk, beam) pair, relative to current pair
86
+ // uint rb_len; // number of (time chunk, beam) pairs in ringbuf (same as Ringbuf::rb_len)
87
+ // uint rb_nseg; // number of segments per (time chunk, beam) (same as Ringbuf::nseg_per_beam)
88
+ //
89
+ // The arrays are indexed by:
90
+ // iseg0 -> (time/nelts_per_segment, 2^rank1, 2^rank0)
91
+ // iseg1 -> (time/nelts_per_segment, 2^rank0, 2^rank1) note transpose
92
+
93
+ ksgpu::Array<uint> stage0_rb_locs; // shape (stage0_total_segments_per_beam, 4)
94
+ ksgpu::Array<uint> stage1_rb_locs; // shape (stage1_total_segments_per_beam, 4)
95
+ };
96
+
97
+
98
+ } // namespace pirate
99
+
100
+ #endif // _PIRATE_DEDISPERSION_PLAN_HPP