learning3d 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. learning3d/__init__.py +0 -2
  2. learning3d/models/__init__.py +1 -6
  3. learning3d/utils/__init__.py +1 -6
  4. {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/METADATA +1 -2
  5. {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/RECORD +8 -43
  6. learning3d/examples/test_flownet.py +0 -113
  7. learning3d/examples/train_flownet.py +0 -259
  8. learning3d/models/flownet3d.py +0 -446
  9. learning3d/utils/lib/build/lib.linux-x86_64-3.5/pointnet2_cuda.cpython-35m-x86_64-linux-gnu.so +0 -0
  10. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/ball_query.o +0 -0
  11. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/ball_query_gpu.o +0 -0
  12. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/group_points.o +0 -0
  13. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/group_points_gpu.o +0 -0
  14. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/interpolate.o +0 -0
  15. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/interpolate_gpu.o +0 -0
  16. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/pointnet2_api.o +0 -0
  17. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/sampling.o +0 -0
  18. learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/sampling_gpu.o +0 -0
  19. learning3d/utils/lib/dist/pointnet2-0.0.0-py3.5-linux-x86_64.egg +0 -0
  20. learning3d/utils/lib/pointnet2.egg-info/SOURCES.txt +0 -14
  21. learning3d/utils/lib/pointnet2.egg-info/dependency_links.txt +0 -1
  22. learning3d/utils/lib/pointnet2.egg-info/top_level.txt +0 -1
  23. learning3d/utils/lib/pointnet2_modules.py +0 -160
  24. learning3d/utils/lib/pointnet2_utils.py +0 -318
  25. learning3d/utils/lib/pytorch_utils.py +0 -236
  26. learning3d/utils/lib/setup.py +0 -23
  27. learning3d/utils/lib/src/ball_query.cpp +0 -25
  28. learning3d/utils/lib/src/ball_query_gpu.cu +0 -67
  29. learning3d/utils/lib/src/ball_query_gpu.h +0 -15
  30. learning3d/utils/lib/src/cuda_utils.h +0 -15
  31. learning3d/utils/lib/src/group_points.cpp +0 -36
  32. learning3d/utils/lib/src/group_points_gpu.cu +0 -86
  33. learning3d/utils/lib/src/group_points_gpu.h +0 -22
  34. learning3d/utils/lib/src/interpolate.cpp +0 -65
  35. learning3d/utils/lib/src/interpolate_gpu.cu +0 -233
  36. learning3d/utils/lib/src/interpolate_gpu.h +0 -36
  37. learning3d/utils/lib/src/pointnet2_api.cpp +0 -25
  38. learning3d/utils/lib/src/sampling.cpp +0 -46
  39. learning3d/utils/lib/src/sampling_gpu.cu +0 -253
  40. learning3d/utils/lib/src/sampling_gpu.h +0 -29
  41. {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/LICENSE +0 -0
  42. {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/WHEEL +0 -0
  43. {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/top_level.txt +0 -0
@@ -1,36 +0,0 @@
1
- #include <torch/serialize/tensor.h>
2
- #include <cuda.h>
3
- #include <cuda_runtime_api.h>
4
- #include <vector>
5
- #include <THC/THC.h>
6
- #include "group_points_gpu.h"
7
-
8
- extern THCState *state;
9
-
10
-
11
- int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample,
12
- at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {
13
-
14
- float *grad_points = grad_points_tensor.data<float>();
15
- const int *idx = idx_tensor.data<int>();
16
- const float *grad_out = grad_out_tensor.data<float>();
17
-
18
- cudaStream_t stream = THCState_getCurrentStream(state);
19
-
20
- group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream);
21
- return 1;
22
- }
23
-
24
-
25
- int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample,
26
- at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) {
27
-
28
- const float *points = points_tensor.data<float>();
29
- const int *idx = idx_tensor.data<int>();
30
- float *out = out_tensor.data<float>();
31
-
32
- cudaStream_t stream = THCState_getCurrentStream(state);
33
-
34
- group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream);
35
- return 1;
36
- }
@@ -1,86 +0,0 @@
1
- #include <stdio.h>
2
- #include <stdlib.h>
3
-
4
- #include "cuda_utils.h"
5
- #include "group_points_gpu.h"
6
-
7
-
8
- __global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample,
9
- const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) {
10
- // grad_out: (B, C, npoints, nsample)
11
- // idx: (B, npoints, nsample)
12
- // output:
13
- // grad_points: (B, C, N)
14
- int bs_idx = blockIdx.z;
15
- int c_idx = blockIdx.y;
16
- int index = blockIdx.x * blockDim.x + threadIdx.x;
17
- int pt_idx = index / nsample;
18
- if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
19
-
20
- int sample_idx = index % nsample;
21
- grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
22
- idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
23
-
24
- atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]);
25
- }
26
-
27
- void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
28
- const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {
29
- // grad_out: (B, C, npoints, nsample)
30
- // idx: (B, npoints, nsample)
31
- // output:
32
- // grad_points: (B, C, N)
33
- cudaError_t err;
34
- dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
35
- dim3 threads(THREADS_PER_BLOCK);
36
-
37
- group_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, grad_out, idx, grad_points);
38
-
39
- err = cudaGetLastError();
40
- if (cudaSuccess != err) {
41
- fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
42
- exit(-1);
43
- }
44
- }
45
-
46
-
47
- __global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample,
48
- const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
49
- // points: (B, C, N)
50
- // idx: (B, npoints, nsample)
51
- // output:
52
- // out: (B, C, npoints, nsample)
53
- int bs_idx = blockIdx.z;
54
- int c_idx = blockIdx.y;
55
- int index = blockIdx.x * blockDim.x + threadIdx.x;
56
- int pt_idx = index / nsample;
57
- if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
58
-
59
- int sample_idx = index % nsample;
60
-
61
- idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
62
- int in_idx = bs_idx * c * n + c_idx * n + idx[0];
63
- int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
64
-
65
- out[out_idx] = points[in_idx];
66
- }
67
-
68
-
69
- void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
70
- const float *points, const int *idx, float *out, cudaStream_t stream) {
71
- // points: (B, C, N)
72
- // idx: (B, npoints, nsample)
73
- // output:
74
- // out: (B, C, npoints, nsample)
75
- cudaError_t err;
76
- dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
77
- dim3 threads(THREADS_PER_BLOCK);
78
-
79
- group_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, points, idx, out);
80
- // cudaDeviceSynchronize(); // for using printf in kernel function
81
- err = cudaGetLastError();
82
- if (cudaSuccess != err) {
83
- fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
84
- exit(-1);
85
- }
86
- }
@@ -1,22 +0,0 @@
1
- #ifndef _GROUP_POINTS_GPU_H
2
- #define _GROUP_POINTS_GPU_H
3
-
4
- #include <torch/serialize/tensor.h>
5
- #include <cuda.h>
6
- #include <cuda_runtime_api.h>
7
- #include <vector>
8
-
9
-
10
- int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample,
11
- at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
12
-
13
- void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
14
- const float *points, const int *idx, float *out, cudaStream_t stream);
15
-
16
- int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample,
17
- at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);
18
-
19
- void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
20
- const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream);
21
-
22
- #endif
@@ -1,65 +0,0 @@
1
- #include <torch/serialize/tensor.h>
2
- #include <vector>
3
- #include <THC/THC.h>
4
- #include <math.h>
5
- #include <stdio.h>
6
- #include <stdlib.h>
7
- #include <cuda.h>
8
- #include <cuda_runtime_api.h>
9
- #include "interpolate_gpu.h"
10
-
11
- extern THCState *state;
12
-
13
-
14
- void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor,
15
- at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
16
- const float *unknown = unknown_tensor.data<float>();
17
- const float *known = known_tensor.data<float>();
18
- float *dist2 = dist2_tensor.data<float>();
19
- int *idx = idx_tensor.data<int>();
20
-
21
- cudaStream_t stream = THCState_getCurrentStream(state);
22
- three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream);
23
- }
24
-
25
- void knn_wrapper_fast(int b, int n, int m, int k, at::Tensor unknown_tensor,
26
- at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
27
- const float *unknown = unknown_tensor.data<float>();
28
- const float *known = known_tensor.data<float>();
29
- float *dist2 = dist2_tensor.data<float>();
30
- int *idx = idx_tensor.data<int>();
31
-
32
- cudaStream_t stream = THCState_getCurrentStream(state);
33
- knn_kernel_launcher_fast(b, n, m, k, unknown, known, dist2, idx, stream);
34
- }
35
-
36
-
37
- void three_interpolate_wrapper_fast(int b, int c, int m, int n,
38
- at::Tensor points_tensor,
39
- at::Tensor idx_tensor,
40
- at::Tensor weight_tensor,
41
- at::Tensor out_tensor) {
42
-
43
- const float *points = points_tensor.data<float>();
44
- const float *weight = weight_tensor.data<float>();
45
- float *out = out_tensor.data<float>();
46
- const int *idx = idx_tensor.data<int>();
47
-
48
- cudaStream_t stream = THCState_getCurrentStream(state);
49
- three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream);
50
- }
51
-
52
- void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m,
53
- at::Tensor grad_out_tensor,
54
- at::Tensor idx_tensor,
55
- at::Tensor weight_tensor,
56
- at::Tensor grad_points_tensor) {
57
-
58
- const float *grad_out = grad_out_tensor.data<float>();
59
- const float *weight = weight_tensor.data<float>();
60
- float *grad_points = grad_points_tensor.data<float>();
61
- const int *idx = idx_tensor.data<int>();
62
-
63
- cudaStream_t stream = THCState_getCurrentStream(state);
64
- three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream);
65
- }
@@ -1,233 +0,0 @@
1
- #include <math.h>
2
- #include <stdio.h>
3
- #include <stdlib.h>
4
-
5
- #include "cuda_utils.h"
6
- #include "interpolate_gpu.h"
7
-
8
-
9
- __global__ void knn_kernel_fast(int b, int n, int m, int k, const float *__restrict__ unknown,
10
- const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
11
- // unknown: (B, N, 3)
12
- // known: (B, M, 3)
13
- // output:
14
- // dist2: (B, N, k)
15
- // idx: (B, N, k)
16
-
17
- int bs_idx = blockIdx.y;
18
- int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
19
- if (bs_idx >= b || pt_idx >= n) return;
20
-
21
- unknown += bs_idx * n * 3 + pt_idx * 3;
22
- known += bs_idx * m * 3;
23
- dist2 += bs_idx * n * k + pt_idx * k;
24
- idx += bs_idx * n * k + pt_idx * k;
25
-
26
- float ux = unknown[0];
27
- float uy = unknown[1];
28
- float uz = unknown[2];
29
-
30
- double best[200];
31
- int besti[200];
32
- for(int i = 0; i < k; i++){
33
- best[i] = 1e40;
34
- besti[i] = 0;
35
- }
36
- for (int i = 0; i < m; ++i) {
37
- float x = known[i * 3 + 0];
38
- float y = known[i * 3 + 1];
39
- float z = known[i * 3 + 2];
40
- float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
41
- for(int j = 0; j < k; j++){
42
- if(d < best[j]){
43
- for(int l = k - 1; l > j; l--){
44
- best[l] = best[l - 1];
45
- besti[l] = besti[l - 1];
46
- }
47
- best[j] = d;
48
- besti[j] = i;
49
- break;
50
- }
51
- }
52
- }
53
- for(int i = 0; i < k; i++){
54
- idx[i] = besti[i];
55
- dist2[i] = best[i];
56
- }
57
- }
58
-
59
-
60
- void knn_kernel_launcher_fast(int b, int n, int m, int k, const float *unknown,
61
- const float *known, float *dist2, int *idx, cudaStream_t stream) {
62
- // unknown: (B, N, 3)
63
- // known: (B, M, 3)
64
- // output:
65
- // dist2: (B, N, k)
66
- // idx: (B, N, k)
67
-
68
- cudaError_t err;
69
- dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
70
- dim3 threads(THREADS_PER_BLOCK);
71
-
72
- knn_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, k, unknown, known, dist2, idx);
73
-
74
- err = cudaGetLastError();
75
- if (cudaSuccess != err) {
76
- fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
77
- exit(-1);
78
- }
79
- }
80
-
81
- __global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown,
82
- const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
83
- // unknown: (B, N, 3)
84
- // known: (B, M, 3)
85
- // output:
86
- // dist2: (B, N, 3)
87
- // idx: (B, N, 3)
88
-
89
- int bs_idx = blockIdx.y;
90
- int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
91
- if (bs_idx >= b || pt_idx >= n) return;
92
-
93
- unknown += bs_idx * n * 3 + pt_idx * 3;
94
- known += bs_idx * m * 3;
95
- dist2 += bs_idx * n * 3 + pt_idx * 3;
96
- idx += bs_idx * n * 3 + pt_idx * 3;
97
-
98
- float ux = unknown[0];
99
- float uy = unknown[1];
100
- float uz = unknown[2];
101
-
102
- double best1 = 1e40, best2 = 1e40, best3 = 1e40;
103
- int besti1 = 0, besti2 = 0, besti3 = 0;
104
- for (int k = 0; k < m; ++k) {
105
- float x = known[k * 3 + 0];
106
- float y = known[k * 3 + 1];
107
- float z = known[k * 3 + 2];
108
- float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
109
- if (d < best1) {
110
- best3 = best2; besti3 = besti2;
111
- best2 = best1; besti2 = besti1;
112
- best1 = d; besti1 = k;
113
- }
114
- else if (d < best2) {
115
- best3 = best2; besti3 = besti2;
116
- best2 = d; besti2 = k;
117
- }
118
- else if (d < best3) {
119
- best3 = d; besti3 = k;
120
- }
121
- }
122
- dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;
123
- idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;
124
- }
125
-
126
-
127
- void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown,
128
- const float *known, float *dist2, int *idx, cudaStream_t stream) {
129
- // unknown: (B, N, 3)
130
- // known: (B, M, 3)
131
- // output:
132
- // dist2: (B, N, 3)
133
- // idx: (B, N, 3)
134
-
135
- cudaError_t err;
136
- dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
137
- dim3 threads(THREADS_PER_BLOCK);
138
-
139
- three_nn_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known, dist2, idx);
140
-
141
- err = cudaGetLastError();
142
- if (cudaSuccess != err) {
143
- fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
144
- exit(-1);
145
- }
146
- }
147
-
148
-
149
- __global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points,
150
- const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
151
- // points: (B, C, M)
152
- // idx: (B, N, 3)
153
- // weight: (B, N, 3)
154
- // output:
155
- // out: (B, C, N)
156
-
157
- int bs_idx = blockIdx.z;
158
- int c_idx = blockIdx.y;
159
- int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
160
-
161
- if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;
162
-
163
- weight += bs_idx * n * 3 + pt_idx * 3;
164
- points += bs_idx * c * m + c_idx * m;
165
- idx += bs_idx * n * 3 + pt_idx * 3;
166
- out += bs_idx * c * n + c_idx * n;
167
-
168
- out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];
169
- }
170
-
171
- void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n,
172
- const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) {
173
- // points: (B, C, M)
174
- // idx: (B, N, 3)
175
- // weight: (B, N, 3)
176
- // output:
177
- // out: (B, C, N)
178
-
179
- cudaError_t err;
180
- dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
181
- dim3 threads(THREADS_PER_BLOCK);
182
- three_interpolate_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, m, n, points, idx, weight, out);
183
-
184
- err = cudaGetLastError();
185
- if (cudaSuccess != err) {
186
- fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
187
- exit(-1);
188
- }
189
- }
190
-
191
-
192
- __global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out,
193
- const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) {
194
- // grad_out: (B, C, N)
195
- // weight: (B, N, 3)
196
- // output:
197
- // grad_points: (B, C, M)
198
-
199
- int bs_idx = blockIdx.z;
200
- int c_idx = blockIdx.y;
201
- int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
202
-
203
- if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;
204
-
205
- grad_out += bs_idx * c * n + c_idx * n + pt_idx;
206
- weight += bs_idx * n * 3 + pt_idx * 3;
207
- grad_points += bs_idx * c * m + c_idx * m;
208
- idx += bs_idx * n * 3 + pt_idx * 3;
209
-
210
-
211
- atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]);
212
- atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]);
213
- atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]);
214
- }
215
-
216
- void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out,
217
- const int *idx, const float *weight, float *grad_points, cudaStream_t stream) {
218
- // grad_out: (B, C, N)
219
- // weight: (B, N, 3)
220
- // output:
221
- // grad_points: (B, C, M)
222
-
223
- cudaError_t err;
224
- dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
225
- dim3 threads(THREADS_PER_BLOCK);
226
- three_interpolate_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, m, grad_out, idx, weight, grad_points);
227
-
228
- err = cudaGetLastError();
229
- if (cudaSuccess != err) {
230
- fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
231
- exit(-1);
232
- }
233
- }
@@ -1,36 +0,0 @@
1
- #ifndef _INTERPOLATE_GPU_H
2
- #define _INTERPOLATE_GPU_H
3
-
4
- #include <torch/serialize/tensor.h>
5
- #include<vector>
6
- #include <cuda.h>
7
- #include <cuda_runtime_api.h>
8
-
9
-
10
- void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor,
11
- at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
12
-
13
- void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown,
14
- const float *known, float *dist2, int *idx, cudaStream_t stream);
15
-
16
- void knn_wrapper_fast(int b, int n, int m, int k, at::Tensor unknown_tensor,
17
- at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
18
-
19
- void knn_kernel_launcher_fast(int b, int n, int m, int k, const float *unknown,
20
- const float *known, float *dist2, int *idx, cudaStream_t stream);
21
-
22
-
23
- void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor,
24
- at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
25
-
26
- void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n,
27
- const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream);
28
-
29
-
30
- void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor,
31
- at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);
32
-
33
- void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out,
34
- const int *idx, const float *weight, float *grad_points, cudaStream_t stream);
35
-
36
- #endif
@@ -1,25 +0,0 @@
1
- #include <torch/serialize/tensor.h>
2
- #include <torch/extension.h>
3
-
4
- #include "ball_query_gpu.h"
5
- #include "group_points_gpu.h"
6
- #include "sampling_gpu.h"
7
- #include "interpolate_gpu.h"
8
-
9
-
10
- PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
11
- m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast");
12
-
13
- m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast");
14
- m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast");
15
-
16
- m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast");
17
- m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast");
18
-
19
- m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper");
20
-
21
- m.def("knn_wrapper", &knn_wrapper_fast, "knn_wrapper_fast");
22
- m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast");
23
- m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast");
24
- m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast");
25
- }
@@ -1,46 +0,0 @@
1
- #include <torch/serialize/tensor.h>
2
- #include <ATen/cuda/CUDAContext.h>
3
- #include <vector>
4
- #include <THC/THC.h>
5
-
6
- #include "sampling_gpu.h"
7
-
8
- extern THCState *state;
9
-
10
-
11
- int gather_points_wrapper_fast(int b, int c, int n, int npoints,
12
- at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){
13
- const float *points = points_tensor.data<float>();
14
- const int *idx = idx_tensor.data<int>();
15
- float *out = out_tensor.data<float>();
16
-
17
- cudaStream_t stream = THCState_getCurrentStream(state);
18
- gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream);
19
- return 1;
20
- }
21
-
22
-
23
- int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints,
24
- at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {
25
-
26
- const float *grad_out = grad_out_tensor.data<float>();
27
- const int *idx = idx_tensor.data<int>();
28
- float *grad_points = grad_points_tensor.data<float>();
29
-
30
- cudaStream_t stream = THCState_getCurrentStream(state);
31
- gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream);
32
- return 1;
33
- }
34
-
35
-
36
- int furthest_point_sampling_wrapper(int b, int n, int m,
37
- at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) {
38
-
39
- const float *points = points_tensor.data<float>();
40
- float *temp = temp_tensor.data<float>();
41
- int *idx = idx_tensor.data<int>();
42
-
43
- cudaStream_t stream = THCState_getCurrentStream(state);
44
- furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);
45
- return 1;
46
- }