learning3d 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- learning3d/__init__.py +0 -2
- learning3d/models/__init__.py +1 -6
- learning3d/utils/__init__.py +1 -6
- {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/METADATA +1 -2
- {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/RECORD +8 -43
- learning3d/examples/test_flownet.py +0 -113
- learning3d/examples/train_flownet.py +0 -259
- learning3d/models/flownet3d.py +0 -446
- learning3d/utils/lib/build/lib.linux-x86_64-3.5/pointnet2_cuda.cpython-35m-x86_64-linux-gnu.so +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/ball_query.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/ball_query_gpu.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/group_points.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/group_points_gpu.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/interpolate.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/interpolate_gpu.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/pointnet2_api.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/sampling.o +0 -0
- learning3d/utils/lib/build/temp.linux-x86_64-3.5/src/sampling_gpu.o +0 -0
- learning3d/utils/lib/dist/pointnet2-0.0.0-py3.5-linux-x86_64.egg +0 -0
- learning3d/utils/lib/pointnet2.egg-info/SOURCES.txt +0 -14
- learning3d/utils/lib/pointnet2.egg-info/dependency_links.txt +0 -1
- learning3d/utils/lib/pointnet2.egg-info/top_level.txt +0 -1
- learning3d/utils/lib/pointnet2_modules.py +0 -160
- learning3d/utils/lib/pointnet2_utils.py +0 -318
- learning3d/utils/lib/pytorch_utils.py +0 -236
- learning3d/utils/lib/setup.py +0 -23
- learning3d/utils/lib/src/ball_query.cpp +0 -25
- learning3d/utils/lib/src/ball_query_gpu.cu +0 -67
- learning3d/utils/lib/src/ball_query_gpu.h +0 -15
- learning3d/utils/lib/src/cuda_utils.h +0 -15
- learning3d/utils/lib/src/group_points.cpp +0 -36
- learning3d/utils/lib/src/group_points_gpu.cu +0 -86
- learning3d/utils/lib/src/group_points_gpu.h +0 -22
- learning3d/utils/lib/src/interpolate.cpp +0 -65
- learning3d/utils/lib/src/interpolate_gpu.cu +0 -233
- learning3d/utils/lib/src/interpolate_gpu.h +0 -36
- learning3d/utils/lib/src/pointnet2_api.cpp +0 -25
- learning3d/utils/lib/src/sampling.cpp +0 -46
- learning3d/utils/lib/src/sampling_gpu.cu +0 -253
- learning3d/utils/lib/src/sampling_gpu.h +0 -29
- {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/LICENSE +0 -0
- {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/WHEEL +0 -0
- {learning3d-0.0.1.dist-info → learning3d-0.0.3.dist-info}/top_level.txt +0 -0
@@ -1,253 +0,0 @@
|
|
1
|
-
#include <stdio.h>
|
2
|
-
#include <stdlib.h>
|
3
|
-
|
4
|
-
#include "cuda_utils.h"
|
5
|
-
#include "sampling_gpu.h"
|
6
|
-
|
7
|
-
|
8
|
-
// Gather feature values at sampled indices.
// points: (B, C, N) source features
// idx:    (B, M)    indices into the N dimension
// out:    (B, C, M) gathered features
// Expected launch: grid = (ceil(M/T), C, B), block = (T,)
__global__ void gather_points_kernel_fast(int b, int c, int n, int m,
    const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {

    const int bs_idx = blockIdx.z;                               // batch
    const int c_idx  = blockIdx.y;                               // channel
    const int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;    // sample
    if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;

    // Flat offsets instead of pointer bumping; one element per thread.
    const int sample = idx[bs_idx * m + pt_idx];
    out[bs_idx * c * m + c_idx * m + pt_idx] =
        points[bs_idx * c * n + c_idx * n + sample];
}
|
25
|
-
|
26
|
-
// Host launcher for gather_points_kernel_fast.
// points: (B, C, N), idx: (B, npoints) -> out: (B, C, npoints)
// Work is enqueued on `stream`; only launch-configuration errors are
// detected here (via cudaGetLastError), not asynchronous execution errors.
void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *points, const int *idx, float *out, cudaStream_t stream) {

    const dim3 threads(THREADS_PER_BLOCK);
    // x: sample tiles, y: channels, z: batch
    const dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);

    gather_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points, idx, out);

    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
|
45
|
-
|
46
|
-
// Backward of gather: scatter-add output gradients to their source points.
// grad_out:    (B, C, M) upstream gradients
// idx:         (B, M)    indices used in the forward gather
// grad_points: (B, C, N) accumulated gradients (caller zero-initializes;
//              TODO confirm against the wrapper, not visible here)
// Expected launch: grid = (ceil(M/T), C, B), block = (T,)
__global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out,
    const int *__restrict__ idx, float *__restrict__ grad_points) {

    const int bs_idx = blockIdx.z;
    const int c_idx  = blockIdx.y;
    const int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;

    const float g      = grad_out[bs_idx * c * m + c_idx * m + pt_idx];
    const int   sample = idx[bs_idx * m + pt_idx];
    // atomicAdd: multiple samples may reference the same source point.
    atomicAdd(&grad_points[bs_idx * c * n + c_idx * n + sample], g);
}
|
64
|
-
|
65
|
-
// Host launcher for gather_points_grad_kernel_fast.
// grad_out: (B, C, npoints), idx: (B, npoints) -> grad_points: (B, C, N)
// Enqueued on `stream`; cudaGetLastError only catches launch errors.
void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {

    const dim3 threads(THREADS_PER_BLOCK);
    // x: sample tiles, y: channels, z: batch
    const dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);

    gather_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, grad_out, idx, grad_points);

    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
|
84
|
-
|
85
|
-
|
86
|
-
// Reduction step: fold slot idx2 into slot idx1, keeping whichever entry
// has the larger distance (and its corresponding point index).
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){
    const float dL = dists[idx1];
    const float dR = dists[idx2];
    const int   iL = dists_i[idx1];
    const int   iR = dists_i[idx2];
    const bool takeRight = dR > dL;
    dists[idx1]   = max(dL, dR);
    dists_i[idx1] = takeRight ? iR : iL;
}
|
92
|
-
|
93
|
-
// Furthest point sampling: iteratively select the point with the largest
// squared Euclidean distance to the already-selected set. One block per
// batch element; block_size is a compile-time power of two equal to blockDim.x.
// dataset: (B, N, 3) xyz coordinates
// temp:    (B, N) scratch holding each point's distance to the nearest
//          selected point (assumed pre-initialized to a large value by the
//          caller — TODO confirm against the wrapper, not visible here)
// idxs:    (B, M) output sample indices; idxs[.][0] is always point 0
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(int b, int n, int m,
    const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) {

    if (m <= 0) return;
    __shared__ float dists[block_size];
    __shared__ int dists_i[block_size];

    int batch_index = blockIdx.x;
    dataset += batch_index * n * 3;
    temp += batch_index * n;
    idxs += batch_index * m;

    int tid = threadIdx.x;
    const int stride = block_size;

    int old = 0;
    if (threadIdx.x == 0)
        idxs[0] = old;

    __syncthreads();
    for (int j = 1; j < m; j++) {
        int besti = 0;
        float best = -1;
        // Coordinates of the most recently selected point.
        float x1 = dataset[old * 3 + 0];
        float y1 = dataset[old * 3 + 1];
        float z1 = dataset[old * 3 + 2];
        // Each thread scans a strided slice of the points, updating temp[k]
        // (distance to nearest selected point) and tracking the farthest one.
        for (int k = tid; k < n; k += stride) {
            float x2, y2, z2;
            x2 = dataset[k * 3 + 0];
            y2 = dataset[k * 3 + 1];
            z2 = dataset[k * 3 + 2];

            float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
            float d2 = min(d, temp[k]);
            temp[k] = d2;
            besti = d2 > best ? k : besti;
            best = d2 > best ? d2 : best;
        }
        dists[tid] = best;
        dists_i[tid] = besti;
        __syncthreads();

        // Shared-memory tree reduction. block_size is a template constant,
        // so each outer `if` is uniform across the block and the barriers
        // inside are reached by every thread.
        if (block_size >= 1024) {
            if (tid < 512) {
                __update(dists, dists_i, tid, tid + 512);
            }
            __syncthreads();
        }

        if (block_size >= 512) {
            if (tid < 256) {
                __update(dists, dists_i, tid, tid + 256);
            }
            __syncthreads();
        }
        if (block_size >= 256) {
            if (tid < 128) {
                __update(dists, dists_i, tid, tid + 128);
            }
            __syncthreads();
        }
        if (block_size >= 128) {
            if (tid < 64) {
                __update(dists, dists_i, tid, tid + 64);
            }
            __syncthreads();
        }
        if (block_size >= 64) {
            if (tid < 32) {
                __update(dists, dists_i, tid, tid + 32);
            }
            __syncthreads();
        }
        if (block_size >= 32) {
            if (tid < 16) {
                __update(dists, dists_i, tid, tid + 16);
            }
            __syncthreads();
        }
        if (block_size >= 16) {
            if (tid < 8) {
                __update(dists, dists_i, tid, tid + 8);
            }
            __syncthreads();
        }
        if (block_size >= 8) {
            if (tid < 4) {
                __update(dists, dists_i, tid, tid + 4);
            }
            __syncthreads();
        }
        if (block_size >= 4) {
            if (tid < 2) {
                __update(dists, dists_i, tid, tid + 2);
            }
            __syncthreads();
        }
        if (block_size >= 2) {
            if (tid < 1) {
                __update(dists, dists_i, tid, tid + 1);
            }
            __syncthreads();
        }

        old = dists_i[0];
        if (tid == 0)
            idxs[j] = old;
        // FIX: barrier after the broadcast read of dists_i[0]. Without it a
        // fast thread can re-enter the loop and overwrite dists_i[tid]
        // (including slot 0) before a slow thread has read `old`, a
        // read/write race across iterations.
        __syncthreads();
    }
}
|
210
|
-
|
211
|
-
// Host launcher for furthest_point_sampling_kernel.
// dataset: (B, N, 3), temp: (B, N) scratch, idxs: (B, M) output indices.
// One block per batch element; the block size (a power of two chosen by
// opt_n_threads from n) selects the matching template instantiation.
void furthest_point_sampling_kernel_launcher(int b, int n, int m,
    const float *dataset, float *temp, int *idxs, cudaStream_t stream) {

    const unsigned int n_threads = opt_n_threads(n);

    switch (n_threads) {
        case 1024:
            furthest_point_sampling_kernel<1024><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 512:
            furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 256:
            furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 128:
            furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 64:
            furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 32:
            furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 16:
            furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 8:
            furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 4:
            furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 2:
            furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        case 1:
            furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
        default:
            // NOTE(review): fallback instantiation; presumably unreachable if
            // opt_n_threads always returns a power of two in [1, 1024].
            furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
    }

    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
|
@@ -1,29 +0,0 @@
|
|
1
|
-
#ifndef _SAMPLING_GPU_H
#define _SAMPLING_GPU_H

#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <vector>

// Forward gather: out (B, C, npoints) = points (B, C, N) indexed by idx (B, npoints).
int gather_points_wrapper_fast(int b, int c, int n, int npoints,
    at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);

void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *points, const int *idx, float *out, cudaStream_t stream);

// Backward gather: scatter-add grad_out (B, C, npoints) into grad_points (B, C, N).
int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints,
    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);

void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream);

// Furthest point sampling: pick m indices from (B, N, 3) points; temp is (B, N) scratch.
int furthest_point_sampling_wrapper(int b, int n, int m,
    at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor);

void furthest_point_sampling_kernel_launcher(int b, int n, int m,
    const float *dataset, float *temp, int *idxs, cudaStream_t stream);

#endif
|
File without changes
|
File without changes
|
File without changes
|