sdf-sampler 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdf_sampler/__init__.py CHANGED
@@ -47,7 +47,7 @@ from sdf_sampler.models import (
  )
  from sdf_sampler.sampler import SDFSampler
 
- __version__ = "0.1.0"
+ __version__ = "0.3.0"
 
  __all__ = [
      # Main classes
sdf_sampler/__main__.py ADDED
@@ -0,0 +1,17 @@
+ # ABOUTME: Entry point for running sdf-sampler as a module
+ # ABOUTME: Enables `python -m sdf_sampler` invocation
+
+ """
+ Run sdf-sampler as a module.
+
+ Usage:
+     python -m sdf_sampler --help
+     python -m sdf_sampler analyze input.ply -o constraints.json
+     python -m sdf_sampler sample input.ply constraints.json -o samples.parquet
+     python -m sdf_sampler pipeline input.ply -o samples.parquet
+ """
+
+ from sdf_sampler.cli import main
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
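The shim above is what makes `python -m sdf_sampler` work. A minimal sketch of the same invocation driven in-process via the stdlib `runpy` module (the argv values are illustrative):

```python
# Equivalent of `python -m sdf_sampler --help`, run in-process.
import runpy
import sys

sys.argv = ["sdf_sampler", "--help"]  # argv consumed by the CLI's argparse
try:
    # Executes sdf_sampler/__main__.py with __name__ == "__main__",
    # which raises SystemExit(main()) as in the file above.
    runpy.run_module("sdf_sampler", run_name="__main__")
except SystemExit as exc:
    print(f"exit status: {exc.code}")
```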
sdf_sampler/cli.py ADDED
@@ -0,0 +1,681 @@
+ # ABOUTME: Command-line interface for sdf-sampler
+ # ABOUTME: Provides analyze, sample, and pipeline commands with full parameter control
+
+ """
+ CLI for sdf-sampler.
+
+ Usage:
+     python -m sdf_sampler analyze input.ply -o constraints.json
+     python -m sdf_sampler sample input.ply constraints.json -o samples.parquet
+     python -m sdf_sampler pipeline input.ply -o samples.parquet
+ """
+
+ import argparse
+ import json
+ import sys
+ from pathlib import Path
+
+ import numpy as np
+
+
+ def add_analysis_options(parser: argparse.ArgumentParser) -> None:
+     """Add analysis configuration options to a parser."""
+     group = parser.add_argument_group("Analysis Options")
+
+     group.add_argument(
+         "--min-gap-size",
+         type=float,
+         default=0.10,
+         help="Minimum gap size for flood fill in meters (default: 0.10)",
+     )
+     group.add_argument(
+         "--max-grid-dim",
+         type=int,
+         default=200,
+         help="Maximum voxel grid dimension (default: 200)",
+     )
+     group.add_argument(
+         "--cone-angle",
+         type=float,
+         default=15.0,
+         help="Ray propagation cone half-angle in degrees (default: 15.0)",
+     )
+     group.add_argument(
+         "--normal-offset-pairs",
+         type=int,
+         default=40,
+         help="Number of box pairs for normal_offset algorithm (default: 40)",
+     )
+     group.add_argument(
+         "--max-boxes",
+         type=int,
+         default=30,
+         help="Maximum boxes per algorithm (default: 30)",
+     )
+     group.add_argument(
+         "--overlap-threshold",
+         type=float,
+         default=0.5,
+         help="Overlap threshold for box simplification (default: 0.5)",
+     )
+     group.add_argument(
+         "--idw-sample-count",
+         type=int,
+         default=1000,
+         help="Number of IDW samples to generate (default: 1000)",
+     )
+     group.add_argument(
+         "--idw-max-distance",
+         type=float,
+         default=0.5,
+         help="Maximum IDW distance from surface in meters (default: 0.5)",
+     )
+     group.add_argument(
+         "--idw-power",
+         type=float,
+         default=2.0,
+         help="IDW power factor (default: 2.0)",
+     )
+     group.add_argument(
+         "--hull-alpha",
+         type=float,
+         default=1.0,
+         help="Alpha shape parameter for hull filtering (default: 1.0)",
+     )
+     group.add_argument(
+         "--flood-fill-output",
+         type=str,
+         choices=["boxes", "samples", "both"],
+         default="samples",
+         help="Output mode for flood fill (default: samples)",
+     )
+     group.add_argument(
+         "--flood-fill-samples",
+         type=int,
+         default=500,
+         help="Number of sample points from flood fill (default: 500)",
+     )
+     group.add_argument(
+         "--voxel-regions-output",
+         type=str,
+         choices=["boxes", "samples", "both"],
+         default="samples",
+         help="Output mode for voxel regions (default: samples)",
+     )
+     group.add_argument(
+         "--voxel-regions-samples",
+         type=int,
+         default=500,
+         help="Number of sample points from voxel regions (default: 500)",
+     )
+
+
+ def add_sampling_options(parser: argparse.ArgumentParser) -> None:
+     """Add sampling configuration options to a parser."""
+     group = parser.add_argument_group("Sampling Options")
+
+     group.add_argument(
+         "-n", "--total-samples",
+         type=int,
+         default=10000,
+         help="Total number of samples to generate (default: 10000)",
+     )
+     group.add_argument(
+         "-s", "--strategy",
+         type=str,
+         choices=["constant", "density", "inverse_square"],
+         default="inverse_square",
+         help="Sampling strategy (default: inverse_square)",
+     )
+     group.add_argument(
+         "--seed",
+         type=int,
+         default=None,
+         help="Random seed for reproducibility",
+     )
+     group.add_argument(
+         "--samples-per-primitive",
+         type=int,
+         default=100,
+         help="Samples per primitive for CONSTANT strategy (default: 100)",
+     )
+     group.add_argument(
+         "--inverse-square-base",
+         type=int,
+         default=100,
+         help="Base samples at surface for INVERSE_SQUARE (default: 100)",
+     )
+     group.add_argument(
+         "--inverse-square-falloff",
+         type=float,
+         default=2.0,
+         help="Falloff exponent for INVERSE_SQUARE (default: 2.0)",
+     )
+     group.add_argument(
+         "--near-band",
+         type=float,
+         default=0.02,
+         help="Near-band width around surface (default: 0.02)",
+     )
+
+
+ def add_output_options(parser: argparse.ArgumentParser) -> None:
+     """Add output configuration options to a parser."""
+     group = parser.add_argument_group("Output Options")
+
+     group.add_argument(
+         "--include-surface-points",
+         action="store_true",
+         help="Include original surface points (phi=0) in output",
+     )
+     group.add_argument(
+         "--surface-point-ratio",
+         type=float,
+         default=0.1,
+         help="Ratio of surface points to include (default: 0.1 = 10%%)",
+     )
+
+
+ def main(argv: list[str] | None = None) -> int:
+     """Main CLI entry point."""
+     parser = argparse.ArgumentParser(
+         prog="sdf-sampler",
+         description="Auto-analysis and sampling of point clouds for SDF training data",
+     )
+     parser.add_argument(
+         "--version", action="store_true", help="Show version and exit"
+     )
+
+     subparsers = parser.add_subparsers(dest="command", help="Available commands")
+
+     # analyze command
+     analyze_parser = subparsers.add_parser(
+         "analyze",
+         help="Analyze point cloud to detect SOLID/EMPTY regions",
+     )
+     analyze_parser.add_argument(
+         "input",
+         type=Path,
+         help="Input point cloud file (PLY, LAS, NPZ, CSV, Parquet)",
+     )
+     analyze_parser.add_argument(
+         "-o", "--output",
+         type=Path,
+         default=None,
+         help="Output constraints JSON file (default: <input>.constraints.json)",
+     )
+     analyze_parser.add_argument(
+         "-a", "--algorithms",
+         type=str,
+         nargs="+",
+         default=None,
+         help="Algorithms to run (flood_fill, voxel_regions, normal_offset, normal_idw, pocket)",
+     )
+     analyze_parser.add_argument(
+         "--no-hull-filter",
+         action="store_true",
+         help="Disable hull filtering",
+     )
+     analyze_parser.add_argument(
+         "-v", "--verbose",
+         action="store_true",
+         help="Verbose output",
+     )
+     add_analysis_options(analyze_parser)
+
+     # sample command
+     sample_parser = subparsers.add_parser(
+         "sample",
+         help="Generate training samples from constraints",
+     )
+     sample_parser.add_argument(
+         "input",
+         type=Path,
+         help="Input point cloud file",
+     )
+     sample_parser.add_argument(
+         "constraints",
+         type=Path,
+         help="Constraints JSON file (from analyze command)",
+     )
+     sample_parser.add_argument(
+         "-o", "--output",
+         type=Path,
+         default=None,
+         help="Output parquet file (default: <input>.samples.parquet)",
+     )
+     sample_parser.add_argument(
+         "-v", "--verbose",
+         action="store_true",
+         help="Verbose output",
+     )
+     add_sampling_options(sample_parser)
+     add_output_options(sample_parser)
+
+     # pipeline command
+     pipeline_parser = subparsers.add_parser(
+         "pipeline",
+         help="Full pipeline: analyze + sample + export",
+     )
+     pipeline_parser.add_argument(
+         "input",
+         type=Path,
+         help="Input point cloud file",
+     )
+     pipeline_parser.add_argument(
+         "-o", "--output",
+         type=Path,
+         default=None,
+         help="Output parquet file (default: <input>.samples.parquet)",
+     )
+     pipeline_parser.add_argument(
+         "-a", "--algorithms",
+         type=str,
+         nargs="+",
+         default=None,
+         help="Algorithms to run",
+     )
+     pipeline_parser.add_argument(
+         "--no-hull-filter",
+         action="store_true",
+         help="Disable hull filtering",
+     )
+     pipeline_parser.add_argument(
+         "--save-constraints",
+         type=Path,
+         default=None,
+         help="Also save constraints to JSON file",
+     )
+     pipeline_parser.add_argument(
+         "-v", "--verbose",
+         action="store_true",
+         help="Verbose output",
+     )
+     add_analysis_options(pipeline_parser)
+     add_sampling_options(pipeline_parser)
+     add_output_options(pipeline_parser)
+
+     # info command
+     info_parser = subparsers.add_parser(
+         "info",
+         help="Show information about a point cloud or constraints file",
+     )
+     info_parser.add_argument(
+         "input",
+         type=Path,
+         help="Input file (point cloud or constraints JSON)",
+     )
+
+     args = parser.parse_args(argv)
+
+     if args.version:
+         from sdf_sampler import __version__
+         print(f"sdf-sampler {__version__}")
+         return 0
+
+     if args.command is None:
+         parser.print_help()
+         return 0
+
+     if args.command == "analyze":
+         return cmd_analyze(args)
+     elif args.command == "sample":
+         return cmd_sample(args)
+     elif args.command == "pipeline":
+         return cmd_pipeline(args)
+     elif args.command == "info":
+         return cmd_info(args)
+
+     return 0
+
+
+ def build_analysis_options(args: argparse.Namespace):
+     """Build AutoAnalysisOptions from CLI args."""
+     from sdf_sampler.config import AutoAnalysisOptions
+
+     return AutoAnalysisOptions(
+         min_gap_size=args.min_gap_size,
+         max_grid_dim=args.max_grid_dim,
+         cone_angle=args.cone_angle,
+         normal_offset_pairs=args.normal_offset_pairs,
+         max_boxes=args.max_boxes,
+         overlap_threshold=args.overlap_threshold,
+         idw_sample_count=args.idw_sample_count,
+         idw_max_distance=args.idw_max_distance,
+         idw_power=args.idw_power,
+         hull_filter_enabled=not getattr(args, 'no_hull_filter', False),
+         hull_alpha=args.hull_alpha,
+         flood_fill_output=args.flood_fill_output,
+         flood_fill_sample_count=args.flood_fill_samples,
+         voxel_regions_output=args.voxel_regions_output,
+         voxel_regions_sample_count=args.voxel_regions_samples,
+     )
+
+
+ def build_sampler_config(args: argparse.Namespace):
+     """Build SamplerConfig from CLI args."""
+     from sdf_sampler.config import SamplerConfig
+
+     return SamplerConfig(
+         total_samples=args.total_samples,
+         samples_per_primitive=args.samples_per_primitive,
+         inverse_square_base_samples=args.inverse_square_base,
+         inverse_square_falloff=args.inverse_square_falloff,
+         near_band=args.near_band,
+         seed=args.seed or 0,
+     )
+
+
+ def cmd_analyze(args: argparse.Namespace) -> int:
+     """Run analyze command."""
+     from sdf_sampler import SDFAnalyzer, load_point_cloud
+
+     if not args.input.exists():
+         print(f"Error: Input file not found: {args.input}", file=sys.stderr)
+         return 1
+
+     output = args.output or args.input.with_suffix(".constraints.json")
+
+     if args.verbose:
+         print(f"Loading point cloud: {args.input}")
+
+     try:
+         xyz, normals = load_point_cloud(str(args.input))
+     except Exception as e:
+         print(f"Error loading point cloud: {e}", file=sys.stderr)
+         return 1
+
+     if args.verbose:
+         print(f" Points: {len(xyz):,}")
+         print(f" Normals: {'yes' if normals is not None else 'no'}")
+
+     options = build_analysis_options(args)
+
+     if args.verbose:
+         algos = args.algorithms or ["all"]
+         print(f"Running analysis: {', '.join(algos)}")
+
+     analyzer = SDFAnalyzer()
+     result = analyzer.analyze(
+         xyz=xyz,
+         normals=normals,
+         algorithms=args.algorithms,
+         options=options,
+     )
+
+     if args.verbose:
+         print(f"Generated {len(result.constraints)} constraints")
+         print(f" SOLID: {result.summary.solid_constraints}")
+         print(f" EMPTY: {result.summary.empty_constraints}")
+
+     # Save constraints
+     with open(output, "w") as f:
+         json.dump(result.constraints, f, indent=2, default=_json_serializer)
+
+     print(f"Saved constraints to: {output}")
+     return 0
+
+
+ def cmd_sample(args: argparse.Namespace) -> int:
+     """Run sample command."""
+     from sdf_sampler import SDFSampler, load_point_cloud
+
+     if not args.input.exists():
+         print(f"Error: Input file not found: {args.input}", file=sys.stderr)
+         return 1
+
+     if not args.constraints.exists():
+         print(f"Error: Constraints file not found: {args.constraints}", file=sys.stderr)
+         return 1
+
+     output = args.output or args.input.with_suffix(".samples.parquet")
+
+     if args.verbose:
+         print(f"Loading point cloud: {args.input}")
+
+     try:
+         xyz, normals = load_point_cloud(str(args.input))
+     except Exception as e:
+         print(f"Error loading point cloud: {e}", file=sys.stderr)
+         return 1
+
+     if args.verbose:
+         print(f"Loading constraints: {args.constraints}")
+
+     with open(args.constraints) as f:
+         constraints = json.load(f)
+
+     if args.verbose:
+         print(f" Constraints: {len(constraints)}")
+         print(f"Generating {args.total_samples:,} samples with strategy: {args.strategy}")
+
+     config = build_sampler_config(args)
+     sampler = SDFSampler(config=config)
+     samples = sampler.generate(
+         xyz=xyz,
+         normals=normals,
+         constraints=constraints,
+         total_samples=args.total_samples,
+         strategy=args.strategy,
+         seed=args.seed,
+     )
+
+     # Include surface points if requested
+     if args.include_surface_points:
+         samples = _add_surface_points(
+             samples, xyz, normals, args.surface_point_ratio, args.verbose
+         )
+
+     if args.verbose:
+         print(f"Generated {len(samples)} samples")
+
+     sampler.export_parquet(samples, str(output))
+     print(f"Saved samples to: {output}")
+     return 0
+
+
+ def cmd_pipeline(args: argparse.Namespace) -> int:
+     """Run full pipeline: analyze + sample + export."""
+     from sdf_sampler import SDFAnalyzer, SDFSampler, load_point_cloud
+
+     if not args.input.exists():
+         print(f"Error: Input file not found: {args.input}", file=sys.stderr)
+         return 1
+
+     output = args.output or args.input.with_suffix(".samples.parquet")
+
+     if args.verbose:
+         print(f"Loading point cloud: {args.input}")
+
+     try:
+         xyz, normals = load_point_cloud(str(args.input))
+     except Exception as e:
+         print(f"Error loading point cloud: {e}", file=sys.stderr)
+         return 1
+
+     if args.verbose:
+         print(f" Points: {len(xyz):,}")
+         print(f" Normals: {'yes' if normals is not None else 'no'}")
+
+     # Analyze
+     if args.verbose:
+         algos = args.algorithms or ["all"]
+         print(f"Running analysis: {', '.join(algos)}")
+
+     options = build_analysis_options(args)
+     analyzer = SDFAnalyzer()
+     result = analyzer.analyze(
+         xyz=xyz,
+         normals=normals,
+         algorithms=args.algorithms,
+         options=options,
+     )
+
+     if args.verbose:
+         print(f"Generated {len(result.constraints)} constraints")
+         print(f" SOLID: {result.summary.solid_constraints}")
+         print(f" EMPTY: {result.summary.empty_constraints}")
+
+     # Optionally save constraints
+     if args.save_constraints:
+         with open(args.save_constraints, "w") as f:
+             json.dump(result.constraints, f, indent=2, default=_json_serializer)
+         if args.verbose:
+             print(f"Saved constraints to: {args.save_constraints}")
+
+     # Sample
+     if args.verbose:
+         print(f"Generating {args.total_samples:,} samples with strategy: {args.strategy}")
+
+     config = build_sampler_config(args)
+     sampler = SDFSampler(config=config)
+     samples = sampler.generate(
+         xyz=xyz,
+         normals=normals,
+         constraints=result.constraints,
+         total_samples=args.total_samples,
+         strategy=args.strategy,
+         seed=args.seed,
+     )
+
+     # Include surface points if requested
+     if args.include_surface_points:
+         samples = _add_surface_points(
+             samples, xyz, normals, args.surface_point_ratio, args.verbose
+         )
+
+     if args.verbose:
+         print(f"Generated {len(samples)} samples")
+
+     # Export
+     sampler.export_parquet(samples, str(output))
+     print(f"Saved samples to: {output}")
+     return 0
+
+
+ def _add_surface_points(
+     samples: list,
+     xyz: np.ndarray,
+     normals: np.ndarray | None,
+     ratio: float,
+     verbose: bool,
+ ) -> list:
+     """Add surface points to sample list."""
+     from sdf_sampler.models import TrainingSample
+
+     n_surface = int(len(xyz) * ratio)
+     if n_surface == 0:
+         return samples
+
+     # Subsample if needed
+     if n_surface < len(xyz):
+         indices = np.random.choice(len(xyz), n_surface, replace=False)
+         surface_xyz = xyz[indices]
+         surface_normals = normals[indices] if normals is not None else None
+     else:
+         surface_xyz = xyz
+         surface_normals = normals
+
+     if verbose:
+         print(f"Adding {len(surface_xyz):,} surface points (phi=0)")
+
+     for i in range(len(surface_xyz)):
+         sample = TrainingSample(
+             x=float(surface_xyz[i, 0]),
+             y=float(surface_xyz[i, 1]),
+             z=float(surface_xyz[i, 2]),
+             phi=0.0,
+             weight=1.0,
+             source="surface",
+             is_surface=True,
+             is_free=False,
+         )
+         if surface_normals is not None:
+             sample.nx = float(surface_normals[i, 0])
+             sample.ny = float(surface_normals[i, 1])
+             sample.nz = float(surface_normals[i, 2])
+         samples.append(sample)
+
+     return samples
+
+
+ def cmd_info(args: argparse.Namespace) -> int:
+     """Show information about a file."""
+     if not args.input.exists():
+         print(f"Error: File not found: {args.input}", file=sys.stderr)
+         return 1
+
+     suffix = args.input.suffix.lower()
+
+     if suffix == ".json":
+         # Constraints file
+         with open(args.input) as f:
+             constraints = json.load(f)
+
+         print(f"Constraints file: {args.input}")
+         print(f" Total constraints: {len(constraints)}")
+
+         # Count by type and sign
+         by_type: dict[str, int] = {}
+         by_sign: dict[str, int] = {}
+         for c in constraints:
+             ctype = c.get("type", "unknown")
+             sign = c.get("sign", "unknown")
+             by_type[ctype] = by_type.get(ctype, 0) + 1
+             by_sign[sign] = by_sign.get(sign, 0) + 1
+
+         print(" By type:")
+         for t, count in sorted(by_type.items()):
+             print(f" {t}: {count}")
+         print(" By sign:")
+         for s, count in sorted(by_sign.items()):
+             print(f" {s}: {count}")
+
+     elif suffix == ".parquet":
+         import pandas as pd
+         df = pd.read_parquet(args.input)
+
+         print(f"Parquet file: {args.input}")
+         print(f" Samples: {len(df):,}")
+         print(f" Columns: {', '.join(df.columns)}")
+
+         if "source" in df.columns:
+             print(" By source:")
+             for source, count in df["source"].value_counts().items():
+                 print(f" {source}: {count:,}")
+
+         if "phi" in df.columns:
+             print(f" Phi range: [{df['phi'].min():.4f}, {df['phi'].max():.4f}]")
+
+     else:
+         # Point cloud file
+         from sdf_sampler import load_point_cloud
+
+         try:
+             xyz, normals = load_point_cloud(str(args.input))
+         except Exception as e:
+             print(f"Error loading file: {e}", file=sys.stderr)
+             return 1
+
+         print(f"Point cloud: {args.input}")
+         print(f" Points: {len(xyz):,}")
+         print(f" Normals: {'yes' if normals is not None else 'no'}")
+         print(f" Bounds:")
+         print(f" X: [{xyz[:, 0].min():.4f}, {xyz[:, 0].max():.4f}]")
+         print(f" Y: [{xyz[:, 1].min():.4f}, {xyz[:, 1].max():.4f}]")
+         print(f" Z: [{xyz[:, 2].min():.4f}, {xyz[:, 2].max():.4f}]")
+
+     return 0
+
+
+ def _json_serializer(obj):
+     """JSON serializer for numpy types."""
+     if isinstance(obj, np.ndarray):
+         return obj.tolist()
+     if isinstance(obj, (np.integer, np.floating)):
+         return obj.item()
+     raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
+
+
+ if __name__ == "__main__":
+     sys.exit(main())
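Because `main()` takes an explicit `argv` list and returns an exit code, the CLI can also be driven in-process (handy for tests); a minimal sketch, with hypothetical file names:

```python
from sdf_sampler.cli import main

# Same as: sdf-sampler pipeline scan.ply -o training_data.parquet -n 50000 --seed 42 -v
status = main([
    "pipeline", "scan.ply",
    "-o", "training_data.parquet",
    "-n", "50000",
    "--seed", "42",
    "-v",
])
assert status == 0
```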
sdf_sampler/sampler.py CHANGED
@@ -65,6 +65,8 @@ class SDFSampler:
65
65
  total_samples: int | None = None,
66
66
  strategy: str | SamplingStrategy = SamplingStrategy.INVERSE_SQUARE,
67
67
  seed: int | None = None,
68
+ include_surface_points: bool = False,
69
+ surface_point_ratio: float = 0.1,
68
70
  ) -> list[TrainingSample]:
69
71
  """Generate training samples from constraints.
70
72
 
@@ -75,6 +77,8 @@ class SDFSampler:
75
77
  total_samples: Total samples to generate (default from config)
76
78
  strategy: Sampling strategy (CONSTANT, DENSITY, or INVERSE_SQUARE)
77
79
  seed: Random seed for reproducibility
80
+ include_surface_points: If True, include original surface points with phi=0
81
+ surface_point_ratio: Fraction of surface points to include (default 0.1 = 10%)
78
82
 
79
83
  Returns:
80
84
  List of TrainingSample objects
@@ -85,6 +89,7 @@ class SDFSampler:
85
89
  ... constraints=result.constraints,
86
90
  ... strategy="inverse_square",
87
91
  ... total_samples=50000,
92
+ ... include_surface_points=True,
88
93
  ... )
89
94
  """
90
95
  xyz = np.asarray(xyz)
@@ -153,6 +158,63 @@ class SDFSampler:
153
158
  elif isinstance(constraint, SamplePointConstraint):
154
159
  samples.extend(self._sample_sample_point(constraint))
155
160
 
161
+ # Add surface points if requested
162
+ if include_surface_points:
163
+ samples.extend(
164
+ self._generate_surface_points(xyz, normals, surface_point_ratio, rng)
165
+ )
166
+
167
+ return samples
168
+
169
+ def _generate_surface_points(
170
+ self,
171
+ xyz: np.ndarray,
172
+ normals: np.ndarray | None,
173
+ ratio: float,
174
+ rng: np.random.Generator,
175
+ ) -> list[TrainingSample]:
176
+ """Generate surface point samples (phi=0) from the input point cloud.
177
+
178
+ Args:
179
+ xyz: Point cloud positions (N, 3)
180
+ normals: Optional point normals (N, 3)
181
+ ratio: Fraction of points to include (0.0 to 1.0)
182
+ rng: Random number generator
183
+
184
+ Returns:
185
+ List of TrainingSample objects with phi=0
186
+ """
187
+ n_surface = int(len(xyz) * ratio)
188
+ if n_surface == 0:
189
+ return []
190
+
191
+ # Subsample if needed
192
+ if n_surface < len(xyz):
193
+ indices = rng.choice(len(xyz), n_surface, replace=False)
194
+ surface_xyz = xyz[indices]
195
+ surface_normals = normals[indices] if normals is not None else None
196
+ else:
197
+ surface_xyz = xyz
198
+ surface_normals = normals
199
+
200
+ samples = []
201
+ for i in range(len(surface_xyz)):
202
+ sample = TrainingSample(
203
+ x=float(surface_xyz[i, 0]),
204
+ y=float(surface_xyz[i, 1]),
205
+ z=float(surface_xyz[i, 2]),
206
+ phi=0.0,
207
+ weight=1.0,
208
+ source="surface",
209
+ is_surface=True,
210
+ is_free=False,
211
+ )
212
+ if surface_normals is not None:
213
+ sample.nx = float(surface_normals[i, 0])
214
+ sample.ny = float(surface_normals[i, 1])
215
+ sample.nz = float(surface_normals[i, 2])
216
+ samples.append(sample)
217
+
156
218
  return samples
157
219
 
158
220
  def to_dataframe(self, samples: list[TrainingSample]) -> pd.DataFrame:
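A minimal sketch of the new `generate()` parameters from the SDK side, following the docstring example above (`scan.ply` is a placeholder path):

```python
from sdf_sampler import SDFAnalyzer, SDFSampler, load_point_cloud

xyz, normals = load_point_cloud("scan.ply")
result = SDFAnalyzer().analyze(xyz=xyz, normals=normals)

samples = SDFSampler().generate(
    xyz=xyz,
    normals=normals,
    constraints=result.constraints,
    total_samples=50000,
    include_surface_points=True,  # new in 0.3.0: append phi=0 surface samples
    surface_point_ratio=0.1,      # keep 10% of the input points
)
```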
{sdf_sampler-0.1.0.dist-info → sdf_sampler-0.3.0.dist-info}/METADATA RENAMED
@@ -1,8 +1,8 @@
  Metadata-Version: 2.4
  Name: sdf-sampler
- Version: 0.1.0
+ Version: 0.3.0
  Summary: Auto-analysis and sampling of point clouds for SDF (Signed Distance Field) training data generation
- Project-URL: Repository, https://github.com/chiark/sdf-sampler
+ Project-URL: Repository, https://github.com/Chiark-Collective/sdf-sampler
  Author-email: Liam <liam@example.com>
  License: MIT
  License-File: LICENSE
@@ -60,7 +60,88 @@ For additional I/O format support (PLY, LAS/LAZ):
  pip install sdf-sampler[io]
  ```
 
- ## Quick Start
+ ## Command-Line Interface
+
+ sdf-sampler provides a CLI for common workflows:
+
+ ```bash
+ # Run as module
+ python -m sdf_sampler --help
+
+ # Or use the installed command
+ sdf-sampler --help
+ ```
+
+ ### Commands
+
+ #### `pipeline` - Full workflow (recommended)
+
+ Run the complete pipeline: analyze point cloud → generate samples → export.
+
+ ```bash
+ # Basic usage
+ sdf-sampler pipeline scan.ply -o training_data.parquet
+
+ # With options
+ sdf-sampler pipeline scan.ply \
+     -o training_data.parquet \
+     -n 50000 \
+     -s inverse_square \
+     --save-constraints constraints.json \
+     -v
+ ```
+
+ Options:
+ - `-o, --output`: Output parquet file (default: `<input>.samples.parquet`)
+ - `-n, --total-samples`: Number of samples to generate (default: 10000)
+ - `-s, --strategy`: Sampling strategy: `constant`, `density`, `inverse_square` (default: `inverse_square`)
+ - `-a, --algorithms`: Specific algorithms to run (default: all)
+ - `--save-constraints`: Also save constraints to JSON
+ - `--seed`: Random seed for reproducibility
+ - `-v, --verbose`: Verbose output
+
+ #### `analyze` - Detect regions
+
+ Analyze a point cloud to detect SOLID/EMPTY regions.
+
+ ```bash
+ sdf-sampler analyze scan.ply -o constraints.json -v
+ ```
+
+ Options:
+ - `-o, --output`: Output JSON file (default: `<input>.constraints.json`)
+ - `-a, --algorithms`: Algorithms to run (see below)
+ - `--no-hull-filter`: Disable hull filtering
+ - `-v, --verbose`: Verbose output
+
+ #### `sample` - Generate training samples
+
+ Generate training samples from a constraints file.
+
+ ```bash
+ sdf-sampler sample scan.ply constraints.json -o samples.parquet -n 50000
+ ```
+
+ Options:
+ - `-o, --output`: Output parquet file
+ - `-n, --total-samples`: Number of samples (default: 10000)
+ - `-s, --strategy`: Sampling strategy (default: `inverse_square`)
+ - `--seed`: Random seed
+ - `-v, --verbose`: Verbose output
+
+ #### `info` - Inspect files
+
+ Show information about point clouds, constraints, or sample files.
+
+ ```bash
+ sdf-sampler info scan.ply
+ sdf-sampler info constraints.json
+ sdf-sampler info samples.parquet
+ ```
+
+ ## Python SDK
+
+ ### Quick Start
 
  ```python
  from sdf_sampler import SDFAnalyzer, SDFSampler, load_point_cloud
@@ -86,28 +167,13 @@ samples = sampler.generate(
  sampler.export_parquet(samples, "training_data.parquet")
  ```
 
- ## Features
-
- ### Auto-Analysis Algorithms
-
- - **flood_fill**: Detects EMPTY (outside) regions by ray propagation from sky
- - **voxel_regions**: Detects SOLID (underground) regions
- - **normal_offset**: Generates paired SOLID/EMPTY boxes along surface normals
- - **normal_idw**: Inverse distance weighted sampling along normals
- - **pocket**: Detects interior cavities
-
- ### Sampling Strategies
-
- - **CONSTANT**: Fixed number of samples per constraint
- - **DENSITY**: Samples proportional to constraint volume
- - **INVERSE_SQUARE**: More samples near surface, fewer far away (recommended)
-
- ## API Reference
-
  ### SDFAnalyzer
 
+ Analyzes point clouds to detect SOLID and EMPTY regions.
+
  ```python
- from sdf_sampler import SDFAnalyzer, AnalyzerConfig
+ from sdf_sampler import SDFAnalyzer
+ from sdf_sampler.config import AnalyzerConfig, AutoAnalysisOptions
 
  # With default config
  analyzer = SDFAnalyzer()
@@ -136,10 +202,23 @@ print(f"EMPTY: {result.summary.empty_constraints}")
  constraints = result.constraints
  ```
 
+ #### Analysis Algorithms
+
+ | Algorithm | Description | Output |
+ |-----------|-------------|--------|
+ | `flood_fill` | Detects EMPTY (outside) regions by ray propagation from sky | Box or SamplePoint constraints |
+ | `voxel_regions` | Detects SOLID (underground) regions | Box or SamplePoint constraints |
+ | `normal_offset` | Generates paired SOLID/EMPTY boxes along surface normals | Box constraints |
+ | `normal_idw` | Inverse distance weighted sampling along normals | SamplePoint constraints |
+ | `pocket` | Detects interior cavities | Pocket constraints |
+
  ### SDFSampler
 
+ Generates training samples from constraints.
+
  ```python
- from sdf_sampler import SDFSampler, SamplerConfig
+ from sdf_sampler import SDFSampler
+ from sdf_sampler.config import SamplerConfig
 
  # With default config
  sampler = SDFSampler()
@@ -167,6 +246,14 @@ sampler.export_parquet(samples, "output.parquet")
  df = sampler.to_dataframe(samples)
  ```
 
+ #### Sampling Strategies
+
+ | Strategy | Description |
+ |----------|-------------|
+ | `constant` | Fixed number of samples per constraint |
+ | `density` | Samples proportional to constraint volume |
+ | `inverse_square` | More samples near surface, fewer far away (recommended) |
+
  ### Constraint Types
 
  The analyzer generates various constraint types:
@@ -180,6 +267,22 @@ Each constraint has:
  - `sign`: "solid" (negative SDF) or "empty" (positive SDF)
  - `weight`: Sample weight (default 1.0)
 
+ ### I/O Helpers
+
+ ```python
+ from sdf_sampler import load_point_cloud, export_parquet
+
+ # Load various formats
+ xyz, normals = load_point_cloud("scan.ply")      # PLY (requires trimesh)
+ xyz, normals = load_point_cloud("scan.las")      # LAS/LAZ (requires laspy)
+ xyz, normals = load_point_cloud("scan.csv")      # CSV with x,y,z columns
+ xyz, normals = load_point_cloud("scan.npz")      # NumPy archive
+ xyz, normals = load_point_cloud("scan.parquet")  # Parquet
+
+ # Export samples
+ export_parquet(samples, "output.parquet")
+ ```
+
  ## Output Format
 
  The exported parquet file contains columns:
@@ -194,32 +297,40 @@ The exported parquet file contains columns:
  | is_surface | bool | Whether sample is on surface |
  | is_free | bool | Whether sample is in free space (EMPTY) |
 
- ## Configuration Options
+ ## Configuration Reference
 
  ### AnalyzerConfig
 
  | Option | Default | Description |
  |--------|---------|-------------|
- | min_gap_size | 0.10 | Minimum gap size for flood fill (meters) |
- | max_grid_dim | 200 | Maximum voxel grid dimension |
- | cone_angle | 15.0 | Ray propagation cone half-angle (degrees) |
- | normal_offset_pairs | 40 | Number of box pairs for normal_offset |
- | idw_sample_count | 1000 | Total IDW samples |
- | idw_max_distance | 0.5 | Maximum IDW distance (meters) |
- | hull_filter_enabled | True | Filter outside X-Y alpha shape |
- | hull_alpha | 1.0 | Alpha shape parameter |
+ | `min_gap_size` | 0.10 | Minimum gap size for flood fill (meters) |
+ | `max_grid_dim` | 200 | Maximum voxel grid dimension |
+ | `cone_angle` | 15.0 | Ray propagation cone half-angle (degrees) |
+ | `normal_offset_pairs` | 40 | Number of box pairs for normal_offset |
+ | `idw_sample_count` | 1000 | Total IDW samples |
+ | `idw_max_distance` | 0.5 | Maximum IDW distance (meters) |
+ | `hull_filter_enabled` | True | Filter outside X-Y alpha shape |
+ | `hull_alpha` | 1.0 | Alpha shape parameter |
 
  ### SamplerConfig
 
  | Option | Default | Description |
  |--------|---------|-------------|
- | total_samples | 10000 | Default total samples |
- | samples_per_primitive | 100 | Samples per constraint (CONSTANT) |
- | samples_per_cubic_meter | 10000 | Sample density (DENSITY) |
- | inverse_square_base_samples | 100 | Base samples (INVERSE_SQUARE) |
- | inverse_square_falloff | 2.0 | Falloff exponent |
- | near_band | 0.02 | Near-band width |
- | seed | 0 | Random seed |
+ | `total_samples` | 10000 | Default total samples |
+ | `samples_per_primitive` | 100 | Samples per constraint (CONSTANT) |
+ | `samples_per_cubic_meter` | 10000 | Sample density (DENSITY) |
+ | `inverse_square_base_samples` | 100 | Base samples (INVERSE_SQUARE) |
+ | `inverse_square_falloff` | 2.0 | Falloff exponent |
+ | `near_band` | 0.02 | Near-band width |
+ | `seed` | 0 | Random seed |
+
+ ## Integration with Ubik
+
+ sdf-sampler is the core analysis engine for [Ubik](https://github.com/Chiark-Collective/ubik), an interactive web application for SDF labeling. Use sdf-sampler directly for:
+
+ - Automated batch processing pipelines
+ - Integration into ML training workflows
+ - Custom analysis scripts
 
  ## License
 
{sdf_sampler-0.1.0.dist-info → sdf_sampler-0.3.0.dist-info}/RECORD RENAMED
@@ -1,8 +1,10 @@
- sdf_sampler/__init__.py,sha256=9LyfRS8RsgknQvp5TKUKKknuQD9Ovm74qlfbxlOvfCE,1891
+ sdf_sampler/__init__.py,sha256=Kd5g1IROdKbRTR_gNSO4ff1ddNVP4WZJ30JFUbILa5k,1891
+ sdf_sampler/__main__.py,sha256=6N7jJs2Efs1AmM7qXXXN9V-ErFTWpPqKzWTeiujtGhU,490
  sdf_sampler/analyzer.py,sha256=p5Pkoa01dBGFqqQs2wpWrr8ilPMsjkB4ODJEI2IWbdo,11674
+ sdf_sampler/cli.py,sha256=ykW1_cPBV6G1ZtKmq0ZyjgUmVK-ESqk3Ey6SLMRIhSQ,20144
  sdf_sampler/config.py,sha256=lrPM1ktFkv32RRtOz6R-ShUFlBZ2LvoAl1hRLThmgKw,5185
  sdf_sampler/io.py,sha256=DfdXJa_2KQhja_T_a-jlVADcAABeF-NR8e3_vnIJHnk,4968
- sdf_sampler/sampler.py,sha256=QVrI1TxfnSgQWYcJdRCEQqvFekPvFBUYJfrbjQj-ABY,15551
+ sdf_sampler/sampler.py,sha256=3yVT5LHB7-rIxdor-aSdkC6Qew5M-7UvZCKwtIZG0IQ,17744
  sdf_sampler/algorithms/__init__.py,sha256=pp7tSZ8q0zRXZ5S8D3tho7bJ62pmW8jceuuRXtiXIzU,777
  sdf_sampler/algorithms/flood_fill.py,sha256=iWGPPtOPSs0Cg7pxUlXUKGloFNCr8PuCHguRNy3c56c,7042
  sdf_sampler/algorithms/normal_idw.py,sha256=uX3MQDTDX0wVilwxDE9dFj9hm2xuBDHV-AZqspjz7sk,3270
@@ -19,7 +21,8 @@ sdf_sampler/sampling/box.py,sha256=qRAumR1z_7vU9rfNvk-B6xNu62UeHSyQNghYiTwVR_Y,3
  sdf_sampler/sampling/brush.py,sha256=CcAgOYLdYXMM3y_H4fIwyzRJ8PZivFxkUHP7d0ElpNM,1991
  sdf_sampler/sampling/ray_carve.py,sha256=EsfzEGk33q0iWVzOJKDAJi2iWEsY-JZXmEfEZ0dmNdg,4444
  sdf_sampler/sampling/sphere.py,sha256=Xqpwq-RcEnAD6HhoyIC-ErxRHDknDKMtYf6aWUJ43_U,1680
- sdf_sampler-0.1.0.dist-info/METADATA,sha256=CygvgjodkmWtCsBMMXz1MLEGufR1XGZUSabpA7dII-w,7165
- sdf_sampler-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- sdf_sampler-0.1.0.dist-info/licenses/LICENSE,sha256=eeB8aLnEG-dgFYs2KqfMJaP52GFQT8sZPHwaYnHRW8E,1061
- sdf_sampler-0.1.0.dist-info/RECORD,,
+ sdf_sampler-0.3.0.dist-info/METADATA,sha256=MU48zB7pLEhlGEO5ZTDcfP0l7DBXH3u3_iZI0oZJ4FU,10481
+ sdf_sampler-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ sdf_sampler-0.3.0.dist-info/entry_points.txt,sha256=2IMWFbDYEqVUkpiRF1BlRMOhipeirPJSbv5PIOIZrvA,53
+ sdf_sampler-0.3.0.dist-info/licenses/LICENSE,sha256=eeB8aLnEG-dgFYs2KqfMJaP52GFQT8sZPHwaYnHRW8E,1061
+ sdf_sampler-0.3.0.dist-info/RECORD,,
sdf_sampler-0.3.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ sdf-sampler = sdf_sampler.cli:main
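The entry point above is what pip turns into the `sdf-sampler` command; a minimal sketch of resolving it at runtime with the stdlib `importlib.metadata` API (Python 3.10+):

```python
from importlib.metadata import entry_points

# Find the console script declared in entry_points.txt and load its target.
(ep,) = [e for e in entry_points(group="console_scripts") if e.name == "sdf-sampler"]
main = ep.load()  # -> sdf_sampler.cli:main
raise SystemExit(main(["--version"]))
```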