sdf-sampler 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/CHANGELOG.md +11 -2
  2. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/PKG-INFO +1 -1
  3. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/pyproject.toml +1 -1
  4. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/__init__.py +1 -1
  5. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/analyzer.py +2 -1
  6. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/cli.py +9 -9
  7. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/models/analysis.py +7 -0
  8. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/sampler.py +9 -7
  9. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/tests/test_equivalence.py +117 -0
  10. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/uv.lock +1 -1
  11. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/.gitignore +0 -0
  12. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/LICENSE +0 -0
  13. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/README.md +0 -0
  14. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/__main__.py +0 -0
  15. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/algorithms/__init__.py +0 -0
  16. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/algorithms/flood_fill.py +0 -0
  17. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/algorithms/normal_idw.py +0 -0
  18. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/algorithms/normal_offset.py +0 -0
  19. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/algorithms/pocket.py +0 -0
  20. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/algorithms/voxel_grid.py +0 -0
  21. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/algorithms/voxel_regions.py +0 -0
  22. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/config.py +0 -0
  23. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/io.py +0 -0
  24. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/models/__init__.py +0 -0
  25. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/models/constraints.py +0 -0
  26. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/models/samples.py +0 -0
  27. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/sampling/__init__.py +0 -0
  28. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/sampling/box.py +0 -0
  29. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/sampling/brush.py +0 -0
  30. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/sampling/ray_carve.py +0 -0
  31. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/sampling/sphere.py +0 -0
  32. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/tests/__init__.py +0 -0
  33. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/tests/test_analyzer.py +0 -0
  34. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/tests/test_integration.py +0 -0
  35. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/tests/test_models.py +0 -0
  36. {sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/tests/test_sampler.py +0 -0
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/CHANGELOG.md

@@ -5,6 +5,15 @@ All notable changes to sdf-sampler will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.0] - 2025-01-30
+
+### Changed
+
+- **Default algorithms no longer include `normal_idw`** - The `normal_idw` algorithm is now opt-in only. Default algorithms are: `flood_fill`, `voxel_regions`, `normal_offset`. To use `normal_idw`, explicitly pass `algorithms=["normal_idw"]` or include it in your algorithm list.
+- **Surface point count is now a direct count** - Replaced `surface_point_ratio` with `surface_point_count`. Instead of specifying a percentage, you now specify the exact number of surface points to include.
+  - CLI: `--surface-point-count 1000` (default: 1000)
+  - SDK: `sampler.generate(..., include_surface_points=True, surface_point_count=1000)`
+
 ## [0.3.0] - 2025-01-29
 
 ### Added
@@ -15,8 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Output mode control: `--flood-fill-output`, `--voxel-regions-output` (boxes/samples/both)
 - **Surface point inclusion**
   - `--include-surface-points` flag to include original points with phi=0
-  - `--surface-point-ratio` to control fraction included (default 10%)
-  - SDK: `sampler.generate(..., include_surface_points=True, surface_point_ratio=0.1)`
+  - `--surface-point-count` to specify number of surface points (default 1000)
+  - SDK: `sampler.generate(..., include_surface_points=True, surface_point_count=1000)`
 
 ## [0.2.0] - 2025-01-29
 
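Taken together, the 0.4.0 entries change the default call shape of both the analyzer and the sampler. Below is a minimal usage sketch, assuming `SDFAnalyzer` is exported from the package root alongside `SDFSampler` (only `SDFSampler` is visible in the `__init__.py` diff below) and substituting a synthetic point cloud:

```python
# Hedged sketch of the 0.4.0 API; the synthetic cloud and the package-root
# import of SDFAnalyzer are assumptions. Keyword arguments mirror the
# changelog entries and the diffs in this release.
import numpy as np

from sdf_sampler import SDFAnalyzer, SDFSampler

rng = np.random.default_rng(0)
xyz = rng.uniform(-1.0, 1.0, size=(5000, 3))   # (N, 3) positions
normals = np.tile([0.0, 0.0, 1.0], (5000, 1))  # placeholder normals

analyzer = SDFAnalyzer()

# 0.4.0 default: flood_fill, voxel_regions, normal_offset (no normal_idw).
result = analyzer.analyze(xyz=xyz, normals=normals)

# normal_idw is now opt-in and must be named explicitly.
idw_result = analyzer.analyze(
    xyz=xyz, normals=normals, algorithms=["normal_idw"]
)

sampler = SDFSampler()
samples = sampler.generate(
    xyz=xyz,
    normals=normals,
    constraints=result.constraints,
    strategy="inverse_square",
    total_samples=5000,
    seed=42,
    include_surface_points=True,
    surface_point_count=1000,  # direct count; replaces surface_point_ratio
)
```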
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdf-sampler
-Version: 0.3.0
+Version: 0.4.0
 Summary: Auto-analysis and sampling of point clouds for SDF (Signed Distance Field) training data generation
 Project-URL: Repository, https://github.com/Chiark-Collective/sdf-sampler
 Author-email: Liam <liam@example.com>
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "sdf-sampler"
-version = "0.3.0"
+version = "0.4.0"
 description = "Auto-analysis and sampling of point clouds for SDF (Signed Distance Field) training data generation"
 readme = "README.md"
 license = { text = "MIT" }
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/__init__.py

@@ -47,7 +47,7 @@ from sdf_sampler.models import (
 )
 from sdf_sampler.sampler import SDFSampler
 
-__version__ = "0.3.0"
+__version__ = "0.4.0"
 
 __all__ = [
     # Main classes
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/analyzer.py

@@ -14,6 +14,7 @@ from sdf_sampler.algorithms.voxel_regions import generate_voxel_region_constrain
 from sdf_sampler.config import AnalyzerConfig, AutoAnalysisOptions
 from sdf_sampler.models.analysis import (
     ALL_ALGORITHMS,
+    DEFAULT_ALGORITHMS,
     AlgorithmStats,
     AlgorithmType,
     AnalysisResult,
@@ -93,7 +94,7 @@ class SDFAnalyzer:
             raise ValueError(f"normals shape {normals.shape} doesn't match xyz {xyz.shape}")
 
         # Determine which algorithms to run
-        algo_list = algorithms if algorithms else [a.value for a in ALL_ALGORITHMS]
+        algo_list = algorithms if algorithms else [a.value for a in DEFAULT_ALGORITHMS]
         algo_list = [a for a in algo_list if a in [alg.value for alg in ALL_ALGORITHMS]]
 
         # Run algorithms and collect constraints
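The fallback-and-filter logic above is terse; the self-contained restatement below uses a stand-in enum (the real `AlgorithmType` lives in `sdf_sampler.models.analysis`, and the member order of `ALL_ALGORITHMS` is assumed) to make the 0.4.0 selection behavior explicit:

```python
# Stand-in restatement of the selection logic in SDFAnalyzer.analyze;
# the enum here is illustrative, only the two expressions mirror the diff.
from enum import Enum

class AlgorithmType(str, Enum):
    FLOOD_FILL = "flood_fill"
    VOXEL_REGIONS = "voxel_regions"
    NORMAL_OFFSET = "normal_offset"
    NORMAL_IDW = "normal_idw"

ALL_ALGORITHMS = list(AlgorithmType)
DEFAULT_ALGORITHMS = [
    AlgorithmType.FLOOD_FILL,
    AlgorithmType.VOXEL_REGIONS,
    AlgorithmType.NORMAL_OFFSET,
]

def resolve_algorithms(algorithms: list[str] | None) -> list[str]:
    # Fall back to DEFAULT_ALGORITHMS (no normal_idw), then drop unknown names.
    algo_list = algorithms if algorithms else [a.value for a in DEFAULT_ALGORITHMS]
    return [a for a in algo_list if a in [alg.value for alg in ALL_ALGORITHMS]]

assert resolve_algorithms(None) == ["flood_fill", "voxel_regions", "normal_offset"]
assert resolve_algorithms(["normal_idw", "bogus"]) == ["normal_idw"]
```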
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/cli.py

@@ -169,10 +169,10 @@ def add_output_options(parser: argparse.ArgumentParser) -> None:
         help="Include original surface points (phi=0) in output",
     )
     group.add_argument(
-        "--surface-point-ratio",
-        type=float,
-        default=0.1,
-        help="Ratio of surface points to include (default: 0.1 = 10%%)",
+        "--surface-point-count",
+        type=int,
+        default=1000,
+        help="Number of surface points to include (default: 1000)",
     )
 
 
@@ -463,7 +463,7 @@ def cmd_sample(args: argparse.Namespace) -> int:
     # Include surface points if requested
     if args.include_surface_points:
         samples = _add_surface_points(
-            samples, xyz, normals, args.surface_point_ratio, args.verbose
+            samples, xyz, normals, args.surface_point_count, args.verbose
         )
 
     if args.verbose:
@@ -541,7 +541,7 @@ def cmd_pipeline(args: argparse.Namespace) -> int:
     # Include surface points if requested
     if args.include_surface_points:
         samples = _add_surface_points(
-            samples, xyz, normals, args.surface_point_ratio, args.verbose
+            samples, xyz, normals, args.surface_point_count, args.verbose
        )
 
     if args.verbose:
@@ -557,14 +557,14 @@ def _add_surface_points(
     samples: list,
     xyz: np.ndarray,
     normals: np.ndarray | None,
-    ratio: float,
+    count: int,
     verbose: bool,
 ) -> list:
     """Add surface points to sample list."""
     from sdf_sampler.models import TrainingSample
 
-    n_surface = int(len(xyz) * ratio)
-    if n_surface == 0:
+    n_surface = min(count, len(xyz))
+    if n_surface <= 0:
         return samples
 
     # Subsample if needed
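For reference, a minimal argparse sketch of the replacement flag; the parser and group construction here are illustrative, and `action="store_true"` for `--include-surface-points` is an assumption — only the `--surface-point-count` definition mirrors the diff:

```python
# Illustrative parser; only the --surface-point-count argument is taken
# verbatim from the cli.py diff above.
import argparse

parser = argparse.ArgumentParser(prog="sdf-sampler")
group = parser.add_argument_group("output")
group.add_argument(
    "--include-surface-points",
    action="store_true",  # assumed; the diff only shows its help text
    help="Include original surface points (phi=0) in output",
)
group.add_argument(
    "--surface-point-count",
    type=int,
    default=1000,
    help="Number of surface points to include (default: 1000)",
)

args = parser.parse_args(["--include-surface-points", "--surface-point-count", "2000"])
assert args.include_surface_points and args.surface_point_count == 2000
```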
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/models/analysis.py

@@ -26,6 +26,13 @@ ALL_ALGORITHMS = [
     AlgorithmType.NORMAL_IDW,
 ]
 
+# Default algorithms (excludes normal_idw which is opt-in)
+DEFAULT_ALGORITHMS = [
+    AlgorithmType.FLOOD_FILL,
+    AlgorithmType.VOXEL_REGIONS,
+    AlgorithmType.NORMAL_OFFSET,
+]
+
 
 class GeneratedConstraint(BaseModel):
     """A constraint generated by auto-analysis.
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/src/sdf_sampler/sampler.py

@@ -66,7 +66,7 @@ class SDFSampler:
         strategy: str | SamplingStrategy = SamplingStrategy.INVERSE_SQUARE,
         seed: int | None = None,
         include_surface_points: bool = False,
-        surface_point_ratio: float = 0.1,
+        surface_point_count: int | None = None,
     ) -> list[TrainingSample]:
         """Generate training samples from constraints.
 
@@ -78,7 +78,7 @@ class SDFSampler:
             strategy: Sampling strategy (CONSTANT, DENSITY, or INVERSE_SQUARE)
             seed: Random seed for reproducibility
             include_surface_points: If True, include original surface points with phi=0
-            surface_point_ratio: Fraction of surface points to include (default 0.1 = 10%)
+            surface_point_count: Number of surface points to include (default: 1000, or len(xyz) if smaller)
 
         Returns:
             List of TrainingSample objects
@@ -160,8 +160,10 @@ class SDFSampler:
 
         # Add surface points if requested
         if include_surface_points:
+            # Default to 1000 surface points, or all points if smaller
+            count = surface_point_count if surface_point_count is not None else min(1000, len(xyz))
             samples.extend(
-                self._generate_surface_points(xyz, normals, surface_point_ratio, rng)
+                self._generate_surface_points(xyz, normals, count, rng)
             )
 
         return samples
@@ -170,7 +172,7 @@ class SDFSampler:
         self,
         xyz: np.ndarray,
         normals: np.ndarray | None,
-        ratio: float,
+        count: int,
         rng: np.random.Generator,
     ) -> list[TrainingSample]:
         """Generate surface point samples (phi=0) from the input point cloud.
@@ -178,14 +180,14 @@ class SDFSampler:
         Args:
             xyz: Point cloud positions (N, 3)
             normals: Optional point normals (N, 3)
-            ratio: Fraction of points to include (0.0 to 1.0)
+            count: Number of surface points to include
             rng: Random number generator
 
         Returns:
             List of TrainingSample objects with phi=0
         """
-        n_surface = int(len(xyz) * ratio)
-        if n_surface == 0:
+        n_surface = min(count, len(xyz))
+        if n_surface <= 0:
             return []
 
         # Subsample if needed
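The interaction between the new default and the clamp in `_generate_surface_points` is worth spelling out. In the sketch below, `resolve_count` is a hypothetical helper that restates the inline expression from the hunk above:

```python
# resolve_count is a hypothetical name; its body is the inline expression
# from SDFSampler.generate, and the final clamp happens later via
# min(count, len(xyz)) in _generate_surface_points.
import numpy as np

def resolve_count(surface_point_count: int | None, xyz: np.ndarray) -> int:
    # Default to 1000 surface points, or all points if the cloud is smaller.
    return surface_point_count if surface_point_count is not None else min(1000, len(xyz))

small_cloud = np.zeros((300, 3))
large_cloud = np.zeros((50_000, 3))
assert resolve_count(None, small_cloud) == 300    # fewer than 1000 points available
assert resolve_count(None, large_cloud) == 1000   # default caps at 1000
assert resolve_count(5000, small_cloud) == 5000   # later clamped to len(xyz) == 300
```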
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/tests/test_equivalence.py

@@ -759,3 +759,120 @@ class TestFullPipelineEquivalence:
             f"Sample count ratio too high: {ratio:.2f} "
             f"(standalone={standalone_sample_count}, backend={backend_sample_count})"
         )
+
+    @requires_backend
+    def test_inverse_square_pipeline_equivalence(self, trench_pointcloud):
+        """Test inverse_square sampling produces equivalent results.
+
+        This is the recommended production workflow: auto-analyze + inverse_square sampling.
+        """
+        xyz, normals = trench_pointcloud
+
+        # Shared analysis options
+        analysis_options = AutoAnalysisOptions(
+            flood_fill_output="samples",
+            flood_fill_sample_count=100,
+            voxel_regions_output="samples",
+            voxel_regions_sample_count=100,
+            idw_sample_count=100,
+            hull_filter_enabled=False,
+        )
+
+        # Run standalone with inverse_square
+        standalone_analyzer = SDFAnalyzer()
+        standalone_result = standalone_analyzer.analyze(
+            xyz=xyz,
+            normals=normals,
+            algorithms=["flood_fill", "voxel_regions", "normal_idw"],
+            options=analysis_options,
+        )
+
+        standalone_sampler = SDFSampler()
+        standalone_samples = standalone_sampler.generate(
+            xyz=xyz,
+            normals=normals,
+            constraints=standalone_result.constraints,
+            strategy="inverse_square",
+            total_samples=5000,
+            seed=42,
+            include_surface_points=False,  # Test without surface points first
+        )
+
+        # Run backend with inverse_square
+        from sdf_labeler_api.config import Settings
+        from sdf_labeler_api.services.auto_analysis_service import AutoAnalysisService
+        from sdf_labeler_api.services.sampling_service import SamplingService
+        from sdf_labeler_api.services.project_service import ProjectService
+        from sdf_labeler_api.services.constraint_service import ConstraintService
+        from sdf_labeler_api.models.project import ProjectCreate
+        from sdf_labeler_api.models.samples import SampleGenerationRequest, SamplingStrategy
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            data_dir = Path(tmpdir)
+
+            import sdf_labeler_api.config as backend_config
+            original_settings = backend_config.settings
+            backend_config.settings = Settings(data_dir=data_dir)
+
+            try:
+                project_service = ProjectService(data_dir)
+                project = project_service.create(ProjectCreate(name="test"))
+                project_id = project.id
+
+                setup_backend_project(data_dir, project_id, xyz, normals)
+
+                # Analyze
+                backend_analysis = AutoAnalysisService(backend_config.settings)
+                backend_options = get_backend_options(analysis_options)
+                backend_result = asyncio.run(backend_analysis.analyze(
+                    project_id=project_id,
+                    algorithms=["flood_fill", "voxel_regions", "normal_idw"],
+                    recompute=True,
+                    options=backend_options,
+                ))
+
+                # Add constraints to project
+                constraint_service = ConstraintService()
+                for gc in backend_result.generated_constraints:
+                    constraint_service.add_from_dict(project_id, gc.constraint)
+
+                # Sample with inverse_square
+                sampling_service = SamplingService()
+                request = SampleGenerationRequest(
+                    total_samples=5000,
+                    strategy=SamplingStrategy.INVERSE_SQUARE,
+                    seed=42,
+                )
+                backend_sample_result = sampling_service.generate(project_id, request)
+                backend_samples = backend_sample_result.samples
+            finally:
+                backend_config.settings = original_settings
+
+        # Compare results
+        print(f"\nInverse square pipeline comparison:")
+        print(f"  Standalone constraints: {len(standalone_result.constraints)}")
+        print(f"  Backend constraints: {len(backend_result.generated_constraints)}")
+        print(f"  Standalone samples: {len(standalone_samples)}")
+        print(f"  Backend samples: {len(backend_samples)}")
+
+        # Verify phi distribution is similar (more samples near 0)
+        standalone_near_surface = sum(1 for s in standalone_samples if abs(s.phi) < 0.1)
+        backend_near_surface = sum(1 for s in backend_samples if abs(s.phi) < 0.1)
+
+        print(f"  Standalone near-surface (|phi|<0.1): {standalone_near_surface}")
+        print(f"  Backend near-surface (|phi|<0.1): {backend_near_surface}")
+
+        # Both should have majority of samples near surface (inverse_square characteristic)
+        standalone_ratio = standalone_near_surface / len(standalone_samples) if standalone_samples else 0
+        backend_ratio = backend_near_surface / len(backend_samples) if backend_samples else 0
+
+        assert standalone_ratio > 0.3, f"Standalone should have >30% near-surface, got {standalone_ratio:.1%}"
+        assert backend_ratio > 0.3, f"Backend should have >30% near-surface, got {backend_ratio:.1%}"
+
+        # Ratios should be similar
+        if standalone_ratio > 0 and backend_ratio > 0:
+            ratio_diff = abs(standalone_ratio - backend_ratio)
+            assert ratio_diff < 0.2, (
+                f"Near-surface ratio difference too high: {ratio_diff:.1%} "
+                f"(standalone={standalone_ratio:.1%}, backend={backend_ratio:.1%})"
+            )
{sdf_sampler-0.3.0 → sdf_sampler-0.4.0}/uv.lock

@@ -855,7 +855,7 @@ wheels = [
 
 [[package]]
 name = "sdf-sampler"
-version = "0.2.0"
+version = "0.3.0"
 source = { editable = "." }
 dependencies = [
     { name = "alphashape" },