datacompose 0.2.5.2__tar.gz → 0.2.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacompose might be problematic. Click here for more details.

Files changed (101) hide show
  1. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/CHANGELOG.md +41 -0
  2. {datacompose-0.2.5.2/datacompose.egg-info → datacompose-0.2.6.0}/PKG-INFO +1 -1
  3. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/__init__.py +1 -1
  4. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/operators/__init__.py +1 -1
  5. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/operators/primitives.py +57 -19
  6. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/text/phone_numbers/pyspark/pyspark_primitives.py +57 -57
  7. {datacompose-0.2.5.2 → datacompose-0.2.6.0/datacompose.egg-info}/PKG-INFO +1 -1
  8. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose.egg-info/SOURCES.txt +3 -8
  9. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/pyproject.toml +1 -1
  10. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_config.py +56 -57
  11. datacompose-0.2.6.0/tests/unit/operators/test_compose_conditions.py +594 -0
  12. datacompose-0.2.6.0/tests/unit/operators/test_conditional_auto_detection.py +192 -0
  13. datacompose-0.2.6.0/tests/unit/operators/test_conditional_core.py +733 -0
  14. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/operators/test_conditional_real_world.py +142 -7
  15. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/operators/test_operators.py +9 -9
  16. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/operators/test_primitives_complete.py +67 -65
  17. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_phone_numbers/test_phone_extraction.py +69 -50
  18. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_phone_numbers/test_phone_formatting.py +42 -18
  19. datacompose-0.2.5.2/tests/unit/operators/conditional_tests_common.py +0 -26
  20. datacompose-0.2.5.2/tests/unit/operators/conftest.py +0 -61
  21. datacompose-0.2.5.2/tests/unit/operators/test_conditional_complex_logic.py +0 -200
  22. datacompose-0.2.5.2/tests/unit/operators/test_conditional_data_driven.py +0 -117
  23. datacompose-0.2.5.2/tests/unit/operators/test_conditional_edge_cases.py +0 -150
  24. datacompose-0.2.5.2/tests/unit/operators/test_conditional_error_handling.py +0 -67
  25. datacompose-0.2.5.2/tests/unit/operators/test_conditional_parameters.py +0 -94
  26. datacompose-0.2.5.2/tests/unit/operators/test_conditional_performance.py +0 -106
  27. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/LICENSE +0 -0
  28. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/MANIFEST.in +0 -0
  29. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/README.md +0 -0
  30. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/__init__.py +0 -0
  31. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/colors.py +0 -0
  32. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/commands/__init__.py +0 -0
  33. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/commands/add.py +0 -0
  34. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/commands/init.py +0 -0
  35. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/commands/list.py +0 -0
  36. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/config.py +0 -0
  37. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/main.py +0 -0
  38. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/cli/validation.py +0 -0
  39. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/generators/__init__.py +0 -0
  40. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/generators/base.py +0 -0
  41. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/generators/pyspark/__init__.py +0 -0
  42. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/generators/pyspark/generator.py +0 -0
  43. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/__init__.py +0 -0
  44. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/discovery.py +0 -0
  45. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/text/__init__.py +0 -0
  46. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/text/addresses/__init__.py +0 -0
  47. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/text/addresses/pyspark/pyspark_primitives.py +0 -0
  48. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/text/emails/__init__.py +0 -0
  49. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/text/emails/pyspark/pyspark_primitives.py +0 -0
  50. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose/transformers/text/phone_numbers/__init__.py +0 -0
  51. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose.egg-info/dependency_links.txt +0 -0
  52. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose.egg-info/entry_points.txt +0 -0
  53. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose.egg-info/requires.txt +0 -0
  54. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/datacompose.egg-info/top_level.txt +0 -0
  55. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/setup.cfg +0 -0
  56. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/__init__.py +0 -0
  57. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/integration/__init__.py +0 -0
  58. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/integration/test_end_to_end.py +0 -0
  59. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/integration/test_full_workflow.py +0 -0
  60. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/integration/test_generated_imports.py +0 -0
  61. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/.venv/bin/activate_this.py +0 -0
  62. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/.venv/lib/python3.12/site-packages/_virtualenv.py +0 -0
  63. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/__init__.py +0 -0
  64. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/__init__.py +0 -0
  65. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/postgres/__init__.py +0 -0
  66. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/postgres/clean_emails/__init__.py +0 -0
  67. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/postgres/clean_emails/email_cleaner_udf_spec.yaml +0 -0
  68. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/postgres/clean_emails/test_email_cleaner_udf.py +0 -0
  69. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/spark/__init__.py +0 -0
  70. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/spark/clean_emails/__init__.py +0 -0
  71. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/spark/clean_emails/email_cleaner_udf.py +0 -0
  72. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/spark/clean_emails/email_cleaner_udf_spec.yaml +0 -0
  73. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/build/spark/clean_emails/test_email_cleaner_udf.py +0 -0
  74. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_add_command.py +0 -0
  75. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_add_command_complete.py +0 -0
  76. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_add_default_target.py +0 -0
  77. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_add_validation.py +0 -0
  78. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_init_command.py +0 -0
  79. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_init_command_complete.py +0 -0
  80. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_list_command.py +0 -0
  81. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_main.py +0 -0
  82. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_main_complete.py +0 -0
  83. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/cli/test_validation_complete.py +0 -0
  84. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/generators/__init__.py +0 -0
  85. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/generators/test_base_generator.py +0 -0
  86. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/generators/test_spark_generator.py +0 -0
  87. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/__init__.py +0 -0
  88. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/test_discovery.py +0 -0
  89. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/common/test_common.py +0 -0
  90. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_building_unit_extraction.py +0 -0
  91. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_city_state_extraction.py +0 -0
  92. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_clean_addresses.py +0 -0
  93. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_country_extraction.py +0 -0
  94. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_data_addresses.py +0 -0
  95. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_po_box_extraction.py +0 -0
  96. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_street_extraction.py +0 -0
  97. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_addresses/test_zip_code_extraction.py +0 -0
  98. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_emails/test_debug_long_emails.py +0 -0
  99. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_emails/test_email_extraction.py +0 -0
  100. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/unit/transformers/text/test_emails/test_email_optimized.py +0 -0
  101. {datacompose-0.2.5.2 → datacompose-0.2.6.0}/tests/yaml_specs/__init__.py +0 -0
@@ -7,6 +7,47 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.2.6.0] - 2025-08-24
11
+
12
+ ### Added
13
+ - **Automatic Conditional Detection**: Smart detection of conditional operators based on naming patterns
14
+ - Functions starting with `is_`, `has_`, `needs_`, `should_`, `can_`, `contains_`, `matches_`, `equals_`, `starts_with_`, `ends_with_` are automatically detected as conditionals
15
+ - Eliminates need for explicit `is_conditional=True` in most cases
16
+ - Explicit override still available when needed via `is_conditional` parameter
17
+ - **Phone Number Processing Pipeline**: Complete phone number validation and formatting example
18
+ - Letter-to-number conversion (1-800-FLOWERS)
19
+ - NANP validation and formatting
20
+ - Toll-free number detection
21
+ - E.164 and parentheses formatting
22
+
23
+ ### Changed
24
+ - **Conditional Operator Registration**: `is_conditional` parameter now optional with smart defaults
25
+ - **Test Organization**: Consolidated conditional tests into three focused files:
26
+ - `test_conditional_core.py` - Core functionality, logic, errors, parameters, and performance
27
+ - `test_conditional_real_world.py` - Real-world pipeline scenarios
28
+ - `test_conditional_auto_detection.py` - Auto-detection feature tests
29
+
30
+ ### Fixed
31
+ - **Phone Number Validation**: Updated NANP validation to be more flexible for testing scenarios
32
+
33
+ ## [0.2.5.3] - 2025-08-23
34
+
35
+ ### Added
36
+ - **Compose Decorator Enhancement**: Auto-detection of PrimitiveRegistry instances in function globals
37
+ - Compose decorator now automatically discovers all namespace instances without explicit passing
38
+ - Improved namespace resolution using function's global scope instead of module globals
39
+ - Better support for multiple namespaces in composed functions
40
+
41
+ ### Fixed
42
+ - **Namespace Resolution**: Fixed global namespace lookups to use function's own globals
43
+ - PipelineCompiler now correctly resolves namespaces from the decorated function's scope
44
+ - Fallback compose mode uses function globals for namespace discovery
45
+ - Prevents namespace resolution errors when registries are defined in different modules
46
+
47
+ ### Changed
48
+ - **Phone Number Tests**: Updated test imports and formatting for phone number primitives
49
+ - **Test Organization**: Added comprehensive conditional composition tests
50
+
10
51
  ## [0.2.5.2] - 2025-08-22
11
52
 
12
53
  ### Fixed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datacompose
3
- Version: 0.2.5.2
3
+ Version: 0.2.6.0
4
4
  Summary: Copy-pasteable data transformation primitives for PySpark. Inspired by shadcn-svelte.
5
5
  Author: Datacompose Contributors
6
6
  Maintainer: Datacompose Contributors
@@ -2,4 +2,4 @@
2
2
  Datacompose CLI - Command-line interface for generating data cleaning UDFs.
3
3
  """
4
4
 
5
- __version__ = "0.2.4"
5
+ __version__ = "0.2.6.0"
@@ -18,4 +18,4 @@ __all__ = [
18
18
  "PrimitiveRegistry",
19
19
  ]
20
20
 
21
- __version__ = "0.2.4"
21
+ __version__ = "0.2.6.0"
@@ -16,9 +16,13 @@ from typing import Any, Callable, Dict, List, Optional, Sequence
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
18
  try:
19
- from pyspark.sql import Column # type: ignore
19
+ from pyspark.sql import Column
20
+ from pyspark.sql import functions as F
20
21
  except ImportError:
21
- pass
22
+ logging.debug("PySpark not available")
23
+
24
+ # Set up module logger
25
+ logger = logging.getLogger(__name__)
22
26
 
23
27
 
24
28
  class SmartPrimitive:
@@ -120,11 +124,15 @@ class PrimitiveRegistry:
120
124
  self._primitives = {}
121
125
  self._conditionals = {}
122
126
 
123
- def register(self, name: Optional[str] = None, is_conditional: bool = False):
127
+ def register(
128
+ self, name: Optional[str] = None, is_conditional: Optional[bool] = None
129
+ ):
124
130
  """Decorator to register a function as a SmartPrimitive in this namespace.
125
131
 
126
132
  Args:
127
133
  name: Optional name for the primitive (defaults to function name)
134
+ is_conditional: Optional flag to mark as conditional. If None, auto-detects
135
+ based on function name patterns.
128
136
 
129
137
  Returns:
130
138
  Decorator function that wraps the target function as a SmartPrimitive
@@ -139,7 +147,29 @@ class PrimitiveRegistry:
139
147
  def decorator(func: Callable):
140
148
  primitive_name = name or func.__name__
141
149
 
142
- if is_conditional:
150
+ # Auto-detect conditional if not explicitly specified
151
+ if is_conditional is None:
152
+ # Check common naming patterns for conditional functions
153
+ conditional_patterns = [
154
+ "is_",
155
+ "has_",
156
+ "needs_",
157
+ "should_",
158
+ "can_",
159
+ "contains_",
160
+ "matches_",
161
+ "equals_",
162
+ "starts_with_",
163
+ "ends_with_",
164
+ ]
165
+ is_conditional_auto = any(
166
+ primitive_name.startswith(pattern)
167
+ for pattern in conditional_patterns
168
+ )
169
+ else:
170
+ is_conditional_auto = is_conditional
171
+
172
+ if is_conditional_auto:
143
173
  self._conditionals[primitive_name] = SmartPrimitive(
144
174
  func, primitive_name
145
175
  )
@@ -217,9 +247,17 @@ class PrimitiveRegistry:
217
247
  pipeline.__doc__ = func.__doc__
218
248
  return pipeline
219
249
 
250
+ # Auto-detect ALL namespace instances from func.__globals__
251
+ # This allows using multiple namespaces without explicitly passing them
252
+ for var_name, var_value in func.__globals__.items():
253
+ if isinstance(var_value, PrimitiveRegistry):
254
+ # Found a namespace instance
255
+ if var_name not in namespaces:
256
+ namespaces[var_name] = var_value
257
+
220
258
  # Try to get the function as a string and parse it
221
259
  try:
222
- compiler = PipelineCompiler(namespaces, debug)
260
+ compiler = PipelineCompiler(namespaces, debug, func.__globals__)
223
261
  pipeline = compiler.compile(func)
224
262
 
225
263
  if debug and pipeline.steps:
@@ -270,7 +308,11 @@ def _fallback_compose(func: Callable, namespaces: Dict, debug: bool) -> Callable
270
308
  method_name = node.value.func.attr
271
309
  namespace = (
272
310
  namespaces.get(namespace_name) if namespace_name else None
273
- ) or (globals().get(namespace_name) if namespace_name else None)
311
+ ) or (
312
+ func.__globals__.get(namespace_name)
313
+ if namespace_name
314
+ else None
315
+ )
274
316
  if namespace and hasattr(namespace, method_name):
275
317
  method = getattr(namespace, method_name)
276
318
 
@@ -312,16 +354,6 @@ def _fallback_compose(func: Callable, namespaces: Dict, debug: bool) -> Callable
312
354
  return pipeline
313
355
 
314
356
 
315
- try:
316
- from pyspark.sql import Column
317
- from pyspark.sql import functions as F
318
- except ImportError:
319
- logging.debug("PySpark not available")
320
-
321
- # Set up module logger
322
- logger = logging.getLogger(__name__)
323
-
324
-
325
357
  @dataclass
326
358
  class CompiledStep:
327
359
  """A compiled pipeline step"""
@@ -452,9 +484,15 @@ class StablePipeline:
452
484
 
453
485
 
454
486
  class PipelineCompiler:
455
- def __init__(self, namespaces: Dict[str, Any], debug: bool = False):
487
+ def __init__(
488
+ self,
489
+ namespaces: Dict[str, Any],
490
+ debug: bool = False,
491
+ func_globals: Optional[Dict] = None,
492
+ ):
456
493
  self.namespaces = namespaces
457
494
  self.debug = debug
495
+ self.func_globals = func_globals or {}
458
496
 
459
497
  def compile(self, func: Callable) -> StablePipeline:
460
498
  try:
@@ -530,7 +568,7 @@ class PipelineCompiler:
530
568
 
531
569
  namespace = (
532
570
  self.namespaces.get(namespace_name) if namespace_name else None
533
- ) or (globals().get(namespace_name) if namespace_name else None)
571
+ ) or (self.func_globals.get(namespace_name) if namespace_name else None)
534
572
  if namespace and hasattr(namespace, method_name):
535
573
  method = getattr(namespace, method_name)
536
574
 
@@ -552,7 +590,7 @@ class PipelineCompiler:
552
590
 
553
591
  namespace = (
554
592
  self.namespaces.get(namespace_name) if namespace_name else None
555
- ) or (globals().get(namespace_name) if namespace_name else None)
593
+ ) or (self.func_globals.get(namespace_name) if namespace_name else None)
556
594
  if namespace and hasattr(namespace, method_name):
557
595
  method = getattr(namespace, method_name)
558
596
 
@@ -3,7 +3,7 @@ Phone number transformation primitives for PySpark.
3
3
 
4
4
  Preview Output:
5
5
  +------------------------+----------------+--------+---------+------------+-------+---------+------------+
6
- |phone |standardized |is_valid|area_code|local_number|has_ext|extension|is_toll_free|
6
+ |phone_numbers |standardized |is_valid|area_code|local_number|has_ext|extension|is_toll_free|
7
7
  +------------------------+----------------+--------+---------+------------+-------+---------+------------+
8
8
  | (555) 123-4567 |(555) 123-4567 |true |555 |1234567 |false |null |false |
9
9
  |+1-800-555-1234 |+1 800-555-1234 |true |800 |5551234 |false |null |true |
@@ -29,23 +29,23 @@ data = [
29
29
  ("123-45-67",),
30
30
  ("1-800-FLOWERS",),
31
31
  ]
32
- df = spark.createDataFrame(data, ["phone"])
32
+ df = spark.createDataFrame(data, ["phone_numbers"])
33
33
 
34
34
  # Apply transformations
35
35
  result_df = df.select(
36
- F.col("phone"),
37
- phone_numbers.standardize_phone(F.col("phone")).alias("standardized"),
38
- phone_numbers.is_valid_phone(F.col("phone")).alias("is_valid"),
36
+ F.col("phone_numbers"),
37
+ phone_numbers.standardize_phone_numbers(F.col("phone_numbers")).alias("standardized"),
38
+ phone_numbers.is_valid_phone_numbers(F.col("phone_numbers")).alias("is_valid"),
39
39
  phone_numbers.extract_area_code(
40
- phone_numbers.standardize_phone(F.col("phone"))
40
+ phone_numbers.standardize_phone_numbers(F.col("phone_numbers"))
41
41
  ).alias("area_code"),
42
42
  phone_numbers.extract_local_number(
43
- phone_numbers.standardize_phone(F.col("phone"))
43
+ phone_numbers.standardize_phone_numbers(F.col("phone_numbers"))
44
44
  ).alias("local_number"),
45
- phone_numbers.has_extension(F.col("phone")).alias("has_ext"),
46
- phone_numbers.extract_extension(F.col("phone")).alias("extension"),
45
+ phone_numbers.has_extension(F.col("phone_numbers")).alias("has_ext"),
46
+ phone_numbers.extract_extension(F.col("phone_numbers")).alias("extension"),
47
47
  phone_numbers.is_toll_free(
48
- phone_numbers.standardize_phone(F.col("phone"))
48
+ phone_numbers.standardize_phone_numbers(F.col("phone_numbers"))
49
49
  ).alias("is_toll_free")
50
50
  )
51
51
 
@@ -118,7 +118,7 @@ PHONE_KEYPAD_MAPPING = {
118
118
 
119
119
 
120
120
  @phone_numbers.register()
121
- def extract_phone_from_text(col: Column) -> Column:
121
+ def extract_phone_numbers_from_text(col: Column) -> Column:
122
122
  """
123
123
  Extract first phone number from text using regex patterns.
124
124
 
@@ -126,21 +126,21 @@ def extract_phone_from_text(col: Column) -> Column:
126
126
  col: Column containing text with potential phone numbers
127
127
 
128
128
  Returns:
129
- Column with extracted phone number or empty string
129
+ Column with extracted phone number or empty string
130
130
  """
131
- # Comprehensive phone pattern that matches various formats
131
+ # Comprehensive phone number pattern that matches various formats
132
132
  # Handles: +1-555-123-4567, (555) 123-4567, 555.123.4567, 555-123-4567, etc.
133
- phone_pattern = (
133
+ phone_numbers_pattern = (
134
134
  r"(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(\s*(ext|x)\.?\s*\d+)?"
135
135
  )
136
136
 
137
137
  return F.when(col.isNull(), F.lit("")).otherwise(
138
- F.regexp_extract(col, phone_pattern, 0)
138
+ F.regexp_extract(col, phone_numbers_pattern, 0)
139
139
  )
140
140
 
141
141
 
142
142
  @phone_numbers.register()
143
- def extract_all_phones_from_text(col: Column) -> Column:
143
+ def extract_all_phone_numbers_from_text(col: Column) -> Column:
144
144
  """
145
145
  Extract all phone numbers from text as an array.
146
146
 
@@ -150,13 +150,13 @@ def extract_all_phones_from_text(col: Column) -> Column:
150
150
  Returns:
151
151
  Column with array of phone numbers
152
152
  """
153
- # For simplicity, we'll return an array with just the first phone found
153
+ # For simplicity, we'll return an array with just the first phone number found
154
154
  # A proper implementation would require more complex regex or UDF
155
155
  # This is a limitation of Spark SQL's regex capabilities
156
- first_phone = extract_phone_from_text(col)
156
+ first_phone_numbers = extract_phone_numbers_from_text(col)
157
157
 
158
158
  # Return array with single element or empty array
159
- return F.when(first_phone != "", F.array(first_phone)).otherwise(F.array())
159
+ return F.when(first_phone_numbers != "", F.array(first_phone_numbers)).otherwise(F.array())
160
160
 
161
161
 
162
162
  @phone_numbers.register()
@@ -366,7 +366,7 @@ def is_valid_international(
366
366
 
367
367
 
368
368
  @phone_numbers.register()
369
- def is_valid_phone(col: Column) -> Column:
369
+ def is_valid_phone_numbers(col: Column) -> Column:
370
370
  """
371
371
  Check if phone number is valid (NANP or international).
372
372
 
@@ -403,7 +403,7 @@ def is_premium_rate(col: Column) -> Column:
403
403
  Check if phone number is premium rate (900).
404
404
 
405
405
  Args:
406
- col: Column containing phone number
406
+ col: Column containing phone number
407
407
 
408
408
  Returns:
409
409
  Column with boolean indicating if premium rate
@@ -546,11 +546,11 @@ def format_nanp(col: Column) -> Column:
546
546
  """
547
547
  # Remove extension for validation but preserve it
548
548
  extension = extract_extension(col)
549
- phone_no_ext = remove_extension(col)
549
+ phone_numbers_no_ext = remove_extension(col)
550
550
 
551
- area_code = extract_area_code(phone_no_ext)
552
- exchange = extract_exchange(phone_no_ext)
553
- subscriber = extract_subscriber(phone_no_ext)
551
+ area_code = extract_area_code(phone_numbers_no_ext)
552
+ exchange = extract_exchange(phone_numbers_no_ext)
553
+ subscriber = extract_subscriber(phone_numbers_no_ext)
554
554
 
555
555
  base_format = F.concat(area_code, F.lit("-"), exchange, F.lit("-"), subscriber)
556
556
 
@@ -559,7 +559,7 @@ def format_nanp(col: Column) -> Column:
559
559
  (extension != ""), F.concat(base_format, F.lit(" ext. "), extension)
560
560
  ).otherwise(base_format)
561
561
 
562
- return F.when(is_valid_nanp(phone_no_ext), formatted).otherwise(F.lit(""))
562
+ return F.when(is_valid_nanp(phone_numbers_no_ext), formatted).otherwise(F.lit(""))
563
563
 
564
564
 
565
565
  @phone_numbers.register()
@@ -575,11 +575,11 @@ def format_nanp_paren(col: Column) -> Column:
575
575
  """
576
576
  # Remove extension for validation but preserve it
577
577
  extension = extract_extension(col)
578
- phone_no_ext = remove_extension(col)
578
+ phone_numbers_no_ext = remove_extension(col)
579
579
 
580
- area_code = extract_area_code(phone_no_ext)
581
- exchange = extract_exchange(phone_no_ext)
582
- subscriber = extract_subscriber(phone_no_ext)
580
+ area_code = extract_area_code(phone_numbers_no_ext)
581
+ exchange = extract_exchange(phone_numbers_no_ext)
582
+ subscriber = extract_subscriber(phone_numbers_no_ext)
583
583
 
584
584
  base_format = F.concat(
585
585
  F.lit("("), area_code, F.lit(") "), exchange, F.lit("-"), subscriber
@@ -590,7 +590,7 @@ def format_nanp_paren(col: Column) -> Column:
590
590
  (extension != ""), F.concat(base_format, F.lit(" ext. "), extension)
591
591
  ).otherwise(base_format)
592
592
 
593
- return F.when(is_valid_nanp(phone_no_ext), formatted).otherwise(F.lit(""))
593
+ return F.when(is_valid_nanp(phone_numbers_no_ext), formatted).otherwise(F.lit(""))
594
594
 
595
595
 
596
596
  @phone_numbers.register()
@@ -606,11 +606,11 @@ def format_nanp_dot(col: Column) -> Column:
606
606
  """
607
607
  # Remove extension for validation but preserve it
608
608
  extension = extract_extension(col)
609
- phone_no_ext = remove_extension(col)
609
+ phone_numbers_no_ext = remove_extension(col)
610
610
 
611
- area_code = extract_area_code(phone_no_ext)
612
- exchange = extract_exchange(phone_no_ext)
613
- subscriber = extract_subscriber(phone_no_ext)
611
+ area_code = extract_area_code(phone_numbers_no_ext)
612
+ exchange = extract_exchange(phone_numbers_no_ext)
613
+ subscriber = extract_subscriber(phone_numbers_no_ext)
614
614
 
615
615
  base_format = F.concat(area_code, F.lit("."), exchange, F.lit("."), subscriber)
616
616
 
@@ -619,7 +619,7 @@ def format_nanp_dot(col: Column) -> Column:
619
619
  (extension != ""), F.concat(base_format, F.lit(" ext. "), extension)
620
620
  ).otherwise(base_format)
621
621
 
622
- return F.when(is_valid_nanp(phone_no_ext), formatted).otherwise(F.lit(""))
622
+ return F.when(is_valid_nanp(phone_numbers_no_ext), formatted).otherwise(F.lit(""))
623
623
 
624
624
 
625
625
  @phone_numbers.register()
@@ -635,11 +635,11 @@ def format_nanp_space(col: Column) -> Column:
635
635
  """
636
636
  # Remove extension for validation but preserve it
637
637
  extension = extract_extension(col)
638
- phone_no_ext = remove_extension(col)
638
+ phone_numbers_no_ext = remove_extension(col)
639
639
 
640
- area_code = extract_area_code(phone_no_ext)
641
- exchange = extract_exchange(phone_no_ext)
642
- subscriber = extract_subscriber(phone_no_ext)
640
+ area_code = extract_area_code(phone_numbers_no_ext)
641
+ exchange = extract_exchange(phone_numbers_no_ext)
642
+ subscriber = extract_subscriber(phone_numbers_no_ext)
643
643
 
644
644
  base_format = F.concat(area_code, F.lit(" "), exchange, F.lit(" "), subscriber)
645
645
 
@@ -648,7 +648,7 @@ def format_nanp_space(col: Column) -> Column:
648
648
  (extension != ""), F.concat(base_format, F.lit(" ext. "), extension)
649
649
  ).otherwise(base_format)
650
650
 
651
- return F.when(is_valid_nanp(phone_no_ext), formatted).otherwise(F.lit(""))
651
+ return F.when(is_valid_nanp(phone_numbers_no_ext), formatted).otherwise(F.lit(""))
652
652
 
653
653
 
654
654
  @phone_numbers.register()
@@ -707,7 +707,7 @@ def format_e164(col: Column) -> Column:
707
707
 
708
708
  # Build E.164 format - only for valid phones
709
709
  return F.when(
710
- is_valid_phone(col),
710
+ is_valid_phone_numbers(col),
711
711
  F.when(
712
712
  (F.length(digits) == 10) & is_nanp, F.concat(F.lit("+"), F.lit("1"), digits)
713
713
  )
@@ -729,7 +729,7 @@ def format_e164(col: Column) -> Column:
729
729
 
730
730
 
731
731
  @phone_numbers.register()
732
- def standardize_phone(col: Column) -> Column:
732
+ def standardize_phone_numbers(col: Column) -> Column:
733
733
  """
734
734
  Standardize phone number with cleaning and NANP formatting.
735
735
 
@@ -783,7 +783,7 @@ def standardize_phone(col: Column) -> Column:
783
783
 
784
784
 
785
785
  @phone_numbers.register()
786
- def standardize_phone_e164(col: Column) -> Column:
786
+ def standardize_phone_numbers_e164(col: Column) -> Column:
787
787
  """
788
788
  Standardize phone number with cleaning and E.164 formatting.
789
789
 
@@ -800,11 +800,11 @@ def standardize_phone_e164(col: Column) -> Column:
800
800
  result = format_e164(cleaned)
801
801
 
802
802
  # Only return valid phone numbers
803
- return F.when(is_valid_phone(cleaned), result).otherwise(F.lit(""))
803
+ return F.when(is_valid_phone_numbers(cleaned), result).otherwise(F.lit(""))
804
804
 
805
805
 
806
806
  @phone_numbers.register()
807
- def standardize_phone_digits(col: Column) -> Column:
807
+ def standardize_phone_numbers_digits(col: Column) -> Column:
808
808
  """
809
809
  Standardize phone number and return digits only.
810
810
 
@@ -821,11 +821,11 @@ def standardize_phone_digits(col: Column) -> Column:
821
821
  result = extract_digits(cleaned)
822
822
 
823
823
  # Only return valid phone numbers
824
- return F.when(is_valid_phone(cleaned), result).otherwise(F.lit(""))
824
+ return F.when(is_valid_phone_numbers(cleaned), result).otherwise(F.lit(""))
825
825
 
826
826
 
827
827
  @phone_numbers.register()
828
- def clean_phone(col: Column) -> Column:
828
+ def clean_phone_numbers(col: Column) -> Column:
829
829
  """
830
830
  Clean and validate phone number, returning null for invalid numbers.
831
831
 
@@ -873,12 +873,12 @@ def clean_phone(col: Column) -> Column:
873
873
 
874
874
 
875
875
  @phone_numbers.register()
876
- def get_phone_type(col: Column) -> Column:
876
+ def get_phone_numbers_type(col: Column) -> Column:
877
877
  """
878
878
  Get phone number type (toll-free, premium, standard, international).
879
879
 
880
880
  Args:
881
- col: Column containing phone number
881
+ col: Column containing phone number
882
882
 
883
883
  Returns:
884
884
  Column with phone type
@@ -923,7 +923,7 @@ def get_region_from_area_code(col: Column) -> Column:
923
923
 
924
924
 
925
925
  @phone_numbers.register()
926
- def mask_phone(col: Column) -> Column:
926
+ def mask_phone_numbers(col: Column) -> Column:
927
927
  """
928
928
  Mask phone number for privacy keeping last 4 digits (e.g., ***-***-1234).
929
929
 
@@ -950,9 +950,9 @@ def mask_phone(col: Column) -> Column:
950
950
 
951
951
 
952
952
  @phone_numbers.register()
953
- def filter_valid_phones(col: Column) -> Column:
953
+ def filter_valid_phone_numbers_numbers(col: Column) -> Column:
954
954
  """
955
- Return phone number only if valid, otherwise return null.
955
+ Return phone number only if valid, otherwise return null.
956
956
 
957
957
  Args:
958
958
  col: Column containing phone number
@@ -960,13 +960,13 @@ def filter_valid_phones(col: Column) -> Column:
960
960
  Returns:
961
961
  Column with valid phone or null
962
962
  """
963
- return F.when(is_valid_phone(col), col).otherwise(F.lit(None))
963
+ return F.when(is_valid_phone_numbers(col), col).otherwise(F.lit(None))
964
964
 
965
965
 
966
966
  @phone_numbers.register()
967
- def filter_nanp_phones(col: Column) -> Column:
967
+ def filter_nanp_phone_numbers_numbers(col: Column) -> Column:
968
968
  """
969
- Return phone number only if valid NANP, otherwise return null.
969
+ Return phone number only if valid NANP, otherwise return null.
970
970
 
971
971
  Args:
972
972
  col: Column containing phone number
@@ -978,7 +978,7 @@ def filter_nanp_phones(col: Column) -> Column:
978
978
 
979
979
 
980
980
  @phone_numbers.register()
981
- def filter_toll_free_phones(col: Column) -> Column:
981
+ def filter_toll_free_phone_numbers_numbers(col: Column) -> Column:
982
982
  """
983
983
  Return phone number only if toll-free, otherwise return null.
984
984
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datacompose
3
- Version: 0.2.5.2
3
+ Version: 0.2.6.0
4
4
  Summary: Copy-pasteable data transformation primitives for PySpark. Inspired by shadcn-svelte.
5
5
  Author: Datacompose Contributors
6
6
  Maintainer: Datacompose Contributors
@@ -67,14 +67,9 @@ tests/unit/cli/build/spark/clean_emails/test_email_cleaner_udf.py
67
67
  tests/unit/generators/__init__.py
68
68
  tests/unit/generators/test_base_generator.py
69
69
  tests/unit/generators/test_spark_generator.py
70
- tests/unit/operators/conditional_tests_common.py
71
- tests/unit/operators/conftest.py
72
- tests/unit/operators/test_conditional_complex_logic.py
73
- tests/unit/operators/test_conditional_data_driven.py
74
- tests/unit/operators/test_conditional_edge_cases.py
75
- tests/unit/operators/test_conditional_error_handling.py
76
- tests/unit/operators/test_conditional_parameters.py
77
- tests/unit/operators/test_conditional_performance.py
70
+ tests/unit/operators/test_compose_conditions.py
71
+ tests/unit/operators/test_conditional_auto_detection.py
72
+ tests/unit/operators/test_conditional_core.py
78
73
  tests/unit/operators/test_conditional_real_world.py
79
74
  tests/unit/operators/test_operators.py
80
75
  tests/unit/operators/test_primitives_complete.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "datacompose"
7
- version = "0.2.5.2"
7
+ version = "0.2.6.0"
8
8
  description = "Copy-pasteable data transformation primitives for PySpark. Inspired by shadcn-svelte."
9
9
  authors = [
10
10
  {name = "Datacompose Contributors"},