sdv 1.33.1.dev0__tar.gz → 1.33.2.dev0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {sdv-1.33.1.dev0/sdv.egg-info → sdv-1.33.2.dev0}/PKG-INFO +2 -2
  2. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/pyproject.toml +2 -2
  3. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/__init__.py +1 -1
  4. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/multi_table.py +0 -13
  5. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/multi_table/base.py +10 -2
  6. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/multi_table/hma.py +51 -3
  7. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/sampling/hierarchical_sampler.py +1 -2
  8. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0/sdv.egg-info}/PKG-INFO +2 -2
  9. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/LICENSE +0 -0
  10. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/README.md +0 -0
  11. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/_utils.py +0 -0
  12. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/__init__.py +0 -0
  13. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/_errors.py +0 -0
  14. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/_utils.py +0 -0
  15. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/base.py +0 -0
  16. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/fixed_combinations.py +0 -0
  17. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/fixed_increments.py +0 -0
  18. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/inequality.py +0 -0
  19. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/one_hot_encoding.py +0 -0
  20. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/programmable_constraint.py +0 -0
  21. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/cag/range.py +0 -0
  22. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/constraints/__init__.py +0 -0
  23. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/constraints/base.py +0 -0
  24. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/constraints/errors.py +0 -0
  25. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/constraints/tabular.py +0 -0
  26. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/constraints/utils.py +0 -0
  27. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/data_processing/__init__.py +0 -0
  28. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/data_processing/data_processor.py +0 -0
  29. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/data_processing/datetime_formatter.py +0 -0
  30. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/data_processing/errors.py +0 -0
  31. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/data_processing/numerical_formatter.py +0 -0
  32. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/data_processing/utils.py +0 -0
  33. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/datasets/__init__.py +0 -0
  34. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/datasets/demo.py +0 -0
  35. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/datasets/local.py +0 -0
  36. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/errors.py +0 -0
  37. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/evaluation/__init__.py +0 -0
  38. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/evaluation/_utils.py +0 -0
  39. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/evaluation/multi_table.py +0 -0
  40. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/evaluation/single_table.py +0 -0
  41. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/io/__init__.py +0 -0
  42. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/io/local/__init__.py +0 -0
  43. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/io/local/local.py +0 -0
  44. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/lite/__init__.py +0 -0
  45. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/lite/single_table.py +0 -0
  46. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/logging/__init__.py +0 -0
  47. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/logging/logger.py +0 -0
  48. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/logging/sdv_logger_config.yml +0 -0
  49. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/logging/utils.py +0 -0
  50. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/__init__.py +0 -0
  51. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/errors.py +0 -0
  52. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/metadata.py +0 -0
  53. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/metadata_upgrader.py +0 -0
  54. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/single_table.py +0 -0
  55. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/utils.py +0 -0
  56. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metadata/visualization.py +0 -0
  57. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metrics/__init__.py +0 -0
  58. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metrics/demos.py +0 -0
  59. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metrics/relational.py +0 -0
  60. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metrics/tabular.py +0 -0
  61. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/metrics/timeseries.py +0 -0
  62. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/multi_table/__init__.py +0 -0
  63. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/multi_table/dayz.py +0 -0
  64. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/multi_table/utils.py +0 -0
  65. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/sampling/__init__.py +0 -0
  66. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/sampling/independent_sampler.py +0 -0
  67. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/sampling/tabular.py +0 -0
  68. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/sequential/__init__.py +0 -0
  69. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/sequential/par.py +0 -0
  70. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/single_table/__init__.py +0 -0
  71. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/single_table/base.py +0 -0
  72. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/single_table/copulagan.py +0 -0
  73. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/single_table/copulas.py +0 -0
  74. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/single_table/ctgan.py +0 -0
  75. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/single_table/dayz.py +0 -0
  76. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/single_table/utils.py +0 -0
  77. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/utils/__init__.py +0 -0
  78. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/utils/mixins.py +0 -0
  79. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/utils/poc.py +0 -0
  80. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/utils/utils.py +0 -0
  81. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv/version/__init__.py +0 -0
  82. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv.egg-info/SOURCES.txt +0 -0
  83. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv.egg-info/dependency_links.txt +0 -0
  84. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv.egg-info/entry_points.txt +0 -0
  85. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv.egg-info/requires.txt +0 -0
  86. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/sdv.egg-info/top_level.txt +0 -0
  87. {sdv-1.33.1.dev0 → sdv-1.33.2.dev0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdv
3
- Version: 1.33.1.dev0
3
+ Version: 1.33.2.dev0
4
4
  Summary: Generate synthetic data for single table, multi table and sequential data
5
5
  Author-email: "DataCebo, Inc." <info@sdv.dev>
6
6
  License-Expression: BUSL-1.1
@@ -9,7 +9,7 @@ Project-URL: Issue Tracker, https://github.com/sdv-dev/SDV/issues
9
9
  Project-URL: Changes, https://github.com/sdv-dev/SDV/blob/main/HISTORY.md
10
10
  Project-URL: Twitter, https://twitter.com/sdv_dev
11
11
  Project-URL: Chat, https://forum.datacebo.com
12
- Keywords: sdv,synthetic-data,synhtetic-data-generation,timeseries,single-table,multi-table
12
+ Keywords: sdv,synthetic-data,synthetic-data-generation,timeseries,single-table,multi-table
13
13
  Classifier: Development Status :: 5 - Production/Stable
14
14
  Classifier: Intended Audience :: Developers
15
15
  Classifier: Natural Language :: English
@@ -15,7 +15,7 @@ classifiers = [
15
15
  'Programming Language :: Python :: 3.14',
16
16
  'Topic :: Scientific/Engineering :: Artificial Intelligence',
17
17
  ]
18
- keywords = ['sdv', 'synthetic-data', 'synhtetic-data-generation', 'timeseries', 'single-table', 'multi-table']
18
+ keywords = ['sdv', 'synthetic-data', 'synthetic-data-generation', 'timeseries', 'single-table', 'multi-table']
19
19
  dynamic = ['version']
20
20
  license = 'BUSL-1.1'
21
21
  license-files = ['LICENSE']
@@ -150,7 +150,7 @@ namespaces = false
150
150
  version = {attr = 'sdv.__version__'}
151
151
 
152
152
  [tool.bumpversion]
153
- current_version = "1.33.1.dev0"
153
+ current_version = "1.33.2.dev0"
154
154
  parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
155
155
  serialize = [
156
156
  '{major}.{minor}.{patch}.{release}{candidate}',
@@ -6,7 +6,7 @@
6
6
 
7
7
  __author__ = 'DataCebo, Inc.'
8
8
  __email__ = 'info@sdv.dev'
9
- __version__ = '1.33.1.dev0'
9
+ __version__ = '1.33.2.dev0'
10
10
 
11
11
 
12
12
  import sys
@@ -160,16 +160,6 @@ class MultiTableMetadata:
160
160
  f'tables {errors}.'
161
161
  )
162
162
 
163
- def _validate_foreign_child_key(self, child_table_name, parent_table_name, child_foreign_key):
164
- child_primary_key = _cast_to_iterable(self.tables[child_table_name].primary_key)
165
- child_foreign_key = _cast_to_iterable(child_foreign_key)
166
- if set(child_foreign_key).intersection(set(child_primary_key)):
167
- raise InvalidMetadataError(
168
- f"Invalid relationship between table '{parent_table_name}' and table "
169
- f"'{child_table_name}'. A relationship must connect a primary key "
170
- 'with a non-primary key.'
171
- )
172
-
173
163
  def _validate_new_foreign_key_is_not_reused(
174
164
  self, parent_table_name, parent_primary_key, child_table_name, child_foreign_key
175
165
  ):
@@ -238,8 +228,6 @@ class MultiTableMetadata:
238
228
  parent_table_name, parent_primary_key, child_table_name, child_foreign_key
239
229
  )
240
230
 
241
- self._validate_foreign_child_key(child_table_name, parent_table_name, child_foreign_key)
242
-
243
231
  self._validate_relationship_sdtypes(
244
232
  parent_table_name, parent_primary_key, child_table_name, child_foreign_key
245
233
  )
@@ -312,7 +300,6 @@ class MultiTableMetadata:
312
300
  different
313
301
  ``sdtype``.
314
302
  - ``InvalidMetadataError`` if the relationship causes a circular dependency.
315
- - ``InvalidMetadataError`` if ``child_foreign_key`` is a primary key.
316
303
  """
317
304
  self._validate_relationship(
318
305
  parent_table_name, child_table_name, parent_primary_key, child_foreign_key
@@ -466,10 +466,18 @@ class BaseMultiTableSynthesizer:
466
466
  )
467
467
 
468
468
  def _assign_table_transformers(self, synthesizer, table_name, table_data):
469
- """Update the ``synthesizer`` to ignore the foreign keys while preprocessing the data."""
469
+ """Update the ``synthesizer`` to ignore the foreign keys while preprocessing the data.
470
+
471
+ This function iterates through all foreign keys in the table. For each foreign key,
472
+ if it is not the primary key in the table, then the transformer is set to None (
473
+ meaning no transformer is assigned).
474
+ """
470
475
  synthesizer.auto_assign_transformers(table_data)
476
+ primary_key = self.metadata.tables[table_name].primary_key
471
477
  foreign_key_columns = self.metadata._get_all_foreign_keys(table_name)
472
- column_name_to_transformers = {column_name: None for column_name in foreign_key_columns}
478
+ column_name_to_transformers = {
479
+ column_name: None for column_name in foreign_key_columns if column_name != primary_key
480
+ }
473
481
  synthesizer.update_transformers(column_name_to_transformers)
474
482
 
475
483
  def auto_assign_transformers(self, data):
@@ -348,9 +348,15 @@ class HMASynthesizer(BaseHierarchicalSampler, BaseMultiTableSynthesizer):
348
348
  table_meta = self._table_synthesizers[child_name].get_metadata()
349
349
 
350
350
  extension_rows = []
351
+ primary_key = self.metadata.tables[child_name].primary_key
351
352
  foreign_key_columns = self.metadata._get_all_foreign_keys(child_name)
352
- foreign_key_values = child_table[foreign_key].unique()
353
- child_table = child_table.set_index(foreign_key)
353
+ primary_key_is_a_foreign_key = primary_key and primary_key in foreign_key_columns
354
+ if primary_key_is_a_foreign_key and foreign_key == primary_key:
355
+ # data processor will set index of each table to the PK for table
356
+ foreign_key_values = child_table.index.unique()
357
+ else:
358
+ foreign_key_values = child_table[foreign_key].unique()
359
+ child_table = child_table.set_index(foreign_key)
354
360
 
355
361
  index = []
356
362
  scale_columns = None
@@ -447,6 +453,7 @@ class HMASynthesizer(BaseHierarchicalSampler, BaseMultiTableSynthesizer):
447
453
  child_table = tables[child_name]
448
454
 
449
455
  foreign_keys = self.metadata._get_foreign_keys(table_name, child_name)
456
+
450
457
  for foreign_key in foreign_keys:
451
458
  progress_bar_desc = (
452
459
  f'({self._learned_relationships + 1}/{len(self.metadata.relationships)}) '
@@ -497,6 +504,7 @@ class HMASynthesizer(BaseHierarchicalSampler, BaseMultiTableSynthesizer):
497
504
  self._learned_relationships = 0
498
505
  parent_map = self.metadata._get_parent_map()
499
506
  self._print(text='Learning relationships:')
507
+
500
508
  for table_name in processed_data:
501
509
  if not parent_map.get(table_name):
502
510
  self._augment_table(augmented_data[table_name], augmented_data, table_name)
@@ -517,7 +525,11 @@ class HMASynthesizer(BaseHierarchicalSampler, BaseMultiTableSynthesizer):
517
525
  keys (dict):
518
526
  A dictionary mapping with the foreign key and it's values within the table.
519
527
  """
528
+ primary_key = self.metadata.tables[table_name].primary_key
520
529
  foreign_keys = self.metadata._get_all_foreign_keys(table_name)
530
+ if primary_key and primary_key in foreign_keys:
531
+ foreign_keys.remove(primary_key)
532
+
521
533
  keys = {}
522
534
  for fk in foreign_keys:
523
535
  keys[fk] = table_data.pop(fk).to_numpy()
@@ -767,8 +779,44 @@ class HMASynthesizer(BaseHierarchicalSampler, BaseMultiTableSynthesizer):
767
779
  return likelihoods.apply(self._find_parent_id, axis=1, num_rows=num_rows)
768
780
 
769
781
  def _add_foreign_key_columns(self, child_table, parent_table, child_name, parent_name):
782
+ """Add foreign key columns in the child table.
783
+
784
+ This function adds foreign key columns to a child table.
785
+ If the foreign key column does not exist in the child table, it adds the column.
786
+ If the foreign key column already exists in the child table (e.g., when it is also a PK)
787
+ and it contains invalid references (FKs not found in parent table), it overwrites the
788
+ foreign key values (from the parent table's PK).
789
+
790
+ Args:
791
+ child_table (pd.DataFrame):
792
+ The child table which may or may not contain the FK columns.
793
+ parent_table (pd.DataFrame):
794
+ The parent table which contains the primary key column.
795
+ child_name (str):
796
+ The name of the child table in the metadata.
797
+ parent_name (str):
798
+ The name of the parent table in the metadata.
799
+
800
+ Returns:
801
+ None: The child_table is modified in-place.
802
+ """
803
+ parent_primary_key = self.metadata.tables[parent_name].primary_key
804
+ parent_id_values = None
770
805
  for foreign_key in self.metadata._get_foreign_keys(parent_name, child_name):
771
- if foreign_key not in child_table:
806
+ needs_assignment = foreign_key not in child_table
807
+
808
+ if not needs_assignment:
809
+ child_column = child_table[foreign_key].dropna()
810
+ if child_column.empty:
811
+ needs_assignment = True
812
+ else:
813
+ if parent_id_values is None:
814
+ parent_id_values = parent_table[parent_primary_key].dropna().unique()
815
+
816
+ if not child_column.isin(parent_id_values).all():
817
+ needs_assignment = True
818
+
819
+ if needs_assignment:
772
820
  parent_ids = self._find_parent_ids(
773
821
  child_table=child_table,
774
822
  parent_table=parent_table,
@@ -102,9 +102,8 @@ class BaseHierarchicalSampler:
102
102
  if len(sampled_rows):
103
103
  parent_key = self.metadata.tables[parent_name].primary_key
104
104
  if foreign_key in sampled_rows:
105
- # If foreign key is in sampeld rows raises `SettingWithCopyWarning`
106
105
  row_indices = sampled_rows.index
107
- sampled_rows[foreign_key].iloc[row_indices] = parent_row[parent_key]
106
+ sampled_rows.loc[row_indices, foreign_key] = parent_row[parent_key]
108
107
  else:
109
108
  sampled_rows[foreign_key] = (
110
109
  parent_row[parent_key] if parent_row is not None else np.nan
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdv
3
- Version: 1.33.1.dev0
3
+ Version: 1.33.2.dev0
4
4
  Summary: Generate synthetic data for single table, multi table and sequential data
5
5
  Author-email: "DataCebo, Inc." <info@sdv.dev>
6
6
  License-Expression: BUSL-1.1
@@ -9,7 +9,7 @@ Project-URL: Issue Tracker, https://github.com/sdv-dev/SDV/issues
9
9
  Project-URL: Changes, https://github.com/sdv-dev/SDV/blob/main/HISTORY.md
10
10
  Project-URL: Twitter, https://twitter.com/sdv_dev
11
11
  Project-URL: Chat, https://forum.datacebo.com
12
- Keywords: sdv,synthetic-data,synhtetic-data-generation,timeseries,single-table,multi-table
12
+ Keywords: sdv,synthetic-data,synthetic-data-generation,timeseries,single-table,multi-table
13
13
  Classifier: Development Status :: 5 - Production/Stable
14
14
  Classifier: Intended Audience :: Developers
15
15
  Classifier: Natural Language :: English
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes