dataforge-py 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/PKG-INFO +595 -1
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/README.md +594 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/pyproject.toml +8 -3
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/__init__.py +1 -1
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/anonymizer.py +5 -95
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/backend.py +24 -277
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/chaos.py +35 -119
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/cli.py +93 -67
- dataforge_py-0.5.0/src/dataforge/compat/__init__.py +15 -0
- dataforge_py-0.5.0/src/dataforge/compat/faker.py +172 -0
- dataforge_py-0.5.0/src/dataforge/compat/hypothesis.py +106 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/constraints.py +11 -80
- dataforge_py-0.5.0/src/dataforge/core.py +997 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/data/correlations/geo.py +0 -10
- dataforge_py-0.5.0/src/dataforge/decorators.py +81 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/inference.py +249 -151
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/company.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/person.py +0 -25
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/address.py +0 -25
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/person.py +0 -78
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/person.py +0 -80
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/address.py +0 -93
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/company.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/person.py +0 -178
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/address.py +0 -124
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/company.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/person.py +0 -185
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/address.py +0 -88
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/company.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/person.py +0 -180
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/address.py +0 -85
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/company.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/person.py +0 -178
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/person.py +0 -75
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/person.py +0 -73
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/person.py +0 -80
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/company.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/person.py +0 -26
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/company.py +0 -15
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/person.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/person.py +0 -75
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/company.py +0 -15
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/person.py +0 -50
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/company.py +0 -5
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/person.py +0 -25
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/company.py +0 -51
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/person.py +0 -26
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/company.py +0 -50
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/person.py +0 -25
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/company.py +0 -15
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/person.py +0 -55
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/company.py +0 -36
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/person.py +0 -26
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/person.py +0 -63
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/address.py +0 -10
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/person.py +0 -75
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/company.py +0 -15
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/person.py +0 -50
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/openapi.py +14 -108
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/address.py +15 -227
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/ai_prompt.py +20 -169
- dataforge_py-0.5.0/src/dataforge/providers/automotive.py +213 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/barcode.py +5 -59
- dataforge_py-0.5.0/src/dataforge/providers/base.py +59 -0
- dataforge_py-0.5.0/src/dataforge/providers/color.py +123 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/company.py +5 -58
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/crypto.py +4 -64
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/datetime.py +12 -146
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/ecommerce.py +5 -115
- dataforge_py-0.5.0/src/dataforge/providers/education.py +101 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/file.py +18 -106
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/finance.py +34 -174
- dataforge_py-0.5.0/src/dataforge/providers/food.py +236 -0
- dataforge_py-0.5.0/src/dataforge/providers/geo.py +186 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/government.py +0 -36
- dataforge_py-0.5.0/src/dataforge/providers/hardware.py +226 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/internet.py +9 -124
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/legal.py +12 -210
- dataforge_py-0.5.0/src/dataforge/providers/llm.py +495 -0
- dataforge_py-0.5.0/src/dataforge/providers/logistics.py +285 -0
- dataforge_py-0.5.0/src/dataforge/providers/lorem.py +107 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/medical.py +8 -161
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/misc.py +9 -104
- dataforge_py-0.5.0/src/dataforge/providers/music.py +270 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/network.py +11 -107
- dataforge_py-0.5.0/src/dataforge/providers/payment.py +196 -0
- dataforge_py-0.5.0/src/dataforge/providers/person.py +96 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/phone.py +2 -31
- dataforge_py-0.5.0/src/dataforge/providers/profile.py +102 -0
- dataforge_py-0.5.0/src/dataforge/providers/real_estate.py +224 -0
- dataforge_py-0.5.0/src/dataforge/providers/science.py +216 -0
- dataforge_py-0.5.0/src/dataforge/providers/social_media.py +234 -0
- dataforge_py-0.5.0/src/dataforge/providers/sports.py +249 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/text.py +3 -115
- dataforge_py-0.5.0/src/dataforge/providers/weather.py +204 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/registry.py +5 -62
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/relational.py +5 -105
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/schema.py +146 -593
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/schema_io.py +8 -301
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/seeder.py +8 -115
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/streaming.py +9 -177
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/timeseries.py +8 -145
- dataforge_py-0.5.0/src/dataforge/transforms.py +240 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/tui/app.py +0 -4
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/unique.py +3 -49
- dataforge_py-0.5.0/src/dataforge/validation.py +257 -0
- dataforge_py-0.4.0/src/dataforge/core.py +0 -1757
- dataforge_py-0.4.0/src/dataforge/decorators.py +0 -172
- dataforge_py-0.4.0/src/dataforge/providers/ai_chat.py +0 -170
- dataforge_py-0.4.0/src/dataforge/providers/automotive.py +0 -416
- dataforge_py-0.4.0/src/dataforge/providers/base.py +0 -34
- dataforge_py-0.4.0/src/dataforge/providers/color.py +0 -247
- dataforge_py-0.4.0/src/dataforge/providers/education.py +0 -234
- dataforge_py-0.4.0/src/dataforge/providers/food.py +0 -476
- dataforge_py-0.4.0/src/dataforge/providers/geo.py +0 -332
- dataforge_py-0.4.0/src/dataforge/providers/hardware.py +0 -478
- dataforge_py-0.4.0/src/dataforge/providers/llm.py +0 -726
- dataforge_py-0.4.0/src/dataforge/providers/logistics.py +0 -545
- dataforge_py-0.4.0/src/dataforge/providers/lorem.py +0 -241
- dataforge_py-0.4.0/src/dataforge/providers/music.py +0 -505
- dataforge_py-0.4.0/src/dataforge/providers/payment.py +0 -300
- dataforge_py-0.4.0/src/dataforge/providers/person.py +0 -195
- dataforge_py-0.4.0/src/dataforge/providers/profile.py +0 -265
- dataforge_py-0.4.0/src/dataforge/providers/real_estate.py +0 -470
- dataforge_py-0.4.0/src/dataforge/providers/science.py +0 -365
- dataforge_py-0.4.0/src/dataforge/providers/social_media.py +0 -442
- dataforge_py-0.4.0/src/dataforge/providers/sports.py +0 -494
- dataforge_py-0.4.0/src/dataforge/providers/weather.py +0 -452
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/data/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/data/correlations/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/address.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/address.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/address.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/company.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/address.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/internet.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/phone.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/__init__.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/py.typed +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/pytest_plugin.py +0 -0
- {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/tui/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: dataforge-py
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: High-performance fake data generator for testing
|
|
5
5
|
Author: Ivan Rener
|
|
6
6
|
Author-email: Ivan Rener <ivan.rener@multitude.com>
|
|
@@ -53,6 +53,16 @@ forge.person.first_name(count=1_000_000) # 1M names in ~55ms
|
|
|
53
53
|
- [Unique Values](#unique-values)
|
|
54
54
|
- [Locales](#locales) (17 locales)
|
|
55
55
|
- **Advanced Features**
|
|
56
|
+
- [Faker Compatibility Layer](#faker-compatibility-layer)
|
|
57
|
+
- [Multi-Locale Mixing](#multi-locale-mixing)
|
|
58
|
+
- [Dynamic Fields (`define()`)](#dynamic-fields-define)
|
|
59
|
+
- [Field Transform Pipelines (`pipe()`)](#field-transform-pipelines-pipe)
|
|
60
|
+
- [Type-Driven Schema](#type-driven-schema)
|
|
61
|
+
- [Data Contract Validation](#data-contract-validation)
|
|
62
|
+
- [Hypothesis Strategy Bridge](#hypothesis-strategy-bridge)
|
|
63
|
+
- [HTTP Mock Data Server](#http-mock-data-server)
|
|
64
|
+
- [XLSX Export](#xlsx-export)
|
|
65
|
+
- [Statistical Distribution Fitting](#statistical-distribution-fitting)
|
|
56
66
|
- [Time-Series Generation](#time-series-generation)
|
|
57
67
|
- [Schema Inference](#schema-inference)
|
|
58
68
|
- [Chaos Testing](#chaos-testing)
|
|
@@ -93,6 +103,16 @@ forge.person.first_name(count=1_000_000) # 1M names in ~55ms
|
|
|
93
103
|
- **OpenAPI / JSON Schema Import** — generate fake data from API specs with `$ref` resolution
|
|
94
104
|
- **Streaming to Queues** — emit data to HTTP, Kafka, or RabbitMQ with token-bucket rate limiting
|
|
95
105
|
- **Interactive TUI** — terminal UI for browsing providers, building schemas, and exporting data
|
|
106
|
+
- **Faker Compatibility** — drop-in `Faker` replacement with 57 method mappings for painless migration
|
|
107
|
+
- **Multi-Locale Mixing** — pass a list of locales to randomly blend data from multiple languages
|
|
108
|
+
- **Dynamic Fields** — `define()` custom fields with element lists, weighted pools, or arbitrary callables
|
|
109
|
+
- **Transform Pipelines** — `pipe()` chains composable transforms (casing, truncation, hashing, redaction) onto any field
|
|
110
|
+
- **Type-Driven Schema** — auto-generate schemas from `@dataclass` and `TypedDict` classes via annotation introspection
|
|
111
|
+
- **Data Contract Validation** — validate generated or imported data against semantic regex patterns and non-empty constraints
|
|
112
|
+
- **Hypothesis Bridge** — `strategy()` and `forge_strategy()` integrate DataForge fields into Hypothesis property-based tests
|
|
113
|
+
- **HTTP Mock Server** — `dataforge --serve` starts a zero-dependency JSON API that returns fake data on every GET
|
|
114
|
+
- **XLSX Export** — `to_excel()` writes schemas to `.xlsx` files using streaming `openpyxl` workbooks
|
|
115
|
+
- **Distribution Fitting** — infer Normal, LogNormal, Exponential, Beta, and Zipf distributions from numeric columns
|
|
96
116
|
- **27 Providers** — person, address, internet, company, phone, finance, datetime, color, file, network, lorem, barcode, misc, automotive, crypto, ecommerce, education, geo, government, medical, payment, profile, science, text, ai\_prompt, llm, ai\_chat
|
|
97
117
|
- **17 Locales** — en\_US, en\_GB, en\_AU, en\_CA, de\_DE, fr\_FR, es\_ES, it\_IT, pt\_BR, nl\_NL, pl\_PL, ru\_RU, ar\_SA, hi\_IN, ja\_JP, ko\_KR, zh\_CN
|
|
98
118
|
|
|
@@ -114,6 +134,8 @@ pip install polars # to_polars()
|
|
|
114
134
|
pip install pandas # to_dataframe()
|
|
115
135
|
pip install pydantic # schema_from_pydantic()
|
|
116
136
|
pip install sqlalchemy # schema_from_sqlalchemy(), DatabaseSeeder
|
|
137
|
+
pip install openpyxl # to_excel()
|
|
138
|
+
pip install hypothesis # Hypothesis strategy bridge
|
|
117
139
|
```
|
|
118
140
|
|
|
119
141
|
**Optional extras** (bundled in pyproject.toml):
|
|
@@ -862,6 +884,567 @@ forge.person.full_name() # "田中太郎"
|
|
|
862
884
|
|
|
863
885
|
---
|
|
864
886
|
|
|
887
|
+
## Faker Compatibility Layer
|
|
888
|
+
|
|
889
|
+
Drop-in replacement for the `faker` library. Provides the same `Faker` class API so you can migrate existing code by changing a single import — while gaining DataForge's performance.
|
|
890
|
+
|
|
891
|
+
```python
|
|
892
|
+
# Before (faker)
|
|
893
|
+
# from faker import Faker
|
|
894
|
+
|
|
895
|
+
# After (dataforge — same API, faster)
|
|
896
|
+
from dataforge.compat import Faker
|
|
897
|
+
|
|
898
|
+
fake = Faker(locale="en_US", seed=42)
|
|
899
|
+
|
|
900
|
+
fake.name() # "James Smith"
|
|
901
|
+
fake.email() # "james.smith@gmail.com"
|
|
902
|
+
fake.address() # "4821 Oak Ave, Chicago, IL 60614"
|
|
903
|
+
fake.company() # "Acme Corp"
|
|
904
|
+
fake.phone_number() # "(555) 123-4567"
|
|
905
|
+
fake.date() # "2024-03-15"
|
|
906
|
+
fake.text() # "Lorem ipsum dolor sit amet..."
|
|
907
|
+
```
|
|
908
|
+
|
|
909
|
+
### Supported Methods
|
|
910
|
+
|
|
911
|
+
The compatibility layer maps 57 Faker method names to DataForge fields. A few common mappings:
|
|
912
|
+
|
|
913
|
+
| Faker method | DataForge field |
|
|
914
|
+
|-------------|-----------------|
|
|
915
|
+
| `name()` | `full_name` |
|
|
916
|
+
| `first_name()` | `first_name` |
|
|
917
|
+
| `last_name()` | `last_name` |
|
|
918
|
+
| `email()` | `email` |
|
|
919
|
+
| `address()` | `full_address` |
|
|
920
|
+
| `company()` | `company_name` |
|
|
921
|
+
| `phone_number()` | `phone_number` |
|
|
922
|
+
| `date()` | `date` |
|
|
923
|
+
| `city()` | `city` |
|
|
924
|
+
| `state()` | `state` |
|
|
925
|
+
| `zipcode()` | `zip_code` |
|
|
926
|
+
| `url()` | `url` |
|
|
927
|
+
| `ipv4()` | `ipv4` |
|
|
928
|
+
| `uuid4()` | `uuid4` |
|
|
929
|
+
| `ssn()` | `ssn` |
|
|
930
|
+
| `credit_card_number()` | `credit_card_number` |
|
|
931
|
+
|
|
932
|
+
Any method not in the explicit map falls back to DataForge's alias lookup, then to a direct field name match. Resolved methods are cached for subsequent calls.
|
|
933
|
+
|
|
934
|
+
### Seeding
|
|
935
|
+
|
|
936
|
+
```python
|
|
937
|
+
# Global seed (class method)
|
|
938
|
+
Faker.seed(42)
|
|
939
|
+
|
|
940
|
+
# Instance seed
|
|
941
|
+
fake = Faker(seed=42)
|
|
942
|
+
fake.seed_instance(99)
|
|
943
|
+
|
|
944
|
+
# Multi-locale
|
|
945
|
+
fake = Faker(["en_US", "fr_FR", "de_DE"])
|
|
946
|
+
fake.name() # randomly picks a locale per call
|
|
947
|
+
```
|
|
948
|
+
|
|
949
|
+
---
|
|
950
|
+
|
|
951
|
+
## Multi-Locale Mixing
|
|
952
|
+
|
|
953
|
+
Pass a list of locales to blend data from multiple languages in a single forge instance. Each generation call randomly selects one of the configured locales.
|
|
954
|
+
|
|
955
|
+
```python
|
|
956
|
+
from dataforge import DataForge
|
|
957
|
+
|
|
958
|
+
forge = DataForge(locale=["en_US", "fr_FR", "ja_JP"], seed=42)
|
|
959
|
+
|
|
960
|
+
# Each call randomly picks a locale
|
|
961
|
+
forge.person.full_name() # "James Smith" or "Jean Dupont" or "田中太郎"
|
|
962
|
+
forge.address.city() # "Chicago" or "Paris" or "東京"
|
|
963
|
+
|
|
964
|
+
# Check configured locales
|
|
965
|
+
forge.locales # ("en_US", "fr_FR", "ja_JP")
|
|
966
|
+
forge.locale # "en_US" (primary)
|
|
967
|
+
```
|
|
968
|
+
|
|
969
|
+
### How It Works
|
|
970
|
+
|
|
971
|
+
- A child `DataForge` instance is created for each locale
|
|
972
|
+
- Each child gets a deterministic sub-seed derived from the parent seed
|
|
973
|
+
- On every provider access, one child is selected at random
|
|
974
|
+
- Seeding is reproducible: the same seed always produces the same locale sequence
|
|
975
|
+
|
|
976
|
+
### Schema Integration
|
|
977
|
+
|
|
978
|
+
Multi-locale works seamlessly with the Schema API:
|
|
979
|
+
|
|
980
|
+
```python
|
|
981
|
+
forge = DataForge(locale=["en_US", "de_DE", "es_ES"], seed=42)
|
|
982
|
+
schema = forge.schema(["first_name", "last_name", "city"])
|
|
983
|
+
rows = schema.generate(100)
|
|
984
|
+
# Rows contain a mix of English, German, and Spanish names and cities
|
|
985
|
+
```
|
|
986
|
+
|
|
987
|
+
---
|
|
988
|
+
|
|
989
|
+
## Dynamic Fields (`define()`)
|
|
990
|
+
|
|
991
|
+
Define custom fields that can be used anywhere a built-in field name is accepted — in schemas, bulk export, and CLI.
|
|
992
|
+
|
|
993
|
+
```python
|
|
994
|
+
from dataforge import DataForge
|
|
995
|
+
|
|
996
|
+
forge = DataForge(seed=42)
|
|
997
|
+
|
|
998
|
+
# From a list of elements (uniform random)
|
|
999
|
+
forge.define("status", elements=["active", "inactive", "pending"])
|
|
1000
|
+
forge.status() # "active"
|
|
1001
|
+
|
|
1002
|
+
# With weighted probabilities
|
|
1003
|
+
forge.define("priority", elements=["low", "medium", "high"], weights=[0.5, 0.3, 0.2])
|
|
1004
|
+
forge.priority() # "low" (50% of the time)
|
|
1005
|
+
|
|
1006
|
+
# From a callable
|
|
1007
|
+
forge.define("score", func=lambda: round(random.gauss(75, 10), 1))  # requires `import random`
|
|
1008
|
+
forge.score() # 78.3
|
|
1009
|
+
```
|
|
1010
|
+
|
|
1011
|
+
### Batch Generation
|
|
1012
|
+
|
|
1013
|
+
Custom fields support `count=N` like built-in fields:
|
|
1014
|
+
|
|
1015
|
+
```python
|
|
1016
|
+
forge.define("tier", elements=["free", "pro", "enterprise"])
|
|
1017
|
+
tiers = forge.tier(count=1000) # list of 1000 random tiers
|
|
1018
|
+
```
|
|
1019
|
+
|
|
1020
|
+
### Schema Integration
|
|
1021
|
+
|
|
1022
|
+
Custom fields are resolved by name in schemas:
|
|
1023
|
+
|
|
1024
|
+
```python
|
|
1025
|
+
forge.define("status", elements=["active", "inactive", "pending"])
|
|
1026
|
+
schema = forge.schema({
|
|
1027
|
+
"Name": "full_name",
|
|
1028
|
+
"Email": "email",
|
|
1029
|
+
"Status": "status",
|
|
1030
|
+
})
|
|
1031
|
+
rows = schema.generate(100)
|
|
1032
|
+
# [{"Name": "James Smith", "Email": "...", "Status": "active"}, ...]
|
|
1033
|
+
```
|
|
1034
|
+
|
|
1035
|
+
---
|
|
1036
|
+
|
|
1037
|
+
## Field Transform Pipelines (`pipe()`)
|
|
1038
|
+
|
|
1039
|
+
Chain composable post-generation transforms onto any field. The `pipe()` function creates a field spec that first generates data from a provider, then applies one or more transform functions in sequence.
|
|
1040
|
+
|
|
1041
|
+
```python
|
|
1042
|
+
from dataforge import DataForge
|
|
1043
|
+
from dataforge.transforms import pipe, upper, lower, truncate, maybe_null
|
|
1044
|
+
|
|
1045
|
+
forge = DataForge(seed=42)
|
|
1046
|
+
|
|
1047
|
+
schema = forge.schema({
|
|
1048
|
+
"Username": pipe("username", upper),
|
|
1049
|
+
"Bio": pipe("sentence", truncate(50)),
|
|
1050
|
+
"Email": pipe("email", lower),
|
|
1051
|
+
"Phone": pipe("phone_number", maybe_null(0.2)),
|
|
1052
|
+
})
|
|
1053
|
+
rows = schema.generate(100)
|
|
1054
|
+
# [{"Username": "JSMITH42", "Bio": "Lorem ipsum dolor...", ...}, ...]
|
|
1055
|
+
```
|
|
1056
|
+
|
|
1057
|
+
### Built-in Transforms
|
|
1058
|
+
|
|
1059
|
+
**Case transforms:**
|
|
1060
|
+
|
|
1061
|
+
| Transform | Description | Example |
|
|
1062
|
+
|-----------|-------------|---------|
|
|
1063
|
+
| `upper` | Uppercase | `"hello"` -> `"HELLO"` |
|
|
1064
|
+
| `lower` | Lowercase | `"Hello"` -> `"hello"` |
|
|
1065
|
+
| `title_case` | Title Case | `"hello world"` -> `"Hello World"` |
|
|
1066
|
+
| `snake_case` | snake_case | `"Hello World"` -> `"hello_world"` |
|
|
1067
|
+
| `camel_case` | camelCase | `"hello world"` -> `"helloWorld"` |
|
|
1068
|
+
| `kebab_case` | kebab-case | `"Hello World"` -> `"hello-world"` |
|
|
1069
|
+
|
|
1070
|
+
**String transforms:**
|
|
1071
|
+
|
|
1072
|
+
| Transform | Description | Example |
|
|
1073
|
+
|-----------|-------------|---------|
|
|
1074
|
+
| `truncate(n, suffix="...")` | Truncate to *n* chars | `"Hello World"` -> `"Hello..."` |
|
|
1075
|
+
| `strip` | Strip whitespace | `" hello "` -> `"hello"` |
|
|
1076
|
+
| `prefix(pre)` | Prepend string | `"world"` -> `"hello_world"` |
|
|
1077
|
+
| `suffix(suf)` | Append string | `"hello"` -> `"hello_world"` |
|
|
1078
|
+
| `wrap(before, after)` | Wrap with delimiters | `"hi"` -> `"[hi]"` |
|
|
1079
|
+
| `replace(old, new)` | String replacement | `"a-b"` -> `"a_b"` |
|
|
1080
|
+
|
|
1081
|
+
**Data transforms:**
|
|
1082
|
+
|
|
1083
|
+
| Transform | Description |
|
|
1084
|
+
|-----------|-------------|
|
|
1085
|
+
| `maybe_null(probability)` | Replace with `None` at given rate |
|
|
1086
|
+
| `hash_with(algorithm)` | Hash value (SHA-256, MD5, etc.) |
|
|
1087
|
+
| `encode_b64` | Base64-encode |
|
|
1088
|
+
| `decode_b64` | Base64-decode |
|
|
1089
|
+
| `redact(char, keep_start, keep_end)` | Redact middle characters |
|
|
1090
|
+
| `apply_if(condition, transform)` | Conditionally apply a transform |
|
|
1091
|
+
|
|
1092
|
+
### Chaining Multiple Transforms
|
|
1093
|
+
|
|
1094
|
+
Transforms are applied left to right:
|
|
1095
|
+
|
|
1096
|
+
```python
|
|
1097
|
+
from dataforge.transforms import pipe, lower, replace, truncate
|
|
1098
|
+
|
|
1099
|
+
schema = forge.schema({
|
|
1100
|
+
"slug": pipe("full_name", lower, replace(" ", "-"), truncate(20)),
|
|
1101
|
+
})
|
|
1102
|
+
# "James Smith" -> "james smith" -> "james-smith" -> "james-smith"
|
|
1103
|
+
```
|
|
1104
|
+
|
|
1105
|
+
### Custom Transforms
|
|
1106
|
+
|
|
1107
|
+
Any `(value) -> value` callable works as a transform:
|
|
1108
|
+
|
|
1109
|
+
```python
|
|
1110
|
+
schema = forge.schema({
|
|
1111
|
+
"price": pipe("price", lambda v: f"${v}"),
|
|
1112
|
+
"name": pipe("first_name", str.upper),
|
|
1113
|
+
})
|
|
1114
|
+
```
|
|
1115
|
+
|
|
1116
|
+
---
|
|
1117
|
+
|
|
1118
|
+
## Type-Driven Schema
|
|
1119
|
+
|
|
1120
|
+
Auto-generate schemas from Python `@dataclass` and `TypedDict` classes. Field names and type annotations are matched to DataForge providers via a 3-tier resolution: exact registry match, alias heuristic (~90 common field names), then type-based fallback.
|
|
1121
|
+
|
|
1122
|
+
### From Dataclasses
|
|
1123
|
+
|
|
1124
|
+
```python
|
|
1125
|
+
from dataclasses import dataclass
|
|
1126
|
+
from dataforge import DataForge
|
|
1127
|
+
|
|
1128
|
+
@dataclass
|
|
1129
|
+
class User:
|
|
1130
|
+
first_name: str
|
|
1131
|
+
last_name: str
|
|
1132
|
+
email: str
|
|
1133
|
+
age: int
|
|
1134
|
+
is_active: bool
|
|
1135
|
+
|
|
1136
|
+
forge = DataForge(seed=42)
|
|
1137
|
+
schema = forge.schema_from_dataclass(User)
|
|
1138
|
+
rows = schema.generate(100)
|
|
1139
|
+
# [{"first_name": "James", "last_name": "Smith", "email": "...", ...}, ...]
|
|
1140
|
+
```
|
|
1141
|
+
|
|
1142
|
+
### From TypedDicts
|
|
1143
|
+
|
|
1144
|
+
```python
|
|
1145
|
+
from typing import TypedDict
|
|
1146
|
+
|
|
1147
|
+
class Product(TypedDict):
|
|
1148
|
+
product_name: str
|
|
1149
|
+
sku: str
|
|
1150
|
+
price: float
|
|
1151
|
+
category: str
|
|
1152
|
+
|
|
1153
|
+
schema = forge.schema_from_typed_dict(Product)
|
|
1154
|
+
rows = schema.generate(100)
|
|
1155
|
+
```
|
|
1156
|
+
|
|
1157
|
+
### Resolution Order
|
|
1158
|
+
|
|
1159
|
+
For each field, DataForge tries:
|
|
1160
|
+
|
|
1161
|
+
1. **Exact match** — field name exists in the provider registry (e.g. `email` -> `internet.email`)
|
|
1162
|
+
2. **Alias heuristic** — field name appears in `_FIELD_ALIASES` (e.g. `user_email` -> `email`, `zip` -> `zip_code`)
|
|
1163
|
+
3. **Type fallback** — Python type maps to a default field (`bool` -> `boolean`, `datetime` -> `datetime`, `UUID` -> `uuid4`)
|
|
1164
|
+
|
|
1165
|
+
Fields that cannot be resolved emit a `UserWarning` and are skipped. A `ValueError` is raised if no fields could be mapped at all.
|
|
1166
|
+
|
|
1167
|
+
---
|
|
1168
|
+
|
|
1169
|
+
## Data Contract Validation
|
|
1170
|
+
|
|
1171
|
+
Validate that generated or imported data conforms to expected semantic patterns. The validator checks each cell against 14 regex-based semantic validators and enforces non-empty constraints for identity fields.
|
|
1172
|
+
|
|
1173
|
+
```python
|
|
1174
|
+
from dataforge.validation import validate_records, validate_csv
|
|
1175
|
+
|
|
1176
|
+
# Validate in-memory records
|
|
1177
|
+
report = validate_records(
|
|
1178
|
+
records=[
|
|
1179
|
+
{"email": "alice@test.com", "name": "Alice"},
|
|
1180
|
+
{"email": "not-an-email", "name": ""},
|
|
1181
|
+
],
|
|
1182
|
+
field_map={"email": "email", "name": "full_name"},
|
|
1183
|
+
)
|
|
1184
|
+
|
|
1185
|
+
print(report.is_valid) # False
|
|
1186
|
+
print(report.violation_count) # 2
|
|
1187
|
+
print(report.summary())
|
|
1188
|
+
```
|
|
1189
|
+
|
|
1190
|
+
### Semantic Validators
|
|
1191
|
+
|
|
1192
|
+
The following field types are validated with regex patterns:
|
|
1193
|
+
|
|
1194
|
+
| Field | Pattern |
|
|
1195
|
+
|-------|---------|
|
|
1196
|
+
| `email` | RFC-compliant `user@domain.tld` |
|
|
1197
|
+
| `ipv4` | Dotted quad `0-255.0-255.0-255.0-255` |
|
|
1198
|
+
| `ipv6` | Hex groups with colons |
|
|
1199
|
+
| `url` | `http(s)://...` |
|
|
1200
|
+
| `uuid4` | 8-4-4-4-12 hex format |
|
|
1201
|
+
| `date` | ISO `YYYY-MM-DD` |
|
|
1202
|
+
| `datetime` | ISO `YYYY-MM-DD HH:MM:SS` |
|
|
1203
|
+
| `time` | `HH:MM:SS` |
|
|
1204
|
+
| `phone_number` | Digit groups with optional delimiters |
|
|
1205
|
+
| `zip_code` | 5-digit or 5+4 US zip |
|
|
1206
|
+
| `ssn` | `NNN-NN-NNNN` |
|
|
1207
|
+
| `mac_address` | Hex pairs with colons |
|
|
1208
|
+
| `hex_color` | `#RRGGBB` |
|
|
1209
|
+
| `credit_card_number` | 13-19 digits |
|
|
1210
|
+
|
|
1211
|
+
### Non-Empty Constraints
|
|
1212
|
+
|
|
1213
|
+
Identity fields (`first_name`, `last_name`, `full_name`, `email`, `city`, `state`, `country`, `company_name`, `job_title`, `username`, `domain_name`) must be non-null and non-empty unless listed in `null_fields`.
|
|
1214
|
+
|
|
1215
|
+
### CSV Validation
|
|
1216
|
+
|
|
1217
|
+
```python
|
|
1218
|
+
report = validate_csv(
|
|
1219
|
+
path="users.csv",
|
|
1220
|
+
field_map={"email": "email", "phone": "phone_number"},
|
|
1221
|
+
max_rows=10_000,
|
|
1222
|
+
delimiter=",",
|
|
1223
|
+
encoding="utf-8",
|
|
1224
|
+
)
|
|
1225
|
+
```
|
|
1226
|
+
|
|
1227
|
+
### Schema Integration
|
|
1228
|
+
|
|
1229
|
+
Validate data directly from a schema:
|
|
1230
|
+
|
|
1231
|
+
```python
|
|
1232
|
+
schema = forge.schema({"Email": "email", "Phone": "phone_number"})
|
|
1233
|
+
rows = schema.generate(1000)
|
|
1234
|
+
report = schema.validate(rows)
|
|
1235
|
+
# or validate a CSV file:
|
|
1236
|
+
report = schema.validate("users.csv")
|
|
1237
|
+
```
|
|
1238
|
+
|
|
1239
|
+
### Violation Reports
|
|
1240
|
+
|
|
1241
|
+
```python
|
|
1242
|
+
report = validate_records(records, field_map)
|
|
1243
|
+
|
|
1244
|
+
report.is_valid # bool — True if no violations
|
|
1245
|
+
report.violation_count # int — total number of violations
|
|
1246
|
+
report.total_rows # int — rows checked
|
|
1247
|
+
report.total_columns # int — columns checked
|
|
1248
|
+
|
|
1249
|
+
# Group by column
|
|
1250
|
+
by_col = report.violations_by_column()
|
|
1251
|
+
# {"email": [Violation(row=1, column="email", ...), ...]}
|
|
1252
|
+
|
|
1253
|
+
# Human-readable summary (up to 5 violations per column)
|
|
1254
|
+
print(report.summary())
|
|
1255
|
+
```
|
|
1256
|
+
|
|
1257
|
+
---
|
|
1258
|
+
|
|
1259
|
+
## Hypothesis Strategy Bridge
|
|
1260
|
+
|
|
1261
|
+
Integrate DataForge fields into [Hypothesis](https://hypothesis.readthedocs.io/) property-based tests. Requires `pip install hypothesis`.
|
|
1262
|
+
|
|
1263
|
+
```python
|
|
1264
|
+
from hypothesis import given
from dataforge.compat.hypothesis import strategy, forge_strategy
|
|
1265
|
+
|
|
1266
|
+
# Single-field strategy
|
|
1267
|
+
@given(email=strategy("email"))
|
|
1268
|
+
def test_emails_contain_at(email):
|
|
1269
|
+
assert "@" in email
|
|
1270
|
+
|
|
1271
|
+
# Multi-field strategy (returns dicts)
|
|
1272
|
+
@given(row=forge_strategy(["first_name", "email", "city"]))
|
|
1273
|
+
def test_row_has_keys(row):
|
|
1274
|
+
assert "first_name" in row
|
|
1275
|
+
assert "email" in row
|
|
1276
|
+
assert "city" in row
|
|
1277
|
+
```
|
|
1278
|
+
|
|
1279
|
+
### `strategy(field, locale, **kwargs)`
|
|
1280
|
+
|
|
1281
|
+
Creates a Hypothesis `SearchStrategy` that yields values from a single DataForge field.
|
|
1282
|
+
|
|
1283
|
+
```python
|
|
1284
|
+
from hypothesis import given
|
|
1285
|
+
from dataforge.compat.hypothesis import strategy
|
|
1286
|
+
|
|
1287
|
+
@given(name=strategy("first_name", locale="fr_FR"))
|
|
1288
|
+
def test_french_names(name):
|
|
1289
|
+
assert isinstance(name, str) and len(name) > 0
|
|
1290
|
+
```
|
|
1291
|
+
|
|
1292
|
+
Parameters:
|
|
1293
|
+
- `field` — DataForge field name (e.g. `"email"`, `"person.full_name"`)
|
|
1294
|
+
- `locale` — locale code (default: `"en_US"`)
|
|
1295
|
+
- `**kwargs` — forwarded to the provider method
|
|
1296
|
+
|
|
1297
|
+
### `forge_strategy(fields, locale)`
|
|
1298
|
+
|
|
1299
|
+
Creates a strategy that yields `dict[str, Any]` rows. `fields` may be a list of field names (used as the column names) or a dict mapping column names to field names.
|
|
1300
|
+
|
|
1301
|
+
```python
|
|
1302
|
+
@given(row=forge_strategy({"Name": "full_name", "City": "city"}))
|
|
1303
|
+
def test_row_types(row):
|
|
1304
|
+
assert isinstance(row["Name"], str)
|
|
1305
|
+
assert isinstance(row["City"], str)
|
|
1306
|
+
```
|
|
1307
|
+
|
|
1308
|
+
---
|
|
1309
|
+
|
|
1310
|
+
## HTTP Mock Data Server
|
|
1311
|
+
|
|
1312
|
+
Start a zero-dependency HTTP server that returns fake JSON data on every GET request. Useful for frontend prototyping, integration tests, and API mocking.
|
|
1313
|
+
|
|
1314
|
+
```bash
|
|
1315
|
+
# Start the server (default: port 8080)
|
|
1316
|
+
dataforge --serve first_name email city
|
|
1317
|
+
|
|
1318
|
+
# Custom port and row count
|
|
1319
|
+
dataforge --serve --port 3000 --count 50 first_name email city
|
|
1320
|
+
|
|
1321
|
+
# With a seed for reproducible responses
|
|
1322
|
+
dataforge --serve --seed 42 --port 8080 first_name email city
|
|
1323
|
+
|
|
1324
|
+
# Custom column names
|
|
1325
|
+
dataforge --serve Name=full_name Email=email City=city
|
|
1326
|
+
```
|
|
1327
|
+
|
|
1328
|
+
### Endpoints
|
|
1329
|
+
|
|
1330
|
+
| Method | Path | Description |
|
|
1331
|
+
|--------|------|-------------|
|
|
1332
|
+
| `GET` | `/?count=N` | Returns *N* rows as a JSON array (default: `--count` value) |
|
|
1333
|
+
|
|
1334
|
+
### Response Format
|
|
1335
|
+
|
|
1336
|
+
```bash
|
|
1337
|
+
$ curl "http://localhost:8080/?count=3"
|
|
1338
|
+
```
|
|
1339
|
+
|
|
1340
|
+
```json
|
|
1341
|
+
[
|
|
1342
|
+
{"first_name": "James", "email": "james.smith@gmail.com", "city": "Chicago"},
|
|
1343
|
+
{"first_name": "Maria", "email": "maria.garcia@yahoo.com", "city": "Houston"},
|
|
1344
|
+
{"first_name": "David", "email": "david.jones@outlook.com", "city": "Phoenix"}
|
|
1345
|
+
]
|
|
1346
|
+
```
|
|
1347
|
+
|
|
1348
|
+
Response headers include `Content-Type: application/json; charset=utf-8` and `Access-Control-Allow-Origin: *` for CORS support.
|
|
1349
|
+
|
|
1350
|
+
### Schema File
|
|
1351
|
+
|
|
1352
|
+
Load field definitions from a JSON, YAML, or TOML schema file:
|
|
1353
|
+
|
|
1354
|
+
```bash
|
|
1355
|
+
dataforge --serve --schema my_schema.yaml --port 8080
|
|
1356
|
+
```
|
|
1357
|
+
|
|
1358
|
+
---
|
|
1359
|
+
|
|
1360
|
+
## XLSX Export
|
|
1361
|
+
|
|
1362
|
+
Export schema data to Excel `.xlsx` files. Requires `pip install openpyxl`.
|
|
1363
|
+
|
|
1364
|
+
```python
|
|
1365
|
+
from dataforge import DataForge
|
|
1366
|
+
|
|
1367
|
+
forge = DataForge(seed=42)
|
|
1368
|
+
|
|
1369
|
+
# Via Schema
|
|
1370
|
+
schema = forge.schema(["first_name", "last_name", "email", "city"])
|
|
1371
|
+
rows_written = schema.to_excel("users.xlsx", count=1000, sheet_name="Users")
|
|
1372
|
+
|
|
1373
|
+
# Via DataForge convenience method
|
|
1374
|
+
rows_written = forge.to_excel(
|
|
1375
|
+
fields={"Name": "full_name", "Email": "email", "City": "city"},
|
|
1376
|
+
path="contacts.xlsx",
|
|
1377
|
+
count=5000,
|
|
1378
|
+
sheet_name="Contacts",
|
|
1379
|
+
)
|
|
1380
|
+
```
|
|
1381
|
+
|
|
1382
|
+
### Parameters
|
|
1383
|
+
|
|
1384
|
+
| Parameter | Type | Default | Description |
|
|
1385
|
+
|-----------|------|---------|-------------|
|
|
1386
|
+
| `path` | `str` | (required) | Output file path |
|
|
1387
|
+
| `count` | `int` | `10` | Number of rows to generate |
|
|
1388
|
+
| `sheet_name` | `str` | `"Sheet1"` | Excel worksheet name |
|
|
1389
|
+
|
|
1390
|
+
The writer uses `openpyxl`'s write-only mode (`Workbook(write_only=True)`) for memory-efficient streaming of large datasets. Returns the number of rows written.
|
|
1391
|
+
|
|
1392
|
+
---
|
|
1393
|
+
|
|
1394
|
+
## Statistical Distribution Fitting
|
|
1395
|
+
|
|
1396
|
+
The schema inferrer can detect statistical distributions in numeric columns and report the best-fitting distribution with its parameters. This runs automatically during `infer_schema()` when columns contain at least 20 numeric values.
|
|
1397
|
+
|
|
1398
|
+
```python
|
|
1399
|
+
from dataforge import DataForge
|
|
1400
|
+
from dataforge.inference import SchemaInferrer
|
|
1401
|
+
|
|
1402
|
+
forge = DataForge(seed=42)
|
|
1403
|
+
inferrer = SchemaInferrer(forge)
|
|
1404
|
+
|
|
1405
|
+
# Infer from data with numeric columns
|
|
1406
|
+
schema = inferrer.from_records([
|
|
1407
|
+
{"value": 2.3, "count": 5},
|
|
1408
|
+
{"value": 1.8, "count": 12},
|
|
1409
|
+
# ... (20+ records for distribution detection)
|
|
1410
|
+
])
|
|
1411
|
+
|
|
1412
|
+
# Inspect detected distributions
|
|
1413
|
+
for analysis in inferrer.analyses:
|
|
1414
|
+
if analysis.distribution:
|
|
1415
|
+
print(f"{analysis.name}: {analysis.distribution}")
|
|
1416
|
+
# "value: {'name': 'normal', 'params': {'mean': 2.1, 'std': 0.4}}"
|
|
1417
|
+
```
|
|
1418
|
+
|
|
1419
|
+
### Supported Distributions
|
|
1420
|
+
|
|
1421
|
+
| Distribution | Condition | Parameters |
|
|
1422
|
+
|-------------|-----------|------------|
|
|
1423
|
+
| Normal | Always tested | `mean`, `std` |
|
|
1424
|
+
| LogNormal | All values > 0 | `mu`, `sigma` (of log-values) |
|
|
1425
|
+
| Exponential | All values > 0, skew > 1.5 | `rate` |
|
|
1426
|
+
| Beta | All values in (0, 1] | `alpha`, `beta` |
|
|
1427
|
+
| Zipf | Integer values >= 1, 5+ distinct ranks | `s` (exponent) |
|
|
1428
|
+
|
|
1429
|
+
### How It Works
|
|
1430
|
+
|
|
1431
|
+
1. **Single-pass moment accumulation** — computes mean, variance, skewness, and kurtosis in one pass over the data
|
|
1432
|
+
2. **Jarque-Bera test** — used for Normal and LogNormal goodness-of-fit testing
|
|
1433
|
+
3. **Least-squares R^2** — used for Zipf power-law fitting on log-rank vs log-frequency
|
|
1434
|
+
4. **Best fit selection** — each candidate distribution gets a score; the lowest score wins
|
|
1435
|
+
|
|
1436
|
+
Distribution results are stored in `ColumnAnalysis.distribution` and included in `inferrer.describe()` output.
|
|
1437
|
+
|
|
1438
|
+
### Convenience Methods
|
|
1439
|
+
|
|
1440
|
+
```python
|
|
1441
|
+
# Via DataForge
|
|
1442
|
+
schema = forge.infer_schema(records)
|
|
1443
|
+
schema = forge.infer_schema_from_csv("data.csv", max_rows=1000)
|
|
1444
|
+
```
|
|
1445
|
+
|
|
1446
|
+
---
|
|
1447
|
+
|
|
865
1448
|
## Time-Series Generation
|
|
866
1449
|
|
|
867
1450
|
Generate synthetic time-series data with configurable trends, seasonality, noise, anomalies, regime changes, missing data, and spikes.
|
|
@@ -1440,6 +2023,17 @@ The [`examples/`](examples/) directory contains comprehensive real-world usage e
|
|
|
1440
2023
|
| [`08_streaming.py`](examples/08_streaming.py) | HTTP/Kafka/RabbitMQ streaming with rate limiting |
|
|
1441
2024
|
| [`09_tui.py`](examples/09_tui.py) | Interactive TUI launch and keyboard shortcuts |
|
|
1442
2025
|
| [`10_real_world_scenarios.py`](examples/10_real_world_scenarios.py) | Combined scenarios: e-commerce, healthcare, IoT, API testing |
|
|
2026
|
+
| [`11_faker_compat.py`](examples/11_faker_compat.py) | Faker compatibility layer — migrating from Faker to DataForge |
|
|
2027
|
+
| [`12_multi_locale.py`](examples/12_multi_locale.py) | Multi-locale data generation for internationalized test data |
|
|
2028
|
+
| [`13_dynamic_fields.py`](examples/13_dynamic_fields.py) | Dynamic fields with `define()` — custom data pools and generators |
|
|
2029
|
+
| [`14_transform_pipelines.py`](examples/14_transform_pipelines.py) | Transform pipelines with `pipe()` — post-generation data transformation |
|
|
2030
|
+
| [`15_type_driven_schema.py`](examples/15_type_driven_schema.py) | Type-driven schema generation from dataclasses and TypedDicts |
|
|
2031
|
+
| [`16_data_validation.py`](examples/16_data_validation.py) | Data contract validation — ensuring data quality with semantic rules |
|
|
2032
|
+
| [`17_hypothesis_bridge.py`](examples/17_hypothesis_bridge.py) | Hypothesis strategy bridge — property-based testing with DataForge |
|
|
2033
|
+
| [`18_mock_server.py`](examples/18_mock_server.py) | HTTP mock data server — serving fake data over HTTP |
|
|
2034
|
+
| [`19_xlsx_export.py`](examples/19_xlsx_export.py) | XLSX export — generating Excel spreadsheets with DataForge schemas |
|
|
2035
|
+
| [`20_distribution_fitting.py`](examples/20_distribution_fitting.py) | Distribution fitting — inferring statistical distributions from data |
|
|
2036
|
+
| [`21_advanced_scenarios.py`](examples/21_advanced_scenarios.py) | Advanced multi-feature workflows combining DataForge capabilities |
|
|
1443
2037
|
|
|
1444
2038
|
## Benchmarks
|
|
1445
2039
|
|