dataforge-py 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/PKG-INFO +595 -1
  2. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/README.md +594 -0
  3. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/pyproject.toml +8 -3
  4. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/__init__.py +1 -1
  5. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/anonymizer.py +5 -95
  6. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/backend.py +24 -277
  7. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/chaos.py +35 -119
  8. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/cli.py +93 -67
  9. dataforge_py-0.5.0/src/dataforge/compat/__init__.py +15 -0
  10. dataforge_py-0.5.0/src/dataforge/compat/faker.py +172 -0
  11. dataforge_py-0.5.0/src/dataforge/compat/hypothesis.py +106 -0
  12. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/constraints.py +11 -80
  13. dataforge_py-0.5.0/src/dataforge/core.py +997 -0
  14. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/data/correlations/geo.py +0 -10
  15. dataforge_py-0.5.0/src/dataforge/decorators.py +81 -0
  16. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/inference.py +249 -151
  17. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/address.py +0 -10
  18. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/company.py +0 -55
  19. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/person.py +0 -25
  20. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/address.py +0 -25
  21. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/person.py +0 -78
  22. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/address.py +0 -10
  23. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/person.py +0 -80
  24. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/address.py +0 -93
  25. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/company.py +0 -55
  26. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/person.py +0 -178
  27. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/address.py +0 -124
  28. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/company.py +0 -55
  29. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/person.py +0 -185
  30. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/address.py +0 -88
  31. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/company.py +0 -55
  32. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/person.py +0 -180
  33. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/address.py +0 -85
  34. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/company.py +0 -55
  35. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/person.py +0 -178
  36. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/address.py +0 -10
  37. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/person.py +0 -75
  38. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/address.py +0 -10
  39. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/person.py +0 -73
  40. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/address.py +0 -10
  41. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/person.py +0 -80
  42. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/address.py +0 -10
  43. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/company.py +0 -55
  44. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/person.py +0 -26
  45. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/company.py +0 -15
  46. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/person.py +0 -55
  47. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/address.py +0 -10
  48. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/person.py +0 -75
  49. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/company.py +0 -15
  50. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/person.py +0 -50
  51. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/address.py +0 -10
  52. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/company.py +0 -5
  53. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/person.py +0 -25
  54. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/address.py +0 -10
  55. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/company.py +0 -51
  56. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/person.py +0 -26
  57. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/address.py +0 -10
  58. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/company.py +0 -50
  59. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/person.py +0 -25
  60. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/company.py +0 -15
  61. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/person.py +0 -55
  62. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/address.py +0 -10
  63. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/company.py +0 -36
  64. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/person.py +0 -26
  65. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/address.py +0 -10
  66. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/person.py +0 -63
  67. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/address.py +0 -10
  68. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/person.py +0 -75
  69. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/company.py +0 -15
  70. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/person.py +0 -50
  71. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/openapi.py +14 -108
  72. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/address.py +15 -227
  73. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/ai_prompt.py +20 -169
  74. dataforge_py-0.5.0/src/dataforge/providers/automotive.py +213 -0
  75. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/barcode.py +5 -59
  76. dataforge_py-0.5.0/src/dataforge/providers/base.py +59 -0
  77. dataforge_py-0.5.0/src/dataforge/providers/color.py +123 -0
  78. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/company.py +5 -58
  79. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/crypto.py +4 -64
  80. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/datetime.py +12 -146
  81. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/ecommerce.py +5 -115
  82. dataforge_py-0.5.0/src/dataforge/providers/education.py +101 -0
  83. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/file.py +18 -106
  84. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/finance.py +34 -174
  85. dataforge_py-0.5.0/src/dataforge/providers/food.py +236 -0
  86. dataforge_py-0.5.0/src/dataforge/providers/geo.py +186 -0
  87. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/government.py +0 -36
  88. dataforge_py-0.5.0/src/dataforge/providers/hardware.py +226 -0
  89. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/internet.py +9 -124
  90. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/legal.py +12 -210
  91. dataforge_py-0.5.0/src/dataforge/providers/llm.py +495 -0
  92. dataforge_py-0.5.0/src/dataforge/providers/logistics.py +285 -0
  93. dataforge_py-0.5.0/src/dataforge/providers/lorem.py +107 -0
  94. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/medical.py +8 -161
  95. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/misc.py +9 -104
  96. dataforge_py-0.5.0/src/dataforge/providers/music.py +270 -0
  97. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/network.py +11 -107
  98. dataforge_py-0.5.0/src/dataforge/providers/payment.py +196 -0
  99. dataforge_py-0.5.0/src/dataforge/providers/person.py +96 -0
  100. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/phone.py +2 -31
  101. dataforge_py-0.5.0/src/dataforge/providers/profile.py +102 -0
  102. dataforge_py-0.5.0/src/dataforge/providers/real_estate.py +224 -0
  103. dataforge_py-0.5.0/src/dataforge/providers/science.py +216 -0
  104. dataforge_py-0.5.0/src/dataforge/providers/social_media.py +234 -0
  105. dataforge_py-0.5.0/src/dataforge/providers/sports.py +249 -0
  106. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/text.py +3 -115
  107. dataforge_py-0.5.0/src/dataforge/providers/weather.py +204 -0
  108. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/registry.py +5 -62
  109. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/relational.py +5 -105
  110. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/schema.py +146 -593
  111. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/schema_io.py +8 -301
  112. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/seeder.py +8 -115
  113. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/streaming.py +9 -177
  114. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/timeseries.py +8 -145
  115. dataforge_py-0.5.0/src/dataforge/transforms.py +240 -0
  116. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/tui/app.py +0 -4
  117. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/unique.py +3 -49
  118. dataforge_py-0.5.0/src/dataforge/validation.py +257 -0
  119. dataforge_py-0.4.0/src/dataforge/core.py +0 -1757
  120. dataforge_py-0.4.0/src/dataforge/decorators.py +0 -172
  121. dataforge_py-0.4.0/src/dataforge/providers/ai_chat.py +0 -170
  122. dataforge_py-0.4.0/src/dataforge/providers/automotive.py +0 -416
  123. dataforge_py-0.4.0/src/dataforge/providers/base.py +0 -34
  124. dataforge_py-0.4.0/src/dataforge/providers/color.py +0 -247
  125. dataforge_py-0.4.0/src/dataforge/providers/education.py +0 -234
  126. dataforge_py-0.4.0/src/dataforge/providers/food.py +0 -476
  127. dataforge_py-0.4.0/src/dataforge/providers/geo.py +0 -332
  128. dataforge_py-0.4.0/src/dataforge/providers/hardware.py +0 -478
  129. dataforge_py-0.4.0/src/dataforge/providers/llm.py +0 -726
  130. dataforge_py-0.4.0/src/dataforge/providers/logistics.py +0 -545
  131. dataforge_py-0.4.0/src/dataforge/providers/lorem.py +0 -241
  132. dataforge_py-0.4.0/src/dataforge/providers/music.py +0 -505
  133. dataforge_py-0.4.0/src/dataforge/providers/payment.py +0 -300
  134. dataforge_py-0.4.0/src/dataforge/providers/person.py +0 -195
  135. dataforge_py-0.4.0/src/dataforge/providers/profile.py +0 -265
  136. dataforge_py-0.4.0/src/dataforge/providers/real_estate.py +0 -470
  137. dataforge_py-0.4.0/src/dataforge/providers/science.py +0 -365
  138. dataforge_py-0.4.0/src/dataforge/providers/social_media.py +0 -442
  139. dataforge_py-0.4.0/src/dataforge/providers/sports.py +0 -494
  140. dataforge_py-0.4.0/src/dataforge/providers/weather.py +0 -452
  141. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/data/__init__.py +0 -0
  142. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/data/correlations/__init__.py +0 -0
  143. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/__init__.py +0 -0
  144. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/__init__.py +0 -0
  145. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/internet.py +0 -0
  146. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ar_SA/phone.py +0 -0
  147. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/__init__.py +0 -0
  148. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/company.py +0 -0
  149. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/internet.py +0 -0
  150. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/da_DK/phone.py +0 -0
  151. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/__init__.py +0 -0
  152. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/company.py +0 -0
  153. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/internet.py +0 -0
  154. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/de_DE/phone.py +0 -0
  155. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/__init__.py +0 -0
  156. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/internet.py +0 -0
  157. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_AU/phone.py +0 -0
  158. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/__init__.py +0 -0
  159. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/internet.py +0 -0
  160. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_CA/phone.py +0 -0
  161. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/__init__.py +0 -0
  162. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/internet.py +0 -0
  163. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_GB/phone.py +0 -0
  164. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/__init__.py +0 -0
  165. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/internet.py +0 -0
  166. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/en_US/phone.py +0 -0
  167. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/__init__.py +0 -0
  168. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/company.py +0 -0
  169. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/internet.py +0 -0
  170. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/es_ES/phone.py +0 -0
  171. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/__init__.py +0 -0
  172. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/company.py +0 -0
  173. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/internet.py +0 -0
  174. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fi_FI/phone.py +0 -0
  175. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/__init__.py +0 -0
  176. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/company.py +0 -0
  177. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/internet.py +0 -0
  178. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/fr_FR/phone.py +0 -0
  179. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/__init__.py +0 -0
  180. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/internet.py +0 -0
  181. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/hi_IN/phone.py +0 -0
  182. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/__init__.py +0 -0
  183. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/address.py +0 -0
  184. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/internet.py +0 -0
  185. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/it_IT/phone.py +0 -0
  186. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/__init__.py +0 -0
  187. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/company.py +0 -0
  188. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/internet.py +0 -0
  189. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ja_JP/phone.py +0 -0
  190. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/__init__.py +0 -0
  191. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/address.py +0 -0
  192. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/internet.py +0 -0
  193. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ko_KR/phone.py +0 -0
  194. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/__init__.py +0 -0
  195. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/internet.py +0 -0
  196. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nb_NO/phone.py +0 -0
  197. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/__init__.py +0 -0
  198. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/internet.py +0 -0
  199. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/nl_NL/phone.py +0 -0
  200. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/__init__.py +0 -0
  201. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/internet.py +0 -0
  202. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pl_PL/phone.py +0 -0
  203. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/__init__.py +0 -0
  204. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/address.py +0 -0
  205. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/internet.py +0 -0
  206. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/pt_BR/phone.py +0 -0
  207. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/__init__.py +0 -0
  208. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/internet.py +0 -0
  209. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/ru_RU/phone.py +0 -0
  210. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/__init__.py +0 -0
  211. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/company.py +0 -0
  212. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/internet.py +0 -0
  213. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/sv_SE/phone.py +0 -0
  214. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/__init__.py +0 -0
  215. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/company.py +0 -0
  216. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/internet.py +0 -0
  217. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/tr_TR/phone.py +0 -0
  218. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/__init__.py +0 -0
  219. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/address.py +0 -0
  220. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/internet.py +0 -0
  221. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/locales/zh_CN/phone.py +0 -0
  222. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/providers/__init__.py +0 -0
  223. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/py.typed +0 -0
  224. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/pytest_plugin.py +0 -0
  225. {dataforge_py-0.4.0 → dataforge_py-0.5.0}/src/dataforge/tui/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dataforge-py
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: High-performance fake data generator for testing
5
5
  Author: Ivan Rener
6
6
  Author-email: Ivan Rener <ivan.rener@multitude.com>
@@ -53,6 +53,16 @@ forge.person.first_name(count=1_000_000) # 1M names in ~55ms
53
53
  - [Unique Values](#unique-values)
54
54
  - [Locales](#locales) (17 locales)
55
55
  - **Advanced Features**
56
+ - [Faker Compatibility Layer](#faker-compatibility-layer)
57
+ - [Multi-Locale Mixing](#multi-locale-mixing)
58
+ - [Dynamic Fields (`define()`)](#dynamic-fields-define)
59
+ - [Field Transform Pipelines (`pipe()`)](#field-transform-pipelines-pipe)
60
+ - [Type-Driven Schema](#type-driven-schema)
61
+ - [Data Contract Validation](#data-contract-validation)
62
+ - [Hypothesis Strategy Bridge](#hypothesis-strategy-bridge)
63
+ - [HTTP Mock Data Server](#http-mock-data-server)
64
+ - [XLSX Export](#xlsx-export)
65
+ - [Statistical Distribution Fitting](#statistical-distribution-fitting)
56
66
  - [Time-Series Generation](#time-series-generation)
57
67
  - [Schema Inference](#schema-inference)
58
68
  - [Chaos Testing](#chaos-testing)
@@ -93,6 +103,16 @@ forge.person.first_name(count=1_000_000) # 1M names in ~55ms
93
103
  - **OpenAPI / JSON Schema Import** — generate fake data from API specs with `$ref` resolution
94
104
  - **Streaming to Queues** — emit data to HTTP, Kafka, or RabbitMQ with token-bucket rate limiting
95
105
  - **Interactive TUI** — terminal UI for browsing providers, building schemas, and exporting data
106
+ - **Faker Compatibility** — drop-in `Faker` replacement with 57 method mappings for painless migration
107
+ - **Multi-Locale Mixing** — pass a list of locales to randomly blend data from multiple languages
108
+ - **Dynamic Fields** — `define()` custom fields with element lists, weighted pools, or arbitrary callables
109
+ - **Transform Pipelines** — `pipe()` chains composable transforms (casing, truncation, hashing, redaction) onto any field
110
+ - **Type-Driven Schema** — auto-generate schemas from `@dataclass` and `TypedDict` classes via annotation introspection
111
+ - **Data Contract Validation** — validate generated or imported data against semantic regex patterns and non-empty constraints
112
+ - **Hypothesis Bridge** — `strategy()` and `forge_strategy()` integrate DataForge fields into Hypothesis property-based tests
113
+ - **HTTP Mock Server** — `dataforge --serve` starts a zero-dependency JSON API that returns fake data on every GET
114
+ - **XLSX Export** — `to_excel()` writes schemas to `.xlsx` files using streaming `openpyxl` workbooks
115
+ - **Distribution Fitting** — infer Normal, LogNormal, Exponential, Beta, and Zipf distributions from numeric columns
96
116
  - **27 Providers** — person, address, internet, company, phone, finance, datetime, color, file, network, lorem, barcode, misc, automotive, crypto, ecommerce, education, geo, government, medical, payment, profile, science, text, ai\_prompt, llm, ai\_chat
97
117
  - **17 Locales** — en\_US, en\_GB, en\_AU, en\_CA, de\_DE, fr\_FR, es\_ES, it\_IT, pt\_BR, nl\_NL, pl\_PL, ru\_RU, ar\_SA, hi\_IN, ja\_JP, ko\_KR, zh\_CN
98
118
 
@@ -114,6 +134,8 @@ pip install polars # to_polars()
114
134
  pip install pandas # to_dataframe()
115
135
  pip install pydantic # schema_from_pydantic()
116
136
  pip install sqlalchemy # schema_from_sqlalchemy(), DatabaseSeeder
137
+ pip install openpyxl # to_excel()
138
+ pip install hypothesis # Hypothesis strategy bridge
117
139
  ```
118
140
 
119
141
  **Optional extras** (bundled in pyproject.toml):
@@ -862,6 +884,567 @@ forge.person.full_name() # "田中太郎"
862
884
 
863
885
  ---
864
886
 
887
+ ## Faker Compatibility Layer
888
+
889
+ Drop-in replacement for the `faker` library. Provides the same `Faker` class API so you can migrate existing code by changing a single import — while gaining DataForge's performance.
890
+
891
+ ```python
892
+ # Before (faker)
893
+ # from faker import Faker
894
+
895
+ # After (dataforge — same API, faster)
896
+ from dataforge.compat import Faker
897
+
898
+ fake = Faker(locale="en_US", seed=42)
899
+
900
+ fake.name() # "James Smith"
901
+ fake.email() # "james.smith@gmail.com"
902
+ fake.address() # "4821 Oak Ave, Chicago, IL 60614"
903
+ fake.company() # "Acme Corp"
904
+ fake.phone_number() # "(555) 123-4567"
905
+ fake.date() # "2024-03-15"
906
+ fake.text() # "Lorem ipsum dolor sit amet..."
907
+ ```
908
+
909
+ ### Supported Methods
910
+
911
+ The compatibility layer maps 57 Faker method names to DataForge fields. A few common mappings:
912
+
913
+ | Faker method | DataForge field |
914
+ |-------------|-----------------|
915
+ | `name()` | `full_name` |
916
+ | `first_name()` | `first_name` |
917
+ | `last_name()` | `last_name` |
918
+ | `email()` | `email` |
919
+ | `address()` | `full_address` |
920
+ | `company()` | `company_name` |
921
+ | `phone_number()` | `phone_number` |
922
+ | `date()` | `date` |
923
+ | `city()` | `city` |
924
+ | `state()` | `state` |
925
+ | `zipcode()` | `zip_code` |
926
+ | `url()` | `url` |
927
+ | `ipv4()` | `ipv4` |
928
+ | `uuid4()` | `uuid4` |
929
+ | `ssn()` | `ssn` |
930
+ | `credit_card_number()` | `credit_card_number` |
931
+
932
+ Any method not in the explicit map falls back to DataForge's alias lookup, then to a direct field name match. Resolved methods are cached for subsequent calls.
933
+
934
+ ### Seeding
935
+
936
+ ```python
937
+ # Global seed (class method)
938
+ Faker.seed(42)
939
+
940
+ # Instance seed
941
+ fake = Faker(seed=42)
942
+ fake.seed_instance(99)
943
+
944
+ # Multi-locale
945
+ fake = Faker(["en_US", "fr_FR", "de_DE"])
946
+ fake.name() # randomly picks a locale per call
947
+ ```
948
+
949
+ ---
950
+
951
+ ## Multi-Locale Mixing
952
+
953
+ Pass a list of locales to blend data from multiple languages in a single forge instance. Each generation call randomly selects one of the configured locales.
954
+
955
+ ```python
956
+ from dataforge import DataForge
957
+
958
+ forge = DataForge(locale=["en_US", "fr_FR", "ja_JP"], seed=42)
959
+
960
+ # Each call randomly picks a locale
961
+ forge.person.full_name() # "James Smith" or "Jean Dupont" or "田中太郎"
962
+ forge.address.city() # "Chicago" or "Paris" or "東京"
963
+
964
+ # Check configured locales
965
+ forge.locales # ("en_US", "fr_FR", "ja_JP")
966
+ forge.locale # "en_US" (primary)
967
+ ```
968
+
969
+ ### How It Works
970
+
971
+ - A child `DataForge` instance is created for each locale
972
+ - Each child gets a deterministic sub-seed derived from the parent seed
973
+ - On every provider access, one child is selected at random
974
+ - Seeding is reproducible: the same seed always produces the same locale sequence
975
+
976
+ ### Schema Integration
977
+
978
+ Multi-locale works seamlessly with the Schema API:
979
+
980
+ ```python
981
+ forge = DataForge(locale=["en_US", "de_DE", "es_ES"], seed=42)
982
+ schema = forge.schema(["first_name", "last_name", "city"])
983
+ rows = schema.generate(100)
984
+ # Rows contain a mix of English, German, and Spanish names and cities
985
+ ```
986
+
987
+ ---
988
+
989
+ ## Dynamic Fields (`define()`)
990
+
991
+ Define custom fields that can be used anywhere a built-in field name is accepted — in schemas, bulk export, and the CLI.
992
+
993
+ ```python
994
+ from dataforge import DataForge
995
+
996
+ forge = DataForge(seed=42)
997
+
998
+ # From a list of elements (uniform random)
999
+ forge.define("status", elements=["active", "inactive", "pending"])
1000
+ forge.status() # "active"
1001
+
1002
+ # With weighted probabilities
1003
+ forge.define("priority", elements=["low", "medium", "high"], weights=[0.5, 0.3, 0.2])
1004
+ forge.priority() # "low" (50% of the time)
1005
+
1006
+ # From a callable
1007
+ forge.define("score", func=lambda: round(random.gauss(75, 10), 1))  # requires `import random`
1008
+ forge.score() # 78.3
1009
+ ```
1010
+
1011
+ ### Batch Generation
1012
+
1013
+ Custom fields support `count=N` like built-in fields:
1014
+
1015
+ ```python
1016
+ forge.define("tier", elements=["free", "pro", "enterprise"])
1017
+ tiers = forge.tier(count=1000) # list of 1000 random tiers
1018
+ ```
1019
+
1020
+ ### Schema Integration
1021
+
1022
+ Custom fields are resolved by name in schemas:
1023
+
1024
+ ```python
1025
+ forge.define("status", elements=["active", "inactive", "pending"])
1026
+ schema = forge.schema({
1027
+ "Name": "full_name",
1028
+ "Email": "email",
1029
+ "Status": "status",
1030
+ })
1031
+ rows = schema.generate(100)
1032
+ # [{"Name": "James Smith", "Email": "...", "Status": "active"}, ...]
1033
+ ```
1034
+
1035
+ ---
1036
+
1037
+ ## Field Transform Pipelines (`pipe()`)
1038
+
1039
+ Chain composable post-generation transforms onto any field. The `pipe()` function creates a field spec that first generates data from a provider, then applies one or more transform functions in sequence.
1040
+
1041
+ ```python
1042
+ from dataforge import DataForge
1043
+ from dataforge.transforms import pipe, upper, lower, truncate, maybe_null
1044
+
1045
+ forge = DataForge(seed=42)
1046
+
1047
+ schema = forge.schema({
1048
+ "Username": pipe("username", upper),
1049
+ "Bio": pipe("sentence", truncate(50)),
1050
+ "Email": pipe("email", lower),
1051
+ "Phone": pipe("phone_number", maybe_null(0.2)),
1052
+ })
1053
+ rows = schema.generate(100)
1054
+ # [{"Username": "JSMITH42", "Bio": "Lorem ipsum dolor...", ...}, ...]
1055
+ ```
1056
+
1057
+ ### Built-in Transforms
1058
+
1059
+ **Case transforms:**
1060
+
1061
+ | Transform | Description | Example |
1062
+ |-----------|-------------|---------|
1063
+ | `upper` | Uppercase | `"hello"` -> `"HELLO"` |
1064
+ | `lower` | Lowercase | `"Hello"` -> `"hello"` |
1065
+ | `title_case` | Title Case | `"hello world"` -> `"Hello World"` |
1066
+ | `snake_case` | snake_case | `"Hello World"` -> `"hello_world"` |
1067
+ | `camel_case` | camelCase | `"hello world"` -> `"helloWorld"` |
1068
+ | `kebab_case` | kebab-case | `"Hello World"` -> `"hello-world"` |
1069
+
1070
+ **String transforms:**
1071
+
1072
+ | Transform | Description | Example |
1073
+ |-----------|-------------|---------|
1074
+ | `truncate(n, suffix="...")` | Truncate to *n* chars | `"Hello World"` -> `"Hello..."` |
1075
+ | `strip` | Strip whitespace | `" hello "` -> `"hello"` |
1076
+ | `prefix(pre)` | Prepend string | `"world"` -> `"hello_world"` |
1077
+ | `suffix(suf)` | Append string | `"hello"` -> `"hello_world"` |
1078
+ | `wrap(before, after)` | Wrap with delimiters | `"hi"` -> `"[hi]"` |
1079
+ | `replace(old, new)` | String replacement | `"a-b"` -> `"a_b"` |
1080
+
1081
+ **Data transforms:**
1082
+
1083
+ | Transform | Description |
1084
+ |-----------|-------------|
1085
+ | `maybe_null(probability)` | Replace with `None` at given rate |
1086
+ | `hash_with(algorithm)` | Hash value (SHA-256, MD5, etc.) |
1087
+ | `encode_b64` | Base64-encode |
1088
+ | `decode_b64` | Base64-decode |
1089
+ | `redact(char, keep_start, keep_end)` | Redact middle characters |
1090
+ | `apply_if(condition, transform)` | Conditionally apply a transform |
1091
+
1092
+ ### Chaining Multiple Transforms
1093
+
1094
+ Transforms are applied left to right:
1095
+
1096
+ ```python
1097
+ from dataforge.transforms import pipe, lower, replace, truncate
1098
+
1099
+ schema = forge.schema({
1100
+ "slug": pipe("full_name", lower, replace(" ", "-"), truncate(20)),
1101
+ })
1102
+ # "James Smith" -> "james smith" -> "james-smith" -> "james-smith"
1103
+ ```
1104
+
1105
+ ### Custom Transforms
1106
+
1107
+ Any `(value) -> value` callable works as a transform:
1108
+
1109
+ ```python
1110
+ schema = forge.schema({
1111
+ "price": pipe("price", lambda v: f"${v}"),
1112
+ "name": pipe("first_name", str.upper),
1113
+ })
1114
+ ```
1115
+
1116
+ ---
1117
+
1118
+ ## Type-Driven Schema
1119
+
1120
+ Auto-generate schemas from Python `@dataclass` and `TypedDict` classes. Field names and type annotations are matched to DataForge providers via a 3-tier resolution: exact registry match, alias heuristic (~90 common field names), then type-based fallback.
1121
+
1122
+ ### From Dataclasses
1123
+
1124
+ ```python
1125
+ from dataclasses import dataclass
1126
+ from dataforge import DataForge
1127
+
1128
+ @dataclass
1129
+ class User:
1130
+ first_name: str
1131
+ last_name: str
1132
+ email: str
1133
+ age: int
1134
+ is_active: bool
1135
+
1136
+ forge = DataForge(seed=42)
1137
+ schema = forge.schema_from_dataclass(User)
1138
+ rows = schema.generate(100)
1139
+ # [{"first_name": "James", "last_name": "Smith", "email": "...", ...}, ...]
1140
+ ```
1141
+
1142
+ ### From TypedDicts
1143
+
1144
+ ```python
1145
+ from typing import TypedDict
1146
+
1147
+ class Product(TypedDict):
1148
+ product_name: str
1149
+ sku: str
1150
+ price: float
1151
+ category: str
1152
+
1153
+ schema = forge.schema_from_typed_dict(Product)
1154
+ rows = schema.generate(100)
1155
+ ```
1156
+
1157
+ ### Resolution Order
1158
+
1159
+ For each field, DataForge tries:
1160
+
1161
+ 1. **Exact match** — field name exists in the provider registry (e.g. `email` -> `internet.email`)
1162
+ 2. **Alias heuristic** — field name appears in `_FIELD_ALIASES` (e.g. `user_email` -> `email`, `zip` -> `zip_code`)
1163
+ 3. **Type fallback** — Python type maps to a default field (`bool` -> `boolean`, `datetime` -> `datetime`, `UUID` -> `uuid4`)
1164
+
1165
+ Fields that cannot be resolved emit a `UserWarning` and are skipped. A `ValueError` is raised if no fields could be mapped at all.
1166
+
1167
+ ---
1168
+
1169
+ ## Data Contract Validation
1170
+
1171
+ Validate that generated or imported data conforms to expected semantic patterns. The validator checks each cell against 14 regex-based semantic validators and enforces non-empty constraints for identity fields.
1172
+
1173
+ ```python
1174
+ from dataforge.validation import validate_records, validate_csv
1175
+
1176
+ # Validate in-memory records
1177
+ report = validate_records(
1178
+ records=[
1179
+ {"email": "alice@test.com", "name": "Alice"},
1180
+ {"email": "not-an-email", "name": ""},
1181
+ ],
1182
+ field_map={"email": "email", "name": "full_name"},
1183
+ )
1184
+
1185
+ print(report.is_valid) # False
1186
+ print(report.violation_count) # 2
1187
+ print(report.summary())
1188
+ ```
1189
+
1190
+ ### Semantic Validators
1191
+
1192
+ The following field types are validated with regex patterns:
1193
+
1194
+ | Field | Pattern |
1195
+ |-------|---------|
1196
+ | `email` | RFC-compliant `user@domain.tld` |
1197
+ | `ipv4` | Dotted quad `0-255.0-255.0-255.0-255` |
1198
+ | `ipv6` | Hex groups with colons |
1199
+ | `url` | `http(s)://...` |
1200
+ | `uuid4` | 8-4-4-4-12 hex format |
1201
+ | `date` | ISO `YYYY-MM-DD` |
1202
+ | `datetime` | ISO `YYYY-MM-DD HH:MM:SS` |
1203
+ | `time` | `HH:MM:SS` |
1204
+ | `phone_number` | Digit groups with optional delimiters |
1205
+ | `zip_code` | 5-digit or 5+4 US zip |
1206
+ | `ssn` | `NNN-NN-NNNN` |
1207
+ | `mac_address` | Hex pairs with colons |
1208
+ | `hex_color` | `#RRGGBB` |
1209
+ | `credit_card_number` | 13-19 digits |
1210
+
1211
+ ### Non-Empty Constraints
1212
+
1213
+ Identity fields (`first_name`, `last_name`, `full_name`, `email`, `city`, `state`, `country`, `company_name`, `job_title`, `username`, `domain_name`) must be non-null and non-empty unless listed in `null_fields`.
1214
+
1215
+ ### CSV Validation
1216
+
1217
+ ```python
1218
+ report = validate_csv(
1219
+ path="users.csv",
1220
+ field_map={"email": "email", "phone": "phone_number"},
1221
+ max_rows=10_000,
1222
+ delimiter=",",
1223
+ encoding="utf-8",
1224
+ )
1225
+ ```
1226
+
1227
+ ### Schema Integration
1228
+
1229
+ Validate data directly from a schema:
1230
+
1231
+ ```python
1232
+ schema = forge.schema({"Email": "email", "Phone": "phone_number"})
1233
+ rows = schema.generate(1000)
1234
+ report = schema.validate(rows)
1235
+ # or validate a CSV file:
1236
+ report = schema.validate("users.csv")
1237
+ ```
1238
+
1239
+ ### Violation Reports
1240
+
1241
+ ```python
1242
+ report = validate_records(records, field_map)
1243
+
1244
+ report.is_valid # bool — True if no violations
1245
+ report.violation_count # int — total number of violations
1246
+ report.total_rows # int — rows checked
1247
+ report.total_columns # int — columns checked
1248
+
1249
+ # Group by column
1250
+ by_col = report.violations_by_column()
1251
+ # {"email": [Violation(row=1, column="email", ...), ...]}
1252
+
1253
+ # Human-readable summary (up to 5 violations per column)
1254
+ print(report.summary())
1255
+ ```
1256
+
1257
+ ---
1258
+
1259
+ ## Hypothesis Strategy Bridge
1260
+
1261
+ Integrate DataForge fields into [Hypothesis](https://hypothesis.readthedocs.io/) property-based tests. Requires `pip install hypothesis`.
1262
+
1263
+ ```python
1264
+ from hypothesis import given
+ from dataforge.compat.hypothesis import strategy, forge_strategy
1265
+
1266
+ # Single-field strategy
1267
+ @given(email=strategy("email"))
1268
+ def test_emails_contain_at(email):
1269
+ assert "@" in email
1270
+
1271
+ # Multi-field strategy (returns dicts)
1272
+ @given(row=forge_strategy(["first_name", "email", "city"]))
1273
+ def test_row_has_keys(row):
1274
+ assert "first_name" in row
1275
+ assert "email" in row
1276
+ assert "city" in row
1277
+ ```
1278
+
1279
+ ### `strategy(field, locale, **kwargs)`
1280
+
1281
+ Creates a Hypothesis `SearchStrategy` that yields values from a single DataForge field.
1282
+
1283
+ ```python
1284
+ from hypothesis import given
1285
+ from dataforge.compat.hypothesis import strategy
1286
+
1287
+ @given(name=strategy("first_name", locale="fr_FR"))
1288
+ def test_french_names(name):
1289
+ assert isinstance(name, str) and len(name) > 0
1290
+ ```
1291
+
1292
+ Parameters:
1293
+ - `field` — DataForge field name (e.g. `"email"`, `"person.full_name"`)
1294
+ - `locale` — locale code (default: `"en_US"`)
1295
+ - `**kwargs` — forwarded to the provider method
1296
+
1297
+ ### `forge_strategy(fields, locale)`
1298
+
1299
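+ Creates a strategy that yields `dict[str, Any]` rows with one key per requested field. `fields` may be a list of field names (each field name becomes the key) or a mapping of column name to field name.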
1300
+
1301
+ ```python
1302
+ @given(row=forge_strategy({"Name": "full_name", "City": "city"}))
1303
+ def test_row_types(row):
1304
+ assert isinstance(row["Name"], str)
1305
+ assert isinstance(row["City"], str)
1306
+ ```
1307
+
1308
+ ---
1309
+
1310
+ ## HTTP Mock Data Server
1311
+
1312
+ Start a zero-dependency HTTP server that returns fake JSON data on every GET request. Useful for frontend prototyping, integration tests, and API mocking.
1313
+
1314
+ ```bash
1315
+ # Start the server (default: port 8080)
1316
+ dataforge --serve first_name email city
1317
+
1318
+ # Custom port and row count
1319
+ dataforge --serve --port 3000 --count 50 first_name email city
1320
+
1321
+ # With a seed for reproducible responses
1322
+ dataforge --serve --seed 42 --port 8080 first_name email city
1323
+
1324
+ # Custom column names
1325
+ dataforge --serve Name=full_name Email=email City=city
1326
+ ```
1327
+
1328
+ ### Endpoints
1329
+
1330
+ | Method | Path | Description |
1331
+ |--------|------|-------------|
1332
+ | `GET` | `/?count=N` | Returns *N* rows as a JSON array (default: `--count` value) |
1333
+
1334
+ ### Response Format
1335
+
1336
+ ```bash
1337
+ $ curl "http://localhost:8080/?count=3"
1338
+ ```
1339
+
1340
+ ```json
1341
+ [
1342
+ {"first_name": "James", "email": "james.smith@gmail.com", "city": "Chicago"},
1343
+ {"first_name": "Maria", "email": "maria.garcia@yahoo.com", "city": "Houston"},
1344
+ {"first_name": "David", "email": "david.jones@outlook.com", "city": "Phoenix"}
1345
+ ]
1346
+ ```
1347
+
1348
+ Response headers include `Content-Type: application/json; charset=utf-8` and `Access-Control-Allow-Origin: *` for CORS support.
1349
+
1350
+ ### Schema File
1351
+
1352
+ Load field definitions from a JSON, YAML, or TOML schema file:
1353
+
1354
+ ```bash
1355
+ dataforge --serve --schema my_schema.yaml --port 8080
1356
+ ```
1357
+
1358
+ ---
1359
+
1360
+ ## XLSX Export
1361
+
1362
+ Export schema data to Excel `.xlsx` files. Requires `pip install openpyxl`.
1363
+
1364
+ ```python
1365
+ from dataforge import DataForge
1366
+
1367
+ forge = DataForge(seed=42)
1368
+
1369
+ # Via Schema
1370
+ schema = forge.schema(["first_name", "last_name", "email", "city"])
1371
+ rows_written = schema.to_excel("users.xlsx", count=1000, sheet_name="Users")
1372
+
1373
+ # Via DataForge convenience method
1374
+ rows_written = forge.to_excel(
1375
+ fields={"Name": "full_name", "Email": "email", "City": "city"},
1376
+ path="contacts.xlsx",
1377
+ count=5000,
1378
+ sheet_name="Contacts",
1379
+ )
1380
+ ```
1381
+
1382
+ ### Parameters
1383
+
1384
+ | Parameter | Type | Default | Description |
1385
+ |-----------|------|---------|-------------|
1386
+ | `path` | `str` | (required) | Output file path |
1387
+ | `count` | `int` | `10` | Number of rows to generate |
1388
+ | `sheet_name` | `str` | `"Sheet1"` | Excel worksheet name |
1389
+
1390
+ The writer uses `openpyxl`'s write-only mode (`Workbook(write_only=True)`) for memory-efficient streaming of large datasets. Both `Schema.to_excel()` and `DataForge.to_excel()` return the number of rows written.
1391
+
1392
+ ---
1393
+
1394
+ ## Statistical Distribution Fitting
1395
+
1396
+ The schema inferrer can detect statistical distributions in numeric columns and report the best-fitting distribution with its parameters. This runs automatically during `infer_schema()` when columns contain at least 20 numeric values.
1397
+
1398
+ ```python
1399
+ from dataforge import DataForge
1400
+ from dataforge.inference import SchemaInferrer
1401
+
1402
+ forge = DataForge(seed=42)
1403
+ inferrer = SchemaInferrer(forge)
1404
+
1405
+ # Infer from data with numeric columns
1406
+ schema = inferrer.from_records([
1407
+ {"value": 2.3, "count": 5},
1408
+ {"value": 1.8, "count": 12},
1409
+ # ... (20+ records for distribution detection)
1410
+ ])
1411
+
1412
+ # Inspect detected distributions
1413
+ for analysis in inferrer.analyses:
1414
+ if analysis.distribution:
1415
+ print(f"{analysis.name}: {analysis.distribution}")
1416
+ # "value: {'name': 'normal', 'params': {'mean': 2.1, 'std': 0.4}}"
1417
+ ```
1418
+
1419
+ ### Supported Distributions
1420
+
1421
+ | Distribution | Condition | Parameters |
1422
+ |-------------|-----------|------------|
1423
+ | Normal | Always tested | `mean`, `std` |
1424
+ | LogNormal | All values > 0 | `mu`, `sigma` (of log-values) |
1425
+ | Exponential | All values > 0, skew > 1.5 | `rate` |
1426
+ | Beta | All values in (0, 1] | `alpha`, `beta` |
1427
+ | Zipf | Integer values >= 1, 5+ distinct ranks | `s` (exponent) |
1428
+
1429
+ ### How It Works
1430
+
1431
+ 1. **Single-pass moment accumulation** — computes mean, variance, skewness, and kurtosis in one pass over the data
1432
+ 2. **Jarque-Bera test** — used for Normal and LogNormal goodness-of-fit testing
1433
+ 3. **Least-squares R^2** — used for Zipf power-law fitting on log-rank vs log-frequency
1434
+ 4. **Best fit selection** — each candidate distribution gets a score; the lowest score wins
1435
+
1436
+ Distribution results are stored in `ColumnAnalysis.distribution` and included in `inferrer.describe()` output.
1437
+
1438
+ ### Convenience Methods
1439
+
1440
+ ```python
1441
+ # Via DataForge
1442
+ schema = forge.infer_schema(records)
1443
+ schema = forge.infer_schema_from_csv("data.csv", max_rows=1000)
1444
+ ```
1445
+
1446
+ ---
1447
+
865
1448
  ## Time-Series Generation
866
1449
 
867
1450
  Generate synthetic time-series data with configurable trends, seasonality, noise, anomalies, regime changes, missing data, and spikes.
@@ -1440,6 +2023,17 @@ The [`examples/`](examples/) directory contains comprehensive real-world usage e
1440
2023
  | [`08_streaming.py`](examples/08_streaming.py) | HTTP/Kafka/RabbitMQ streaming with rate limiting |
1441
2024
  | [`09_tui.py`](examples/09_tui.py) | Interactive TUI launch and keyboard shortcuts |
1442
2025
  | [`10_real_world_scenarios.py`](examples/10_real_world_scenarios.py) | Combined scenarios: e-commerce, healthcare, IoT, API testing |
2026
+ | [`11_faker_compat.py`](examples/11_faker_compat.py) | Faker compatibility layer — migrating from faker to DataForge |
2027
+ | [`12_multi_locale.py`](examples/12_multi_locale.py) | Multi-locale data generation for internationalized test data |
2028
+ | [`13_dynamic_fields.py`](examples/13_dynamic_fields.py) | Dynamic fields with `define()` — custom data pools and generators |
2029
+ | [`14_transform_pipelines.py`](examples/14_transform_pipelines.py) | Transform pipelines with `pipe()` — post-generation data transformation |
2030
+ | [`15_type_driven_schema.py`](examples/15_type_driven_schema.py) | Type-driven schema generation from dataclasses and TypedDicts |
2031
+ | [`16_data_validation.py`](examples/16_data_validation.py) | Data contract validation — ensuring data quality with semantic rules |
2032
+ | [`17_hypothesis_bridge.py`](examples/17_hypothesis_bridge.py) | Hypothesis strategy bridge — property-based testing with DataForge |
2033
+ | [`18_mock_server.py`](examples/18_mock_server.py) | HTTP mock data server — serving fake data over HTTP |
2034
+ | [`19_xlsx_export.py`](examples/19_xlsx_export.py) | XLSX export — generating Excel spreadsheets with DataForge schemas |
2035
+ | [`20_distribution_fitting.py`](examples/20_distribution_fitting.py) | Distribution fitting — inferring statistical distributions from data |
2036
+ | [`21_advanced_scenarios.py`](examples/21_advanced_scenarios.py) | Advanced multi-feature workflows combining DataForge capabilities |
1443
2037
 
1444
2038
  ## Benchmarks
1445
2039