datachain 0.30.6__tar.gz → 0.30.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (423)
  1. {datachain-0.30.6 → datachain-0.30.7}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.30.6 → datachain-0.30.7}/PKG-INFO +1 -1
  3. {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/udfs/parallel.py +2 -2
  4. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/sqlite.py +18 -15
  5. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/warehouse.py +7 -1
  6. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/database.py +2 -2
  7. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/datachain.py +28 -28
  8. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/records.py +2 -4
  9. datachain-0.30.7/src/datachain/lib/settings.py +214 -0
  10. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/udf.py +3 -20
  11. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/batch.py +2 -2
  12. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/dataset.py +44 -17
  13. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/dispatch.py +6 -0
  14. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/udf.py +2 -0
  15. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/utils.py +9 -10
  16. {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/PKG-INFO +1 -1
  17. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_datachain.py +5 -5
  18. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_to_database.py +1 -1
  19. datachain-0.30.7/tests/func/test_warehouse.py +87 -0
  20. datachain-0.30.7/tests/unit/lib/test_settings.py +472 -0
  21. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_utils.py +1 -1
  22. datachain-0.30.6/src/datachain/lib/settings.py +0 -111
  23. datachain-0.30.6/tests/func/test_warehouse.py +0 -35
  24. datachain-0.30.6/tests/unit/lib/test_settings.py +0 -61
  25. {datachain-0.30.6 → datachain-0.30.7}/.cruft.json +0 -0
  26. {datachain-0.30.6 → datachain-0.30.7}/.gitattributes +0 -0
  27. {datachain-0.30.6 → datachain-0.30.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  28. {datachain-0.30.6 → datachain-0.30.7}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  29. {datachain-0.30.6 → datachain-0.30.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  30. {datachain-0.30.6 → datachain-0.30.7}/.github/codecov.yaml +0 -0
  31. {datachain-0.30.6 → datachain-0.30.7}/.github/dependabot.yml +0 -0
  32. {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/benchmarks.yml +0 -0
  33. {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/release.yml +0 -0
  34. {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/tests-studio.yml +0 -0
  35. {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/tests.yml +0 -0
  36. {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/update-template.yaml +0 -0
  37. {datachain-0.30.6 → datachain-0.30.7}/.gitignore +0 -0
  38. {datachain-0.30.6 → datachain-0.30.7}/CODE_OF_CONDUCT.rst +0 -0
  39. {datachain-0.30.6 → datachain-0.30.7}/LICENSE +0 -0
  40. {datachain-0.30.6 → datachain-0.30.7}/README.rst +0 -0
  41. {datachain-0.30.6 → datachain-0.30.7}/docs/assets/captioned_cartoons.png +0 -0
  42. {datachain-0.30.6 → datachain-0.30.7}/docs/assets/datachain-white.svg +0 -0
  43. {datachain-0.30.6 → datachain-0.30.7}/docs/assets/datachain.svg +0 -0
  44. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/login.md +0 -0
  45. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/logout.md +0 -0
  46. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/team.md +0 -0
  47. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/token.md +0 -0
  48. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/index.md +0 -0
  49. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/cancel.md +0 -0
  50. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/clusters.md +0 -0
  51. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/logs.md +0 -0
  52. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/ls.md +0 -0
  53. {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/run.md +0 -0
  54. {datachain-0.30.6 → datachain-0.30.7}/docs/contributing.md +0 -0
  55. {datachain-0.30.6 → datachain-0.30.7}/docs/css/github-permalink-style.css +0 -0
  56. {datachain-0.30.6 → datachain-0.30.7}/docs/examples.md +0 -0
  57. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/db_migrations.md +0 -0
  58. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/delta.md +0 -0
  59. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/env.md +0 -0
  60. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/index.md +0 -0
  61. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/namespaces.md +0 -0
  62. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/processing.md +0 -0
  63. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/remotes.md +0 -0
  64. {datachain-0.30.6 → datachain-0.30.7}/docs/guide/retry.md +0 -0
  65. {datachain-0.30.6 → datachain-0.30.7}/docs/index.md +0 -0
  66. {datachain-0.30.6 → datachain-0.30.7}/docs/overrides/main.html +0 -0
  67. {datachain-0.30.6 → datachain-0.30.7}/docs/quick-start.md +0 -0
  68. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/arrowrow.md +0 -0
  69. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/bbox.md +0 -0
  70. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/file.md +0 -0
  71. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/imagefile.md +0 -0
  72. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/index.md +0 -0
  73. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/pose.md +0 -0
  74. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/segment.md +0 -0
  75. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/tarvfile.md +0 -0
  76. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/textfile.md +0 -0
  77. {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/videofile.md +0 -0
  78. {datachain-0.30.6 → datachain-0.30.7}/docs/references/datachain.md +0 -0
  79. {datachain-0.30.6 → datachain-0.30.7}/docs/references/func.md +0 -0
  80. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/aggregate.md +0 -0
  81. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/array.md +0 -0
  82. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/conditional.md +0 -0
  83. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/numeric.md +0 -0
  84. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/path.md +0 -0
  85. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/random.md +0 -0
  86. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/string.md +0 -0
  87. {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/window.md +0 -0
  88. {datachain-0.30.6 → datachain-0.30.7}/docs/references/index.md +0 -0
  89. {datachain-0.30.6 → datachain-0.30.7}/docs/references/toolkit.md +0 -0
  90. {datachain-0.30.6 → datachain-0.30.7}/docs/references/torch.md +0 -0
  91. {datachain-0.30.6 → datachain-0.30.7}/docs/references/udf.md +0 -0
  92. {datachain-0.30.6 → datachain-0.30.7}/docs/tutorials.md +0 -0
  93. {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  94. {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  95. {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/openimage-detect.py +0 -0
  96. {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/ultralytics-bbox.py +0 -0
  97. {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/ultralytics-pose.py +0 -0
  98. {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/ultralytics-segment.py +0 -0
  99. {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/common_sql_functions.py +0 -0
  100. {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/json-csv-reader.py +0 -0
  101. {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/nested_datamodel.py +0 -0
  102. {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/torch-loader.py +0 -0
  103. {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/udfs/simple.py +0 -0
  104. {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/udfs/stateful.py +0 -0
  105. {datachain-0.30.6 → datachain-0.30.7}/examples/incremental_processing/delta.py +0 -0
  106. {datachain-0.30.6 → datachain-0.30.7}/examples/incremental_processing/retry.py +0 -0
  107. {datachain-0.30.6 → datachain-0.30.7}/examples/incremental_processing/utils.py +0 -0
  108. {datachain-0.30.6 → datachain-0.30.7}/examples/llm_and_nlp/claude-query.py +0 -0
  109. {datachain-0.30.6 → datachain-0.30.7}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  110. {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/audio-to-text.py +0 -0
  111. {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/clip_inference.py +0 -0
  112. {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/hf_pipeline.py +0 -0
  113. {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/openai_image_desc_lib.py +0 -0
  114. {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/wds.py +0 -0
  115. {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/wds_filtered.py +0 -0
  116. {datachain-0.30.6 → datachain-0.30.7}/mkdocs.yml +0 -0
  117. {datachain-0.30.6 → datachain-0.30.7}/noxfile.py +0 -0
  118. {datachain-0.30.6 → datachain-0.30.7}/pyproject.toml +0 -0
  119. {datachain-0.30.6 → datachain-0.30.7}/setup.cfg +0 -0
  120. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/__init__.py +0 -0
  121. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/__main__.py +0 -0
  122. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/asyn.py +0 -0
  123. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cache.py +0 -0
  124. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/__init__.py +0 -0
  125. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/catalog.py +0 -0
  126. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/datasource.py +0 -0
  127. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/loader.py +0 -0
  128. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/__init__.py +0 -0
  129. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/__init__.py +0 -0
  130. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/datasets.py +0 -0
  131. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/du.py +0 -0
  132. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/index.py +0 -0
  133. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/ls.py +0 -0
  134. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/misc.py +0 -0
  135. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/query.py +0 -0
  136. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/show.py +0 -0
  137. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/__init__.py +0 -0
  138. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/job.py +0 -0
  139. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/studio.py +0 -0
  140. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/utils.py +0 -0
  141. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/utils.py +0 -0
  142. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/__init__.py +0 -0
  143. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/azure.py +0 -0
  144. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/fileslice.py +0 -0
  145. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/fsspec.py +0 -0
  146. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/gcs.py +0 -0
  147. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/hf.py +0 -0
  148. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/local.py +0 -0
  149. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/s3.py +0 -0
  150. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/config.py +0 -0
  151. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/__init__.py +0 -0
  152. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/db_engine.py +0 -0
  153. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/job.py +0 -0
  154. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/metastore.py +0 -0
  155. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/schema.py +0 -0
  156. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/serializer.py +0 -0
  157. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/dataset.py +0 -0
  158. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/delta.py +0 -0
  159. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/diff/__init__.py +0 -0
  160. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/error.py +0 -0
  161. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/fs/__init__.py +0 -0
  162. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/fs/reference.py +0 -0
  163. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/fs/utils.py +0 -0
  164. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/__init__.py +0 -0
  165. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/aggregate.py +0 -0
  166. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/array.py +0 -0
  167. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/base.py +0 -0
  168. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/conditional.py +0 -0
  169. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/func.py +0 -0
  170. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/numeric.py +0 -0
  171. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/path.py +0 -0
  172. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/random.py +0 -0
  173. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/string.py +0 -0
  174. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/window.py +0 -0
  175. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/job.py +0 -0
  176. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/__init__.py +0 -0
  177. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/arrow.py +0 -0
  178. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/audio.py +0 -0
  179. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/clip.py +0 -0
  180. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/__init__.py +0 -0
  181. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/flatten.py +0 -0
  182. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/python_to_sql.py +0 -0
  183. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/sql_to_python.py +0 -0
  184. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/unflatten.py +0 -0
  185. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  186. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/data_model.py +0 -0
  187. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dataset_info.py +0 -0
  188. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/__init__.py +0 -0
  189. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/csv.py +0 -0
  190. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/datasets.py +0 -0
  191. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/hf.py +0 -0
  192. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/json.py +0 -0
  193. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/listings.py +0 -0
  194. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/pandas.py +0 -0
  195. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/parquet.py +0 -0
  196. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/storage.py +0 -0
  197. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/utils.py +0 -0
  198. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/values.py +0 -0
  199. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/file.py +0 -0
  200. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/hf.py +0 -0
  201. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/image.py +0 -0
  202. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/listing.py +0 -0
  203. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/listing_info.py +0 -0
  204. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/meta_formats.py +0 -0
  205. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/model_store.py +0 -0
  206. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/namespaces.py +0 -0
  207. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/projects.py +0 -0
  208. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/pytorch.py +0 -0
  209. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/signal_schema.py +0 -0
  210. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/tar.py +0 -0
  211. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/text.py +0 -0
  212. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/udf_signature.py +0 -0
  213. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/utils.py +0 -0
  214. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/video.py +0 -0
  215. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/webdataset.py +0 -0
  216. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/webdataset_laion.py +0 -0
  217. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/listing.py +0 -0
  218. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/__init__.py +0 -0
  219. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/bbox.py +0 -0
  220. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/pose.py +0 -0
  221. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/segment.py +0 -0
  222. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/__init__.py +0 -0
  223. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/bbox.py +0 -0
  224. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/pose.py +0 -0
  225. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/segment.py +0 -0
  226. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/utils.py +0 -0
  227. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/namespace.py +0 -0
  228. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/node.py +0 -0
  229. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/nodes_fetcher.py +0 -0
  230. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/nodes_thread_pool.py +0 -0
  231. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/progress.py +0 -0
  232. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/project.py +0 -0
  233. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/py.typed +0 -0
  234. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/__init__.py +0 -0
  235. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/metrics.py +0 -0
  236. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/params.py +0 -0
  237. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/queue.py +0 -0
  238. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/schema.py +0 -0
  239. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/session.py +0 -0
  240. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/utils.py +0 -0
  241. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/remote/__init__.py +0 -0
  242. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/remote/studio.py +0 -0
  243. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/script_meta.py +0 -0
  244. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/semver.py +0 -0
  245. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/__init__.py +0 -0
  246. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/default/__init__.py +0 -0
  247. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/default/base.py +0 -0
  248. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/__init__.py +0 -0
  249. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/aggregate.py +0 -0
  250. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/array.py +0 -0
  251. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/conditional.py +0 -0
  252. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/numeric.py +0 -0
  253. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/path.py +0 -0
  254. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/random.py +0 -0
  255. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/string.py +0 -0
  256. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/postgresql_dialect.py +0 -0
  257. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/postgresql_types.py +0 -0
  258. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/selectable.py +0 -0
  259. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/__init__.py +0 -0
  260. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/base.py +0 -0
  261. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/types.py +0 -0
  262. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/vector.py +0 -0
  263. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/types.py +0 -0
  264. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/utils.py +0 -0
  265. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/studio.py +0 -0
  266. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/telemetry.py +0 -0
  267. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/toolkit/__init__.py +0 -0
  268. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/toolkit/split.py +0 -0
  269. {datachain-0.30.6 → datachain-0.30.7}/src/datachain/torch/__init__.py +0 -0
  270. {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/SOURCES.txt +0 -0
  271. {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/dependency_links.txt +0 -0
  272. {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/entry_points.txt +0 -0
  273. {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/requires.txt +0 -0
  274. {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/top_level.txt +0 -0
  275. {datachain-0.30.6 → datachain-0.30.7}/tests/__init__.py +0 -0
  276. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/__init__.py +0 -0
  277. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/conftest.py +0 -0
  278. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  279. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/.dvc/config +0 -0
  280. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/.gitignore +0 -0
  281. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  282. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/test_datachain.py +0 -0
  283. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/test_ls.py +0 -0
  284. {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/test_version.py +0 -0
  285. {datachain-0.30.6 → datachain-0.30.7}/tests/conftest.py +0 -0
  286. {datachain-0.30.6 → datachain-0.30.7}/tests/data.py +0 -0
  287. {datachain-0.30.6 → datachain-0.30.7}/tests/examples/__init__.py +0 -0
  288. {datachain-0.30.6 → datachain-0.30.7}/tests/examples/test_examples.py +0 -0
  289. {datachain-0.30.6 → datachain-0.30.7}/tests/examples/test_wds_e2e.py +0 -0
  290. {datachain-0.30.6 → datachain-0.30.7}/tests/examples/wds_data.py +0 -0
  291. {datachain-0.30.6 → datachain-0.30.7}/tests/func/__init__.py +0 -0
  292. {datachain-0.30.6 → datachain-0.30.7}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  293. {datachain-0.30.6 → datachain-0.30.7}/tests/func/data/lena.jpg +0 -0
  294. {datachain-0.30.6 → datachain-0.30.7}/tests/func/fake-service-account-credentials.json +0 -0
  295. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/__init__.py +0 -0
  296. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_aggregate.py +0 -0
  297. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_array.py +0 -0
  298. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_conditional.py +0 -0
  299. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_numeric.py +0 -0
  300. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_path.py +0 -0
  301. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_random.py +0 -0
  302. {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_string.py +0 -0
  303. {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/__init__.py +0 -0
  304. {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/running-mask0.png +0 -0
  305. {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/running-mask1.png +0 -0
  306. {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/running.jpg +0 -0
  307. {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/ships.jpg +0 -0
  308. {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/test_yolo.py +0 -0
  309. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_audio.py +0 -0
  310. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_batching.py +0 -0
  311. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_catalog.py +0 -0
  312. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_client.py +0 -0
  313. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_cloud_transfer.py +0 -0
  314. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_data_storage.py +0 -0
  315. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_datachain_merge.py +0 -0
  316. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_dataset_query.py +0 -0
  317. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_datasets.py +0 -0
  318. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_delta.py +0 -0
  319. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_feature_pickling.py +0 -0
  320. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_file.py +0 -0
  321. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_hf.py +0 -0
  322. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_hidden_field.py +0 -0
  323. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_image.py +0 -0
  324. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_listing.py +0 -0
  325. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_ls.py +0 -0
  326. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_meta_formats.py +0 -0
  327. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_metastore.py +0 -0
  328. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_metrics.py +0 -0
  329. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_mutate.py +0 -0
  330. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_pull.py +0 -0
  331. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_pytorch.py +0 -0
  332. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_query.py +0 -0
  333. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_read_database.py +0 -0
  334. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_read_dataset_remote.py +0 -0
  335. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  336. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_retry.py +0 -0
  337. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_session.py +0 -0
  338. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_studio_datetime_parsing.py +0 -0
  339. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_toolkit.py +0 -0
  340. {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_video.py +0 -0
  341. {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class.py +0 -0
  342. {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class_exception.py +0 -0
  343. {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class_parallel.py +0 -0
  344. {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  345. {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/name_len_slow.py +0 -0
  346. {datachain-0.30.6 → datachain-0.30.7}/tests/test_atomicity.py +0 -0
  347. {datachain-0.30.6 → datachain-0.30.7}/tests/test_cli_e2e.py +0 -0
  348. {datachain-0.30.6 → datachain-0.30.7}/tests/test_cli_studio.py +0 -0
  349. {datachain-0.30.6 → datachain-0.30.7}/tests/test_import_time.py +0 -0
  350. {datachain-0.30.6 → datachain-0.30.7}/tests/test_query_e2e.py +0 -0
  351. {datachain-0.30.6 → datachain-0.30.7}/tests/test_telemetry.py +0 -0
  352. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/__init__.py +0 -0
  353. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/__init__.py +0 -0
  354. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/conftest.py +0 -0
  355. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_arrow.py +0 -0
  356. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_audio.py +0 -0
  357. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_clip.py +0 -0
  358. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_datachain.py +0 -0
  359. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  360. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_datachain_merge.py +0 -0
  361. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_diff.py +0 -0
  362. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_feature.py +0 -0
  363. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_feature_utils.py +0 -0
  364. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_file.py +0 -0
  365. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_hf.py +0 -0
  366. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_image.py +0 -0
  367. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_listing_info.py +0 -0
  368. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_namespace.py +0 -0
  369. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_partition_by.py +0 -0
  370. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_project.py +0 -0
  371. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_python_to_sql.py +0 -0
  372. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_schema.py +0 -0
  373. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_signal_schema.py +0 -0
  374. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_sql_to_python.py +0 -0
  375. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_text.py +0 -0
  376. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_udf.py +0 -0
  377. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_udf_signature.py +0 -0
  378. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_utils.py +0 -0
  379. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_webdataset.py +0 -0
  380. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/__init__.py +0 -0
  381. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_bbox.py +0 -0
  382. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_pose.py +0 -0
  383. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_segment.py +0 -0
  384. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_utils.py +0 -0
  385. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/__init__.py +0 -0
  386. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/sqlite/__init__.py +0 -0
  387. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/sqlite/test_types.py +0 -0
  388. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/sqlite/test_utils.py +0 -0
  389. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_array.py +0 -0
  390. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_conditional.py +0 -0
  391. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_path.py +0 -0
  392. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_random.py +0 -0
  393. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_selectable.py +0 -0
  394. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_string.py +0 -0
  395. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_asyn.py +0 -0
  396. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_cache.py +0 -0
  397. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_catalog.py +0 -0
  398. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_catalog_loader.py +0 -0
  399. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_cli_datasets.py +0 -0
  400. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_cli_parsing.py +0 -0
  401. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_client.py +0 -0
  402. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_client_gcs.py +0 -0
  403. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_client_s3.py +0 -0
  404. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_config.py +0 -0
  405. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_data_storage.py +0 -0
  406. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_database_engine.py +0 -0
  407. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_dataset.py +0 -0
  408. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_dispatch.py +0 -0
  409. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_fileslice.py +0 -0
  410. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_func.py +0 -0
  411. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_listing.py +0 -0
  412. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_metastore.py +0 -0
  413. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_module_exports.py +0 -0
  414. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_pytorch.py +0 -0
  415. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_query.py +0 -0
  416. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_query_metrics.py +0 -0
  417. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_query_params.py +0 -0
  418. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_script_meta.py +0 -0
  419. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_semver.py +0 -0
  420. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_serializer.py +0 -0
  421. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_session.py +0 -0
  422. {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_warehouse.py +0 -0
  423. {datachain-0.30.6 → datachain-0.30.7}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.10'
27
+ rev: 'v0.12.11'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.6
3
+ Version: 0.30.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -1,7 +1,7 @@
1
1
  """
2
2
  This is a simple UDF to demonstrate local parallel processing with multiprocessing.
3
3
 
4
- In add_signals specify either parallel=-1 to use processes equal to the number
4
+ In add_signals specify either parallel=True to use processes equal to the number
5
5
  of CPUs/cores on your current machine, or parallel=N for N processes.
6
6
  The default if parallel is not specified is to run single-threaded.
7
7
 
@@ -33,7 +33,7 @@ def path_len_benchmark(path: str) -> int:
33
33
  (
34
34
  dc.read_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
35
35
  # Try to disable to see the difference in performance
36
- .settings(parallel=-1)
36
+ .settings(parallel=True)
37
37
  .map(path_len=path_len_benchmark, params=["file.path"])
38
38
  .show()
39
39
  )
@@ -37,6 +37,7 @@ from datachain import semver
37
37
  from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
38
38
  from datachain.data_storage.db_engine import DatabaseEngine
39
39
  from datachain.data_storage.schema import DefaultSchema
40
+ from datachain.data_storage.warehouse import INSERT_BATCH_SIZE
40
41
  from datachain.dataset import DatasetRecord, StorageURI
41
42
  from datachain.error import DataChainError, OutdatedDatabaseSchemaError
42
43
  from datachain.namespace import Namespace
@@ -44,7 +45,7 @@ from datachain.project import Project
44
45
  from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
45
46
  from datachain.sql.sqlite.base import load_usearch_extension
46
47
  from datachain.sql.types import SQLType
47
- from datachain.utils import DataChainDir, batched_it
48
+ from datachain.utils import DataChainDir, batched, batched_it
48
49
 
49
50
  if TYPE_CHECKING:
50
51
  from sqlalchemy.dialects.sqlite import Insert
@@ -712,19 +713,21 @@ class SQLiteWarehouse(AbstractWarehouse):
712
713
  def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
713
714
  return (e.model_dump() for e in entries)
714
715
 
715
- def insert_rows(self, table: Table, rows: Iterable[dict[str, Any]]) -> None:
716
- rows = list(rows)
717
- if not rows:
718
- return
719
-
720
- with self.db.transaction() as conn:
721
- # transactions speeds up inserts significantly as there is no separate
722
- # transaction created for each insert row
723
- self.db.executemany(
724
- table.insert().values({f: bindparam(f) for f in rows[0]}),
725
- rows,
726
- conn=conn,
727
- )
716
+ def insert_rows(
717
+ self,
718
+ table: Table,
719
+ rows: Iterable[dict[str, Any]],
720
+ batch_size: int = INSERT_BATCH_SIZE,
721
+ ) -> None:
722
+ for row_chunk in batched(rows, batch_size):
723
+ with self.db.transaction() as conn:
724
+ # transactions speeds up inserts significantly as there is no separate
725
+ # transaction created for each insert row
726
+ self.db.executemany(
727
+ table.insert().values({f: bindparam(f) for f in row_chunk[0]}),
728
+ row_chunk,
729
+ conn=conn,
730
+ )
728
731
 
729
732
  def insert_dataset_rows(self, df, dataset: DatasetRecord, version: str) -> int:
730
733
  dr = self.dataset_rows(dataset, version)
@@ -797,7 +800,7 @@ class SQLiteWarehouse(AbstractWarehouse):
797
800
  .limit(None)
798
801
  )
799
802
 
800
- for batch in batched_it(ids, 10_000):
803
+ for batch in batched_it(ids, INSERT_BATCH_SIZE):
801
804
  batch_ids = [row[0] for row in batch]
802
805
  select_q._where_criteria = (col_id.in_(batch_ids),)
803
806
  q = table.insert().from_select(list(select_q.selected_columns), select_q)
@@ -43,6 +43,7 @@ if TYPE_CHECKING:
43
43
  logger = logging.getLogger("datachain")
44
44
 
45
45
  SELECT_BATCH_SIZE = 100_000 # number of rows to fetch at a time
46
+ INSERT_BATCH_SIZE = 10_000 # number of rows to insert at a time
46
47
 
47
48
 
48
49
  class AbstractWarehouse(ABC, Serializable):
@@ -415,7 +416,12 @@ class AbstractWarehouse(ABC, Serializable):
415
416
  """Convert File entries so they can be passed on to `insert_rows()`"""
416
417
 
417
418
  @abstractmethod
418
- def insert_rows(self, table: sa.Table, rows: Iterable[dict[str, Any]]) -> None:
419
+ def insert_rows(
420
+ self,
421
+ table: sa.Table,
422
+ rows: Iterable[dict[str, Any]],
423
+ batch_size: int = INSERT_BATCH_SIZE,
424
+ ) -> None:
419
425
  """Does batch inserts of any kind of rows into table"""
420
426
 
421
427
  def insert_rows_done(self, table: sa.Table) -> None:
@@ -73,7 +73,7 @@ def to_database(
73
73
  table_name: str,
74
74
  connection: "ConnectionType",
75
75
  *,
76
- batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
76
+ batch_size: int = DEFAULT_DATABASE_BATCH_SIZE,
77
77
  on_conflict: Optional[str] = None,
78
78
  conflict_columns: Optional[list[str]] = None,
79
79
  column_mapping: Optional[dict[str, Optional[str]]] = None,
@@ -124,7 +124,7 @@ def to_database(
124
124
  table.create(conn, checkfirst=True)
125
125
 
126
126
  rows_iter = chain._leaf_values()
127
- for batch in batched(rows_iter, batch_rows):
127
+ for batch in batched(rows_iter, batch_size):
128
128
  rows_affected = _process_batch(
129
129
  conn,
130
130
  table,
@@ -342,15 +342,15 @@ class DataChain:
342
342
 
343
343
  def settings(
344
344
  self,
345
- cache=None,
346
- parallel=None,
347
- workers=None,
348
- min_task_size=None,
349
- prefetch: Optional[int] = None,
350
- sys: Optional[bool] = None,
345
+ cache: Optional[bool] = None,
346
+ prefetch: Optional[Union[bool, int]] = None,
347
+ parallel: Optional[Union[bool, int]] = None,
348
+ workers: Optional[int] = None,
351
349
  namespace: Optional[str] = None,
352
350
  project: Optional[str] = None,
353
- batch_rows: Optional[int] = None,
351
+ min_task_size: Optional[int] = None,
352
+ batch_size: Optional[int] = None,
353
+ sys: Optional[bool] = None,
354
354
  ) -> "Self":
355
355
  """Change settings for chain.
356
356
 
@@ -359,23 +359,23 @@ class DataChain:
359
359
 
360
360
  Parameters:
361
361
  cache : data caching. (default=False)
362
+ prefetch : number of workers to use for downloading files in advance.
363
+ This is enabled by default and uses 2 workers.
364
+ To disable prefetching, set it to 0 or False.
362
365
  parallel : number of thread for processors. True is a special value to
363
366
  enable all available CPUs. (default=1)
364
367
  workers : number of distributed workers. Only for Studio mode. (default=1)
365
- min_task_size : minimum number of tasks. (default=1)
366
- prefetch : number of workers to use for downloading files in advance.
367
- This is enabled by default and uses 2 workers.
368
- To disable prefetching, set it to 0.
369
368
  namespace : namespace name.
370
369
  project : project name.
371
- batch_rows : row limit per insert to balance speed and memory usage.
370
+ min_task_size : minimum number of tasks. (default=1)
371
+ batch_size : row limit per insert to balance speed and memory usage.
372
372
  (default=2000)
373
373
 
374
374
  Example:
375
375
  ```py
376
376
  chain = (
377
377
  chain
378
- .settings(cache=True, parallel=8, batch_rows=300)
378
+ .settings(cache=True, parallel=8, batch_size=300)
379
379
  .map(laion=process_webdataset(spec=WDSLaion), params="file")
380
380
  )
381
381
  ```
@@ -385,14 +385,14 @@ class DataChain:
385
385
  settings = copy.copy(self._settings)
386
386
  settings.add(
387
387
  Settings(
388
- cache,
389
- parallel,
390
- workers,
391
- min_task_size,
392
- prefetch,
393
- namespace,
394
- project,
395
- batch_rows,
388
+ cache=cache,
389
+ prefetch=prefetch,
390
+ parallel=parallel,
391
+ workers=workers,
392
+ namespace=namespace,
393
+ project=project,
394
+ min_task_size=min_task_size,
395
+ batch_size=batch_size,
396
396
  )
397
397
  )
398
398
  return self._evolve(settings=settings, _sys=sys)
@@ -745,7 +745,7 @@ class DataChain:
745
745
 
746
746
  return self._evolve(
747
747
  query=self._query.add_signals(
748
- udf_obj.to_udf_wrapper(self._settings.batch_rows),
748
+ udf_obj.to_udf_wrapper(self._settings.batch_size),
749
749
  **self._settings.to_dict(),
750
750
  ),
751
751
  signal_schema=self.signals_schema | udf_obj.output,
@@ -783,7 +783,7 @@ class DataChain:
783
783
  udf_obj.prefetch = prefetch
784
784
  return self._evolve(
785
785
  query=self._query.generate(
786
- udf_obj.to_udf_wrapper(self._settings.batch_rows),
786
+ udf_obj.to_udf_wrapper(self._settings.batch_size),
787
787
  **self._settings.to_dict(),
788
788
  ),
789
789
  signal_schema=udf_obj.output,
@@ -919,7 +919,7 @@ class DataChain:
919
919
  udf_obj = self._udf_to_obj(Aggregator, func, params, output, signal_map)
920
920
  return self._evolve(
921
921
  query=self._query.generate(
922
- udf_obj.to_udf_wrapper(self._settings.batch_rows),
922
+ udf_obj.to_udf_wrapper(self._settings.batch_size),
923
923
  partition_by=processed_partition_by,
924
924
  **self._settings.to_dict(),
925
925
  ),
@@ -968,7 +968,7 @@ class DataChain:
968
968
 
969
969
  return self._evolve(
970
970
  query=self._query.add_signals(
971
- udf_obj.to_udf_wrapper(self._settings.batch_rows, batch=batch),
971
+ udf_obj.to_udf_wrapper(self._settings.batch_size, batch=batch),
972
972
  **self._settings.to_dict(),
973
973
  ),
974
974
  signal_schema=self.signals_schema | udf_obj.output,
@@ -2314,7 +2314,7 @@ class DataChain:
2314
2314
  table_name: str,
2315
2315
  connection: "ConnectionType",
2316
2316
  *,
2317
- batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
2317
+ batch_size: int = DEFAULT_DATABASE_BATCH_SIZE,
2318
2318
  on_conflict: Optional[str] = None,
2319
2319
  conflict_columns: Optional[list[str]] = None,
2320
2320
  column_mapping: Optional[dict[str, Optional[str]]] = None,
@@ -2336,7 +2336,7 @@ class DataChain:
2336
2336
  library. If a DBAPI2 object, only sqlite3 is supported. The user is
2337
2337
  responsible for engine disposal and connection closure for the
2338
2338
  SQLAlchemy connectable; str connections are closed automatically.
2339
- batch_rows: Number of rows to insert per batch for optimal performance.
2339
+ batch_size: Number of rows to insert per batch for optimal performance.
2340
2340
  Larger batches are faster but use more memory. Default: 10,000.
2341
2341
  on_conflict: Strategy for handling duplicate rows (requires table
2342
2342
  constraints):
@@ -2417,7 +2417,7 @@ class DataChain:
2417
2417
  self,
2418
2418
  table_name,
2419
2419
  connection,
2420
- batch_rows=batch_rows,
2420
+ batch_size=batch_size,
2421
2421
  on_conflict=on_conflict,
2422
2422
  conflict_columns=conflict_columns,
2423
2423
  column_mapping=column_mapping,
@@ -31,7 +31,7 @@ def read_records(
31
31
 
32
32
  Parameters:
33
33
  to_insert : records (or a single record) to insert. Each record is
34
- a dictionary of signals and theirs values.
34
+ a dictionary of signals and their values.
35
35
  schema : describes chain signals and their corresponding types
36
36
 
37
37
  Example:
@@ -45,7 +45,6 @@ def read_records(
45
45
  """
46
46
  from datachain.query.dataset import adjust_outputs, get_col_types
47
47
  from datachain.sql.types import SQLType
48
- from datachain.utils import batched
49
48
 
50
49
  from .datasets import read_dataset
51
50
 
@@ -96,7 +95,6 @@ def read_records(
96
95
  {c.name: c.type for c in columns if isinstance(c.type, SQLType)},
97
96
  )
98
97
  records = (adjust_outputs(warehouse, record, col_types) for record in to_insert)
99
- for chunk in batched(records, READ_RECORDS_BATCH_SIZE):
100
- warehouse.insert_rows(table, chunk)
98
+ warehouse.insert_rows(table, records, batch_size=READ_RECORDS_BATCH_SIZE)
101
99
  warehouse.insert_rows_done(table)
102
100
  return read_dataset(name=dsr.full_name, session=session, settings=settings)
@@ -0,0 +1,214 @@
1
+ from typing import Any, Optional, Union
2
+
3
+ from datachain.lib.utils import DataChainParamsError
4
+
5
+ DEFAULT_CACHE = False
6
+ DEFAULT_PREFETCH = 2
7
+ DEFAULT_BATCH_SIZE = 2_000
8
+
9
+
10
+ class SettingsError(DataChainParamsError):
11
+ def __init__(self, msg: str) -> None:
12
+ super().__init__(f"Dataset settings error: {msg}")
13
+
14
+
15
+ class Settings:
16
+ """Settings for datachain."""
17
+
18
+ _cache: Optional[bool]
19
+ _prefetch: Optional[int]
20
+ _parallel: Optional[Union[bool, int]]
21
+ _workers: Optional[int]
22
+ _namespace: Optional[str]
23
+ _project: Optional[str]
24
+ _min_task_size: Optional[int]
25
+ _batch_size: Optional[int]
26
+
27
+ def __init__( # noqa: C901, PLR0912
28
+ self,
29
+ cache: Optional[bool] = None,
30
+ prefetch: Optional[Union[bool, int]] = None,
31
+ parallel: Optional[Union[bool, int]] = None,
32
+ workers: Optional[int] = None,
33
+ namespace: Optional[str] = None,
34
+ project: Optional[str] = None,
35
+ min_task_size: Optional[int] = None,
36
+ batch_size: Optional[int] = None,
37
+ ) -> None:
38
+ if cache is None:
39
+ self._cache = None
40
+ else:
41
+ if not isinstance(cache, bool):
42
+ raise SettingsError(
43
+ "'cache' argument must be bool"
44
+ f" while {cache.__class__.__name__} was given"
45
+ )
46
+ self._cache = cache
47
+
48
+ if prefetch is None or prefetch is True:
49
+ self._prefetch = None
50
+ elif prefetch is False:
51
+ self._prefetch = 0 # disable prefetch (False == 0)
52
+ else:
53
+ if not isinstance(prefetch, int):
54
+ raise SettingsError(
55
+ "'prefetch' argument must be int or bool"
56
+ f" while {prefetch.__class__.__name__} was given"
57
+ )
58
+ if prefetch < 0:
59
+ raise SettingsError(
60
+ "'prefetch' argument must be non-negative integer"
61
+ f", {prefetch} was given"
62
+ )
63
+ self._prefetch = prefetch
64
+
65
+ if parallel is None or parallel is False:
66
+ self._parallel = None
67
+ elif parallel is True:
68
+ self._parallel = True
69
+ else:
70
+ if not isinstance(parallel, int):
71
+ raise SettingsError(
72
+ "'parallel' argument must be int or bool"
73
+ f" while {parallel.__class__.__name__} was given"
74
+ )
75
+ if parallel <= 0:
76
+ raise SettingsError(
77
+ "'parallel' argument must be positive integer"
78
+ f", {parallel} was given"
79
+ )
80
+ self._parallel = parallel
81
+
82
+ if workers is None:
83
+ self._workers = None
84
+ else:
85
+ if not isinstance(workers, int) or isinstance(workers, bool):
86
+ raise SettingsError(
87
+ "'workers' argument must be int"
88
+ f" while {workers.__class__.__name__} was given"
89
+ )
90
+ if workers <= 0:
91
+ raise SettingsError(
92
+ f"'workers' argument must be positive integer, {workers} was given"
93
+ )
94
+ self._workers = workers
95
+
96
+ if namespace is None:
97
+ self._namespace = None
98
+ else:
99
+ if not isinstance(namespace, str):
100
+ raise SettingsError(
101
+ "'namespace' argument must be str"
102
+ f", {namespace.__class__.__name__} was given"
103
+ )
104
+ self._namespace = namespace
105
+
106
+ if project is None:
107
+ self._project = None
108
+ else:
109
+ if not isinstance(project, str):
110
+ raise SettingsError(
111
+ "'project' argument must be str"
112
+ f", {project.__class__.__name__} was given"
113
+ )
114
+ self._project = project
115
+
116
+ if min_task_size is None:
117
+ self._min_task_size = None
118
+ else:
119
+ if not isinstance(min_task_size, int) or isinstance(min_task_size, bool):
120
+ raise SettingsError(
121
+ "'min_task_size' argument must be int"
122
+ f", {min_task_size.__class__.__name__} was given"
123
+ )
124
+ if min_task_size <= 0:
125
+ raise SettingsError(
126
+ "'min_task_size' argument must be positive integer"
127
+ f", {min_task_size} was given"
128
+ )
129
+ self._min_task_size = min_task_size
130
+
131
+ if batch_size is None:
132
+ self._batch_size = None
133
+ else:
134
+ if not isinstance(batch_size, int) or isinstance(batch_size, bool):
135
+ raise SettingsError(
136
+ "'batch_size' argument must be int"
137
+ f", {batch_size.__class__.__name__} was given"
138
+ )
139
+ if batch_size <= 0:
140
+ raise SettingsError(
141
+ "'batch_size' argument must be positive integer"
142
+ f", {batch_size} was given"
143
+ )
144
+ self._batch_size = batch_size
145
+
146
+ @property
147
+ def cache(self) -> bool:
148
+ return self._cache if self._cache is not None else DEFAULT_CACHE
149
+
150
+ @property
151
+ def prefetch(self) -> Optional[int]:
152
+ return self._prefetch if self._prefetch is not None else DEFAULT_PREFETCH
153
+
154
+ @property
155
+ def parallel(self) -> Optional[Union[bool, int]]:
156
+ return self._parallel if self._parallel is not None else None
157
+
158
+ @property
159
+ def workers(self) -> Optional[int]:
160
+ return self._workers if self._workers is not None else None
161
+
162
+ @property
163
+ def namespace(self) -> Optional[str]:
164
+ return self._namespace if self._namespace is not None else None
165
+
166
+ @property
167
+ def project(self) -> Optional[str]:
168
+ return self._project if self._project is not None else None
169
+
170
+ @property
171
+ def min_task_size(self) -> Optional[int]:
172
+ return self._min_task_size if self._min_task_size is not None else None
173
+
174
+ @property
175
+ def batch_size(self) -> int:
176
+ return self._batch_size if self._batch_size is not None else DEFAULT_BATCH_SIZE
177
+
178
+ def to_dict(self) -> dict[str, Any]:
179
+ res: dict[str, Any] = {}
180
+ if self._cache is not None:
181
+ res["cache"] = self.cache
182
+ if self._prefetch is not None:
183
+ res["prefetch"] = self.prefetch
184
+ if self._parallel is not None:
185
+ res["parallel"] = self.parallel
186
+ if self._workers is not None:
187
+ res["workers"] = self.workers
188
+ if self._min_task_size is not None:
189
+ res["min_task_size"] = self.min_task_size
190
+ if self._namespace is not None:
191
+ res["namespace"] = self.namespace
192
+ if self._project is not None:
193
+ res["project"] = self.project
194
+ if self._batch_size is not None:
195
+ res["batch_size"] = self.batch_size
196
+ return res
197
+
198
+ def add(self, settings: "Settings") -> None:
199
+ if settings._cache is not None:
200
+ self._cache = settings._cache
201
+ if settings._prefetch is not None:
202
+ self._prefetch = settings._prefetch
203
+ if settings._parallel is not None:
204
+ self._parallel = settings._parallel
205
+ if settings._workers is not None:
206
+ self._workers = settings._workers
207
+ if settings._namespace is not None:
208
+ self._namespace = settings._namespace
209
+ if settings._project is not None:
210
+ self._project = settings._project
211
+ if settings._min_task_size is not None:
212
+ self._min_task_size = settings._min_task_size
213
+ if settings._batch_size is not None:
214
+ self._batch_size = settings._batch_size
@@ -54,23 +54,11 @@ UDFOutputSpec = Mapping[str, ColumnType]
54
54
  UDFResult = dict[str, Any]
55
55
 
56
56
 
57
- @attrs.define
58
- class UDFProperties:
59
- udf: "UDFAdapter"
60
-
61
- def get_batching(self, use_partitioning: bool = False) -> BatchingStrategy:
62
- return self.udf.get_batching(use_partitioning)
63
-
64
- @property
65
- def batch_rows(self):
66
- return self.udf.batch_rows
67
-
68
-
69
57
  @attrs.define(slots=False)
70
58
  class UDFAdapter:
71
59
  inner: "UDFBase"
72
60
  output: UDFOutputSpec
73
- batch_rows: Optional[int] = None
61
+ batch_size: Optional[int] = None
74
62
  batch: int = 1
75
63
 
76
64
  def get_batching(self, use_partitioning: bool = False) -> BatchingStrategy:
@@ -83,11 +71,6 @@ class UDFAdapter:
83
71
  return Batch(self.batch)
84
72
  raise ValueError(f"invalid batch size {self.batch}")
85
73
 
86
- @property
87
- def properties(self):
88
- # For backwards compatibility.
89
- return UDFProperties(self)
90
-
91
74
  def run(
92
75
  self,
93
76
  udf_fields: "Sequence[str]",
@@ -237,13 +220,13 @@ class UDFBase(AbstractUDF):
237
220
 
238
221
  def to_udf_wrapper(
239
222
  self,
240
- batch_rows: Optional[int] = None,
223
+ batch_size: Optional[int] = None,
241
224
  batch: int = 1,
242
225
  ) -> UDFAdapter:
243
226
  return UDFAdapter(
244
227
  self,
245
228
  self.output.to_udf_spec(),
246
- batch_rows,
229
+ batch_size,
247
230
  batch,
248
231
  )
249
232
 
@@ -81,8 +81,8 @@ class Batch(BatchingStrategy):
81
81
  # select rows in batches
82
82
  results = []
83
83
 
84
- with contextlib.closing(execute(query, page_size=page_size)) as batch_rows:
85
- for row in batch_rows:
84
+ with contextlib.closing(execute(query, page_size=page_size)) as rows:
85
+ for row in rows:
86
86
  results.append(row)
87
87
  if len(results) >= self.count:
88
88
  batch, results = results[: self.count], results[self.count :]