datachain 0.3.5__tar.gz → 0.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (242)
  1. {datachain-0.3.5/src/datachain.egg-info → datachain-0.3.6}/PKG-INFO +1 -1
  2. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/catalog/catalog.py +15 -3
  3. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/sqlite.py +1 -0
  4. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/dc.py +23 -0
  5. {datachain-0.3.5 → datachain-0.3.6/src/datachain.egg-info}/PKG-INFO +1 -1
  6. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_datachain.py +15 -1
  7. {datachain-0.3.5 → datachain-0.3.6}/.cruft.json +0 -0
  8. {datachain-0.3.5 → datachain-0.3.6}/.gitattributes +0 -0
  9. {datachain-0.3.5 → datachain-0.3.6}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  10. {datachain-0.3.5 → datachain-0.3.6}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  11. {datachain-0.3.5 → datachain-0.3.6}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  12. {datachain-0.3.5 → datachain-0.3.6}/.github/codecov.yaml +0 -0
  13. {datachain-0.3.5 → datachain-0.3.6}/.github/dependabot.yml +0 -0
  14. {datachain-0.3.5 → datachain-0.3.6}/.github/workflows/benchmarks.yml +0 -0
  15. {datachain-0.3.5 → datachain-0.3.6}/.github/workflows/release.yml +0 -0
  16. {datachain-0.3.5 → datachain-0.3.6}/.github/workflows/tests-studio.yml +0 -0
  17. {datachain-0.3.5 → datachain-0.3.6}/.github/workflows/tests.yml +0 -0
  18. {datachain-0.3.5 → datachain-0.3.6}/.github/workflows/update-template.yaml +0 -0
  19. {datachain-0.3.5 → datachain-0.3.6}/.gitignore +0 -0
  20. {datachain-0.3.5 → datachain-0.3.6}/.pre-commit-config.yaml +0 -0
  21. {datachain-0.3.5 → datachain-0.3.6}/CODE_OF_CONDUCT.rst +0 -0
  22. {datachain-0.3.5 → datachain-0.3.6}/CONTRIBUTING.rst +0 -0
  23. {datachain-0.3.5 → datachain-0.3.6}/LICENSE +0 -0
  24. {datachain-0.3.5 → datachain-0.3.6}/README.rst +0 -0
  25. {datachain-0.3.5 → datachain-0.3.6}/docs/assets/captioned_cartoons.png +0 -0
  26. {datachain-0.3.5 → datachain-0.3.6}/docs/assets/datachain.png +0 -0
  27. {datachain-0.3.5 → datachain-0.3.6}/docs/assets/flowchart.png +0 -0
  28. {datachain-0.3.5 → datachain-0.3.6}/docs/index.md +0 -0
  29. {datachain-0.3.5 → datachain-0.3.6}/docs/references/datachain.md +0 -0
  30. {datachain-0.3.5 → datachain-0.3.6}/docs/references/datatype.md +0 -0
  31. {datachain-0.3.5 → datachain-0.3.6}/docs/references/file.md +0 -0
  32. {datachain-0.3.5 → datachain-0.3.6}/docs/references/index.md +0 -0
  33. {datachain-0.3.5 → datachain-0.3.6}/docs/references/sql.md +0 -0
  34. {datachain-0.3.5 → datachain-0.3.6}/docs/references/torch.md +0 -0
  35. {datachain-0.3.5 → datachain-0.3.6}/docs/references/udf.md +0 -0
  36. {datachain-0.3.5 → datachain-0.3.6}/examples/computer_vision/blip2_image_desc_lib.py +0 -0
  37. {datachain-0.3.5 → datachain-0.3.6}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  38. {datachain-0.3.5 → datachain-0.3.6}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  39. {datachain-0.3.5 → datachain-0.3.6}/examples/computer_vision/openimage-detect.py +0 -0
  40. {datachain-0.3.5 → datachain-0.3.6}/examples/get_started/common_sql_functions.py +0 -0
  41. {datachain-0.3.5 → datachain-0.3.6}/examples/get_started/json-csv-reader.py +0 -0
  42. {datachain-0.3.5 → datachain-0.3.6}/examples/get_started/torch-loader.py +0 -0
  43. {datachain-0.3.5 → datachain-0.3.6}/examples/get_started/udfs/parallel.py +0 -0
  44. {datachain-0.3.5 → datachain-0.3.6}/examples/get_started/udfs/simple.py +0 -0
  45. {datachain-0.3.5 → datachain-0.3.6}/examples/get_started/udfs/stateful.py +0 -0
  46. {datachain-0.3.5 → datachain-0.3.6}/examples/llm_and_nlp/llm-claude-aggregate-query.py +0 -0
  47. {datachain-0.3.5 → datachain-0.3.6}/examples/llm_and_nlp/llm-claude-simple-query.py +0 -0
  48. {datachain-0.3.5 → datachain-0.3.6}/examples/llm_and_nlp/llm-claude.py +0 -0
  49. {datachain-0.3.5 → datachain-0.3.6}/examples/llm_and_nlp/unstructured-text.py +0 -0
  50. {datachain-0.3.5 → datachain-0.3.6}/examples/multimodal/clip_inference.py +0 -0
  51. {datachain-0.3.5 → datachain-0.3.6}/examples/multimodal/hf_pipeline.py +0 -0
  52. {datachain-0.3.5 → datachain-0.3.6}/examples/multimodal/openai_image_desc_lib.py +0 -0
  53. {datachain-0.3.5 → datachain-0.3.6}/examples/multimodal/wds.py +0 -0
  54. {datachain-0.3.5 → datachain-0.3.6}/examples/multimodal/wds_filtered.py +0 -0
  55. {datachain-0.3.5 → datachain-0.3.6}/mkdocs.yml +0 -0
  56. {datachain-0.3.5 → datachain-0.3.6}/noxfile.py +0 -0
  57. {datachain-0.3.5 → datachain-0.3.6}/pyproject.toml +0 -0
  58. {datachain-0.3.5 → datachain-0.3.6}/setup.cfg +0 -0
  59. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/__init__.py +0 -0
  60. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/__main__.py +0 -0
  61. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/asyn.py +0 -0
  62. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/cache.py +0 -0
  63. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/catalog/__init__.py +0 -0
  64. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/catalog/datasource.py +0 -0
  65. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/catalog/loader.py +0 -0
  66. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/catalog/subclass.py +0 -0
  67. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/cli.py +0 -0
  68. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/cli_utils.py +0 -0
  69. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/client/__init__.py +0 -0
  70. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/client/azure.py +0 -0
  71. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/client/fileslice.py +0 -0
  72. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/client/fsspec.py +0 -0
  73. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/client/gcs.py +0 -0
  74. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/client/local.py +0 -0
  75. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/client/s3.py +0 -0
  76. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/config.py +0 -0
  77. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/__init__.py +0 -0
  78. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/db_engine.py +0 -0
  79. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/id_generator.py +0 -0
  80. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/job.py +0 -0
  81. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/metastore.py +0 -0
  82. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/schema.py +0 -0
  83. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/serializer.py +0 -0
  84. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/data_storage/warehouse.py +0 -0
  85. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/dataset.py +0 -0
  86. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/error.py +0 -0
  87. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/job.py +0 -0
  88. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/__init__.py +0 -0
  89. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/arrow.py +0 -0
  90. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/clip.py +0 -0
  91. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/convert/__init__.py +0 -0
  92. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/convert/flatten.py +0 -0
  93. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/convert/python_to_sql.py +0 -0
  94. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/convert/sql_to_python.py +0 -0
  95. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/convert/unflatten.py +0 -0
  96. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  97. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/data_model.py +0 -0
  98. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/dataset_info.py +0 -0
  99. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/file.py +0 -0
  100. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/image.py +0 -0
  101. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/listing.py +0 -0
  102. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/meta_formats.py +0 -0
  103. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/model_store.py +0 -0
  104. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/pytorch.py +0 -0
  105. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/settings.py +0 -0
  106. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/signal_schema.py +0 -0
  107. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/text.py +0 -0
  108. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/udf.py +0 -0
  109. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/udf_signature.py +0 -0
  110. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/utils.py +0 -0
  111. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/vfile.py +0 -0
  112. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/webdataset.py +0 -0
  113. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/lib/webdataset_laion.py +0 -0
  114. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/listing.py +0 -0
  115. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/node.py +0 -0
  116. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/nodes_fetcher.py +0 -0
  117. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/nodes_thread_pool.py +0 -0
  118. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/progress.py +0 -0
  119. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/py.typed +0 -0
  120. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/__init__.py +0 -0
  121. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/batch.py +0 -0
  122. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/builtins.py +0 -0
  123. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/dataset.py +0 -0
  124. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/dispatch.py +0 -0
  125. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/metrics.py +0 -0
  126. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/params.py +0 -0
  127. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/queue.py +0 -0
  128. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/schema.py +0 -0
  129. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/session.py +0 -0
  130. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/query/udf.py +0 -0
  131. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/remote/__init__.py +0 -0
  132. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/remote/studio.py +0 -0
  133. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/__init__.py +0 -0
  134. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/default/__init__.py +0 -0
  135. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/default/base.py +0 -0
  136. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/functions/__init__.py +0 -0
  137. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/functions/array.py +0 -0
  138. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/functions/conditional.py +0 -0
  139. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/functions/path.py +0 -0
  140. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/functions/random.py +0 -0
  141. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/functions/string.py +0 -0
  142. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/selectable.py +0 -0
  143. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/sqlite/__init__.py +0 -0
  144. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/sqlite/base.py +0 -0
  145. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/sqlite/types.py +0 -0
  146. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/sqlite/vector.py +0 -0
  147. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/types.py +0 -0
  148. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/sql/utils.py +0 -0
  149. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/storage.py +0 -0
  150. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/torch/__init__.py +0 -0
  151. {datachain-0.3.5 → datachain-0.3.6}/src/datachain/utils.py +0 -0
  152. {datachain-0.3.5 → datachain-0.3.6}/src/datachain.egg-info/SOURCES.txt +0 -0
  153. {datachain-0.3.5 → datachain-0.3.6}/src/datachain.egg-info/dependency_links.txt +0 -0
  154. {datachain-0.3.5 → datachain-0.3.6}/src/datachain.egg-info/entry_points.txt +0 -0
  155. {datachain-0.3.5 → datachain-0.3.6}/src/datachain.egg-info/requires.txt +0 -0
  156. {datachain-0.3.5 → datachain-0.3.6}/src/datachain.egg-info/top_level.txt +0 -0
  157. {datachain-0.3.5 → datachain-0.3.6}/tests/__init__.py +0 -0
  158. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/__init__.py +0 -0
  159. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/conftest.py +0 -0
  160. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  161. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/datasets/.dvc/config +0 -0
  162. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/datasets/.gitignore +0 -0
  163. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  164. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/test_datachain.py +0 -0
  165. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/test_ls.py +0 -0
  166. {datachain-0.3.5 → datachain-0.3.6}/tests/benchmarks/test_version.py +0 -0
  167. {datachain-0.3.5 → datachain-0.3.6}/tests/conftest.py +0 -0
  168. {datachain-0.3.5 → datachain-0.3.6}/tests/data.py +0 -0
  169. {datachain-0.3.5 → datachain-0.3.6}/tests/examples/__init__.py +0 -0
  170. {datachain-0.3.5 → datachain-0.3.6}/tests/examples/test_examples.py +0 -0
  171. {datachain-0.3.5 → datachain-0.3.6}/tests/examples/test_wds_e2e.py +0 -0
  172. {datachain-0.3.5 → datachain-0.3.6}/tests/examples/wds_data.py +0 -0
  173. {datachain-0.3.5 → datachain-0.3.6}/tests/func/__init__.py +0 -0
  174. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_catalog.py +0 -0
  175. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_client.py +0 -0
  176. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_dataset_query.py +0 -0
  177. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_datasets.py +0 -0
  178. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_feature_pickling.py +0 -0
  179. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_listing.py +0 -0
  180. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_ls.py +0 -0
  181. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_pull.py +0 -0
  182. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_pytorch.py +0 -0
  183. {datachain-0.3.5 → datachain-0.3.6}/tests/func/test_query.py +0 -0
  184. {datachain-0.3.5 → datachain-0.3.6}/tests/scripts/feature_class.py +0 -0
  185. {datachain-0.3.5 → datachain-0.3.6}/tests/scripts/feature_class_parallel.py +0 -0
  186. {datachain-0.3.5 → datachain-0.3.6}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  187. {datachain-0.3.5 → datachain-0.3.6}/tests/scripts/name_len_slow.py +0 -0
  188. {datachain-0.3.5 → datachain-0.3.6}/tests/test_cli_e2e.py +0 -0
  189. {datachain-0.3.5 → datachain-0.3.6}/tests/test_query_e2e.py +0 -0
  190. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/__init__.py +0 -0
  191. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/__init__.py +0 -0
  192. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/conftest.py +0 -0
  193. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_arrow.py +0 -0
  194. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_clip.py +0 -0
  195. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_datachain.py +0 -0
  196. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  197. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_datachain_merge.py +0 -0
  198. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_feature.py +0 -0
  199. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_feature_utils.py +0 -0
  200. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_file.py +0 -0
  201. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_image.py +0 -0
  202. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_schema.py +0 -0
  203. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_signal_schema.py +0 -0
  204. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_sql_to_python.py +0 -0
  205. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_text.py +0 -0
  206. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_udf_signature.py +0 -0
  207. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_utils.py +0 -0
  208. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/lib/test_webdataset.py +0 -0
  209. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/__init__.py +0 -0
  210. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/sqlite/__init__.py +0 -0
  211. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/sqlite/test_utils.py +0 -0
  212. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/test_array.py +0 -0
  213. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/test_conditional.py +0 -0
  214. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/test_path.py +0 -0
  215. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/test_random.py +0 -0
  216. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/test_selectable.py +0 -0
  217. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/sql/test_string.py +0 -0
  218. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_asyn.py +0 -0
  219. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_cache.py +0 -0
  220. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_catalog.py +0 -0
  221. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_catalog_loader.py +0 -0
  222. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_cli_parsing.py +0 -0
  223. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_client.py +0 -0
  224. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_client_s3.py +0 -0
  225. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_data_storage.py +0 -0
  226. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_database_engine.py +0 -0
  227. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_dataset.py +0 -0
  228. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_dispatch.py +0 -0
  229. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_fileslice.py +0 -0
  230. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_id_generator.py +0 -0
  231. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_listing.py +0 -0
  232. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_metastore.py +0 -0
  233. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_module_exports.py +0 -0
  234. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_query_metrics.py +0 -0
  235. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_query_params.py +0 -0
  236. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_serializer.py +0 -0
  237. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_session.py +0 -0
  238. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_storage.py +0 -0
  239. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_udf.py +0 -0
  240. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_utils.py +0 -0
  241. {datachain-0.3.5 → datachain-0.3.6}/tests/unit/test_warehouse.py +0 -0
  242. {datachain-0.3.5 → datachain-0.3.6}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -120,13 +120,25 @@ def noop(_: str):
120
120
 
121
121
  @contextmanager
122
122
  def print_and_capture(
123
- stream: "IO[str]", callback: Callable[[str], None] = noop
123
+ stream: "IO[bytes]|IO[str]", callback: Callable[[str], None] = noop
124
124
  ) -> "Iterator[list[str]]":
125
125
  lines: list[str] = []
126
126
  append = lines.append
127
127
 
128
128
  def loop() -> None:
129
- for line in iter(stream.readline, ""):
129
+ buffer = b""
130
+ while byt := stream.read(1): # Read one byte at a time
131
+ buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
132
+
133
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
134
+ line = buffer.decode("utf-8")
135
+ print(line, end="")
136
+ callback(line)
137
+ append(line)
138
+ buffer = b"" # Clear buffer for next line
139
+
140
+ if buffer: # Handle any remaining data in the buffer
141
+ line = buffer.decode("utf-8")
130
142
  print(line, end="")
131
143
  callback(line)
132
144
  append(line)
@@ -2128,7 +2140,7 @@ class Catalog:
2128
2140
  stdout=subprocess.PIPE if capture_output else None,
2129
2141
  stderr=subprocess.STDOUT if capture_output else None,
2130
2142
  bufsize=1,
2131
- text=True,
2143
+ text=False,
2132
2144
  **kwargs,
2133
2145
  ) as proc:
2134
2146
  os.close(w)
@@ -209,6 +209,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
209
209
  return cursor.executemany(self.compile(query).string, params)
210
210
  return self.db.executemany(self.compile(query).string, params)
211
211
 
212
+ @retry_sqlite_locks
212
213
  def execute_str(self, sql: str, parameters=None) -> sqlite3.Cursor:
213
214
  if parameters is None:
214
215
  return self.db.execute(sql)
@@ -839,6 +839,10 @@ class DataChain(DatasetQuery):
839
839
  def mutate(self, **kwargs) -> "Self":
840
840
  """Create new signals based on existing signals.
841
841
 
842
+ This method cannot modify existing columns. If you need to modify an
843
+ existing column, use a different name for the new column and then use
844
+ `select()` to choose which columns to keep.
845
+
842
846
  This method is vectorized and more efficient compared to map(), and it does not
843
847
  extract or download any data from the internal database. However, it can only
844
848
  utilize predefined built-in functions and their combinations.
@@ -859,7 +863,26 @@ class DataChain(DatasetQuery):
859
863
  dist=cosine_distance(embedding_text, embedding_image)
860
864
  )
861
865
  ```
866
+
867
+ This method can be also used to rename signals. If the Column("name") provided
868
+ as value for the new signal - the old column will be dropped. Otherwise a new
869
+ column is created.
870
+
871
+ Example:
872
+ ```py
873
+ dc.mutate(
874
+ newkey=Column("oldkey")
875
+ )
876
+ ```
862
877
  """
878
+ existing_columns = set(self.signals_schema.values.keys())
879
+ for col_name in kwargs:
880
+ if col_name in existing_columns:
881
+ raise DataChainColumnError(
882
+ col_name,
883
+ "Cannot modify existing column with mutate(). "
884
+ "Use a different name for the new column.",
885
+ )
863
886
  for col_name, expr in kwargs.items():
864
887
  if not isinstance(expr, Column) and isinstance(expr.type, NullType):
865
888
  raise DataChainColumnError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -8,10 +8,11 @@ import pandas as pd
8
8
  import pytest
9
9
  import pytz
10
10
  from PIL import Image
11
+ from sqlalchemy import Column
11
12
 
12
13
  from datachain.data_storage.sqlite import SQLiteWarehouse
13
14
  from datachain.dataset import DatasetStats
14
- from datachain.lib.dc import DataChain
15
+ from datachain.lib.dc import DataChain, DataChainColumnError
15
16
  from datachain.lib.file import File, ImageFile
16
17
  from tests.utils import images_equal
17
18
 
@@ -314,3 +315,16 @@ def test_from_storage_check_rows(tmp_dir, test_session):
314
315
  location=None,
315
316
  vtype="",
316
317
  )
318
+
319
+
320
+ def test_mutate_existing_column(catalog):
321
+ ds = DataChain.from_values(ids=[1, 2, 3])
322
+
323
+ with pytest.raises(DataChainColumnError) as excinfo:
324
+ ds.mutate(ids=Column("ids") + 1)
325
+
326
+ assert (
327
+ str(excinfo.value)
328
+ == "Error for column ids: Cannot modify existing column with mutate()."
329
+ " Use a different name for the new column."
330
+ )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes