datachain 0.3.5__tar.gz → 0.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (242)
  1. {datachain-0.3.5/src/datachain.egg-info → datachain-0.3.7}/PKG-INFO +1 -1
  2. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/catalog/catalog.py +15 -3
  3. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/schema.py +1 -1
  4. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/sqlite.py +2 -10
  5. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/warehouse.py +12 -5
  6. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/dc.py +23 -0
  7. {datachain-0.3.5 → datachain-0.3.7/src/datachain.egg-info}/PKG-INFO +1 -1
  8. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_datachain.py +15 -1
  9. {datachain-0.3.5 → datachain-0.3.7}/.cruft.json +0 -0
  10. {datachain-0.3.5 → datachain-0.3.7}/.gitattributes +0 -0
  11. {datachain-0.3.5 → datachain-0.3.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  12. {datachain-0.3.5 → datachain-0.3.7}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  13. {datachain-0.3.5 → datachain-0.3.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  14. {datachain-0.3.5 → datachain-0.3.7}/.github/codecov.yaml +0 -0
  15. {datachain-0.3.5 → datachain-0.3.7}/.github/dependabot.yml +0 -0
  16. {datachain-0.3.5 → datachain-0.3.7}/.github/workflows/benchmarks.yml +0 -0
  17. {datachain-0.3.5 → datachain-0.3.7}/.github/workflows/release.yml +0 -0
  18. {datachain-0.3.5 → datachain-0.3.7}/.github/workflows/tests-studio.yml +0 -0
  19. {datachain-0.3.5 → datachain-0.3.7}/.github/workflows/tests.yml +0 -0
  20. {datachain-0.3.5 → datachain-0.3.7}/.github/workflows/update-template.yaml +0 -0
  21. {datachain-0.3.5 → datachain-0.3.7}/.gitignore +0 -0
  22. {datachain-0.3.5 → datachain-0.3.7}/.pre-commit-config.yaml +0 -0
  23. {datachain-0.3.5 → datachain-0.3.7}/CODE_OF_CONDUCT.rst +0 -0
  24. {datachain-0.3.5 → datachain-0.3.7}/CONTRIBUTING.rst +0 -0
  25. {datachain-0.3.5 → datachain-0.3.7}/LICENSE +0 -0
  26. {datachain-0.3.5 → datachain-0.3.7}/README.rst +0 -0
  27. {datachain-0.3.5 → datachain-0.3.7}/docs/assets/captioned_cartoons.png +0 -0
  28. {datachain-0.3.5 → datachain-0.3.7}/docs/assets/datachain.png +0 -0
  29. {datachain-0.3.5 → datachain-0.3.7}/docs/assets/flowchart.png +0 -0
  30. {datachain-0.3.5 → datachain-0.3.7}/docs/index.md +0 -0
  31. {datachain-0.3.5 → datachain-0.3.7}/docs/references/datachain.md +0 -0
  32. {datachain-0.3.5 → datachain-0.3.7}/docs/references/datatype.md +0 -0
  33. {datachain-0.3.5 → datachain-0.3.7}/docs/references/file.md +0 -0
  34. {datachain-0.3.5 → datachain-0.3.7}/docs/references/index.md +0 -0
  35. {datachain-0.3.5 → datachain-0.3.7}/docs/references/sql.md +0 -0
  36. {datachain-0.3.5 → datachain-0.3.7}/docs/references/torch.md +0 -0
  37. {datachain-0.3.5 → datachain-0.3.7}/docs/references/udf.md +0 -0
  38. {datachain-0.3.5 → datachain-0.3.7}/examples/computer_vision/blip2_image_desc_lib.py +0 -0
  39. {datachain-0.3.5 → datachain-0.3.7}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  40. {datachain-0.3.5 → datachain-0.3.7}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  41. {datachain-0.3.5 → datachain-0.3.7}/examples/computer_vision/openimage-detect.py +0 -0
  42. {datachain-0.3.5 → datachain-0.3.7}/examples/get_started/common_sql_functions.py +0 -0
  43. {datachain-0.3.5 → datachain-0.3.7}/examples/get_started/json-csv-reader.py +0 -0
  44. {datachain-0.3.5 → datachain-0.3.7}/examples/get_started/torch-loader.py +0 -0
  45. {datachain-0.3.5 → datachain-0.3.7}/examples/get_started/udfs/parallel.py +0 -0
  46. {datachain-0.3.5 → datachain-0.3.7}/examples/get_started/udfs/simple.py +0 -0
  47. {datachain-0.3.5 → datachain-0.3.7}/examples/get_started/udfs/stateful.py +0 -0
  48. {datachain-0.3.5 → datachain-0.3.7}/examples/llm_and_nlp/llm-claude-aggregate-query.py +0 -0
  49. {datachain-0.3.5 → datachain-0.3.7}/examples/llm_and_nlp/llm-claude-simple-query.py +0 -0
  50. {datachain-0.3.5 → datachain-0.3.7}/examples/llm_and_nlp/llm-claude.py +0 -0
  51. {datachain-0.3.5 → datachain-0.3.7}/examples/llm_and_nlp/unstructured-text.py +0 -0
  52. {datachain-0.3.5 → datachain-0.3.7}/examples/multimodal/clip_inference.py +0 -0
  53. {datachain-0.3.5 → datachain-0.3.7}/examples/multimodal/hf_pipeline.py +0 -0
  54. {datachain-0.3.5 → datachain-0.3.7}/examples/multimodal/openai_image_desc_lib.py +0 -0
  55. {datachain-0.3.5 → datachain-0.3.7}/examples/multimodal/wds.py +0 -0
  56. {datachain-0.3.5 → datachain-0.3.7}/examples/multimodal/wds_filtered.py +0 -0
  57. {datachain-0.3.5 → datachain-0.3.7}/mkdocs.yml +0 -0
  58. {datachain-0.3.5 → datachain-0.3.7}/noxfile.py +0 -0
  59. {datachain-0.3.5 → datachain-0.3.7}/pyproject.toml +0 -0
  60. {datachain-0.3.5 → datachain-0.3.7}/setup.cfg +0 -0
  61. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/__init__.py +0 -0
  62. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/__main__.py +0 -0
  63. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/asyn.py +0 -0
  64. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/cache.py +0 -0
  65. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/catalog/__init__.py +0 -0
  66. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/catalog/datasource.py +0 -0
  67. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/catalog/loader.py +0 -0
  68. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/catalog/subclass.py +0 -0
  69. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/cli.py +0 -0
  70. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/cli_utils.py +0 -0
  71. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/client/__init__.py +0 -0
  72. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/client/azure.py +0 -0
  73. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/client/fileslice.py +0 -0
  74. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/client/fsspec.py +0 -0
  75. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/client/gcs.py +0 -0
  76. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/client/local.py +0 -0
  77. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/client/s3.py +0 -0
  78. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/config.py +0 -0
  79. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/__init__.py +0 -0
  80. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/db_engine.py +0 -0
  81. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/id_generator.py +0 -0
  82. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/job.py +0 -0
  83. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/metastore.py +0 -0
  84. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/data_storage/serializer.py +0 -0
  85. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/dataset.py +0 -0
  86. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/error.py +0 -0
  87. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/job.py +0 -0
  88. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/__init__.py +0 -0
  89. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/arrow.py +0 -0
  90. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/clip.py +0 -0
  91. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/convert/__init__.py +0 -0
  92. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/convert/flatten.py +0 -0
  93. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/convert/python_to_sql.py +0 -0
  94. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/convert/sql_to_python.py +0 -0
  95. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/convert/unflatten.py +0 -0
  96. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  97. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/data_model.py +0 -0
  98. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/dataset_info.py +0 -0
  99. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/file.py +0 -0
  100. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/image.py +0 -0
  101. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/listing.py +0 -0
  102. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/meta_formats.py +0 -0
  103. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/model_store.py +0 -0
  104. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/pytorch.py +0 -0
  105. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/settings.py +0 -0
  106. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/signal_schema.py +0 -0
  107. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/text.py +0 -0
  108. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/udf.py +0 -0
  109. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/udf_signature.py +0 -0
  110. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/utils.py +0 -0
  111. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/vfile.py +0 -0
  112. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/webdataset.py +0 -0
  113. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/lib/webdataset_laion.py +0 -0
  114. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/listing.py +0 -0
  115. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/node.py +0 -0
  116. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/nodes_fetcher.py +0 -0
  117. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/nodes_thread_pool.py +0 -0
  118. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/progress.py +0 -0
  119. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/py.typed +0 -0
  120. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/__init__.py +0 -0
  121. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/batch.py +0 -0
  122. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/builtins.py +0 -0
  123. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/dataset.py +0 -0
  124. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/dispatch.py +0 -0
  125. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/metrics.py +0 -0
  126. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/params.py +0 -0
  127. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/queue.py +0 -0
  128. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/schema.py +0 -0
  129. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/session.py +0 -0
  130. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/query/udf.py +0 -0
  131. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/remote/__init__.py +0 -0
  132. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/remote/studio.py +0 -0
  133. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/__init__.py +0 -0
  134. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/default/__init__.py +0 -0
  135. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/default/base.py +0 -0
  136. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/functions/__init__.py +0 -0
  137. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/functions/array.py +0 -0
  138. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/functions/conditional.py +0 -0
  139. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/functions/path.py +0 -0
  140. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/functions/random.py +0 -0
  141. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/functions/string.py +0 -0
  142. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/selectable.py +0 -0
  143. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/sqlite/__init__.py +0 -0
  144. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/sqlite/base.py +0 -0
  145. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/sqlite/types.py +0 -0
  146. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/sqlite/vector.py +0 -0
  147. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/types.py +0 -0
  148. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/sql/utils.py +0 -0
  149. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/storage.py +0 -0
  150. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/torch/__init__.py +0 -0
  151. {datachain-0.3.5 → datachain-0.3.7}/src/datachain/utils.py +0 -0
  152. {datachain-0.3.5 → datachain-0.3.7}/src/datachain.egg-info/SOURCES.txt +0 -0
  153. {datachain-0.3.5 → datachain-0.3.7}/src/datachain.egg-info/dependency_links.txt +0 -0
  154. {datachain-0.3.5 → datachain-0.3.7}/src/datachain.egg-info/entry_points.txt +0 -0
  155. {datachain-0.3.5 → datachain-0.3.7}/src/datachain.egg-info/requires.txt +0 -0
  156. {datachain-0.3.5 → datachain-0.3.7}/src/datachain.egg-info/top_level.txt +0 -0
  157. {datachain-0.3.5 → datachain-0.3.7}/tests/__init__.py +0 -0
  158. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/__init__.py +0 -0
  159. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/conftest.py +0 -0
  160. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  161. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/datasets/.dvc/config +0 -0
  162. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/datasets/.gitignore +0 -0
  163. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  164. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/test_datachain.py +0 -0
  165. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/test_ls.py +0 -0
  166. {datachain-0.3.5 → datachain-0.3.7}/tests/benchmarks/test_version.py +0 -0
  167. {datachain-0.3.5 → datachain-0.3.7}/tests/conftest.py +0 -0
  168. {datachain-0.3.5 → datachain-0.3.7}/tests/data.py +0 -0
  169. {datachain-0.3.5 → datachain-0.3.7}/tests/examples/__init__.py +0 -0
  170. {datachain-0.3.5 → datachain-0.3.7}/tests/examples/test_examples.py +0 -0
  171. {datachain-0.3.5 → datachain-0.3.7}/tests/examples/test_wds_e2e.py +0 -0
  172. {datachain-0.3.5 → datachain-0.3.7}/tests/examples/wds_data.py +0 -0
  173. {datachain-0.3.5 → datachain-0.3.7}/tests/func/__init__.py +0 -0
  174. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_catalog.py +0 -0
  175. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_client.py +0 -0
  176. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_dataset_query.py +0 -0
  177. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_datasets.py +0 -0
  178. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_feature_pickling.py +0 -0
  179. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_listing.py +0 -0
  180. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_ls.py +0 -0
  181. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_pull.py +0 -0
  182. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_pytorch.py +0 -0
  183. {datachain-0.3.5 → datachain-0.3.7}/tests/func/test_query.py +0 -0
  184. {datachain-0.3.5 → datachain-0.3.7}/tests/scripts/feature_class.py +0 -0
  185. {datachain-0.3.5 → datachain-0.3.7}/tests/scripts/feature_class_parallel.py +0 -0
  186. {datachain-0.3.5 → datachain-0.3.7}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  187. {datachain-0.3.5 → datachain-0.3.7}/tests/scripts/name_len_slow.py +0 -0
  188. {datachain-0.3.5 → datachain-0.3.7}/tests/test_cli_e2e.py +0 -0
  189. {datachain-0.3.5 → datachain-0.3.7}/tests/test_query_e2e.py +0 -0
  190. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/__init__.py +0 -0
  191. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/__init__.py +0 -0
  192. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/conftest.py +0 -0
  193. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_arrow.py +0 -0
  194. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_clip.py +0 -0
  195. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_datachain.py +0 -0
  196. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  197. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_datachain_merge.py +0 -0
  198. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_feature.py +0 -0
  199. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_feature_utils.py +0 -0
  200. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_file.py +0 -0
  201. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_image.py +0 -0
  202. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_schema.py +0 -0
  203. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_signal_schema.py +0 -0
  204. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_sql_to_python.py +0 -0
  205. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_text.py +0 -0
  206. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_udf_signature.py +0 -0
  207. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_utils.py +0 -0
  208. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/lib/test_webdataset.py +0 -0
  209. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/__init__.py +0 -0
  210. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/sqlite/__init__.py +0 -0
  211. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/sqlite/test_utils.py +0 -0
  212. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/test_array.py +0 -0
  213. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/test_conditional.py +0 -0
  214. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/test_path.py +0 -0
  215. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/test_random.py +0 -0
  216. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/test_selectable.py +0 -0
  217. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/sql/test_string.py +0 -0
  218. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_asyn.py +0 -0
  219. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_cache.py +0 -0
  220. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_catalog.py +0 -0
  221. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_catalog_loader.py +0 -0
  222. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_cli_parsing.py +0 -0
  223. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_client.py +0 -0
  224. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_client_s3.py +0 -0
  225. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_data_storage.py +0 -0
  226. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_database_engine.py +0 -0
  227. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_dataset.py +0 -0
  228. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_dispatch.py +0 -0
  229. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_fileslice.py +0 -0
  230. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_id_generator.py +0 -0
  231. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_listing.py +0 -0
  232. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_metastore.py +0 -0
  233. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_module_exports.py +0 -0
  234. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_query_metrics.py +0 -0
  235. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_query_params.py +0 -0
  236. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_serializer.py +0 -0
  237. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_session.py +0 -0
  238. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_storage.py +0 -0
  239. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_udf.py +0 -0
  240. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_utils.py +0 -0
  241. {datachain-0.3.5 → datachain-0.3.7}/tests/unit/test_warehouse.py +0 -0
  242. {datachain-0.3.5 → datachain-0.3.7}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.5
3
+ Version: 0.3.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -120,13 +120,25 @@ def noop(_: str):
120
120
 
121
121
  @contextmanager
122
122
  def print_and_capture(
123
- stream: "IO[str]", callback: Callable[[str], None] = noop
123
+ stream: "IO[bytes]|IO[str]", callback: Callable[[str], None] = noop
124
124
  ) -> "Iterator[list[str]]":
125
125
  lines: list[str] = []
126
126
  append = lines.append
127
127
 
128
128
  def loop() -> None:
129
- for line in iter(stream.readline, ""):
129
+ buffer = b""
130
+ while byt := stream.read(1): # Read one byte at a time
131
+ buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
132
+
133
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
134
+ line = buffer.decode("utf-8")
135
+ print(line, end="")
136
+ callback(line)
137
+ append(line)
138
+ buffer = b"" # Clear buffer for next line
139
+
140
+ if buffer: # Handle any remaining data in the buffer
141
+ line = buffer.decode("utf-8")
130
142
  print(line, end="")
131
143
  callback(line)
132
144
  append(line)
@@ -2128,7 +2140,7 @@ class Catalog:
2128
2140
  stdout=subprocess.PIPE if capture_output else None,
2129
2141
  stderr=subprocess.STDOUT if capture_output else None,
2130
2142
  bufsize=1,
2131
- text=True,
2143
+ text=False,
2132
2144
  **kwargs,
2133
2145
  ) as proc:
2134
2146
  os.close(w)
@@ -50,7 +50,7 @@ def convert_rows_custom_column_types(
50
50
  columns: "ColumnCollection[str, ColumnElement[Any]]",
51
51
  rows: Iterator[tuple[Any, ...]],
52
52
  dialect: "Dialect",
53
- ):
53
+ ) -> Iterator[tuple[Any, ...]]:
54
54
  """
55
55
  This function converts values of rows columns based on their types which are
56
56
  defined in columns. We are only converting column values for which types are
@@ -27,10 +27,7 @@ import datachain.sql.sqlite
27
27
  from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
28
28
  from datachain.data_storage.db_engine import DatabaseEngine
29
29
  from datachain.data_storage.id_generator import AbstractDBIDGenerator
30
- from datachain.data_storage.schema import (
31
- DefaultSchema,
32
- convert_rows_custom_column_types,
33
- )
30
+ from datachain.data_storage.schema import DefaultSchema
34
31
  from datachain.dataset import DatasetRecord
35
32
  from datachain.error import DataChainError
36
33
  from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
@@ -209,6 +206,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
209
206
  return cursor.executemany(self.compile(query).string, params)
210
207
  return self.db.executemany(self.compile(query).string, params)
211
208
 
209
+ @retry_sqlite_locks
212
210
  def execute_str(self, sql: str, parameters=None) -> sqlite3.Cursor:
213
211
  if parameters is None:
214
212
  return self.db.execute(sql)
@@ -650,12 +648,6 @@ class SQLiteWarehouse(AbstractWarehouse):
650
648
  self.db.create_table(table, if_not_exists=if_not_exists)
651
649
  return table
652
650
 
653
- def dataset_rows_select(self, select_query: Select, **kwargs):
654
- rows = self.db.execute(select_query, **kwargs)
655
- yield from convert_rows_custom_column_types(
656
- select_query.selected_columns, rows, sqlite_dialect
657
- )
658
-
659
651
  def get_dataset_sources(
660
652
  self, dataset: DatasetRecord, version: int
661
653
  ) -> list[StorageURI]:
@@ -17,6 +17,7 @@ from sqlalchemy.sql.expression import true
17
17
  from tqdm import tqdm
18
18
 
19
19
  from datachain.client import Client
20
+ from datachain.data_storage.schema import convert_rows_custom_column_types
20
21
  from datachain.data_storage.serializer import Serializable
21
22
  from datachain.dataset import DatasetRecord
22
23
  from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
@@ -226,7 +227,7 @@ class AbstractWarehouse(ABC, Serializable):
226
227
  if limit < page_size:
227
228
  paginated_query = paginated_query.limit(None).limit(limit)
228
229
 
229
- results = self.db.execute(paginated_query.offset(offset))
230
+ results = self.dataset_rows_select(paginated_query.offset(offset))
230
231
 
231
232
  processed = False
232
233
  for row in results:
@@ -309,12 +310,18 @@ class AbstractWarehouse(ABC, Serializable):
309
310
  Merge results should not contain duplicates.
310
311
  """
311
312
 
312
- @abstractmethod
313
- def dataset_rows_select(self, select_query: sa.sql.selectable.Select, **kwargs):
313
+ def dataset_rows_select(
314
+ self,
315
+ query: sa.sql.selectable.Select,
316
+ **kwargs,
317
+ ) -> Iterator[tuple[Any, ...]]:
314
318
  """
315
- Method for fetching dataset rows from database. This is abstract since
316
- in some DBs we need to use special settings
319
+ Fetch dataset rows from database.
317
320
  """
321
+ rows = self.db.execute(query, **kwargs)
322
+ yield from convert_rows_custom_column_types(
323
+ query.selected_columns, rows, self.db.dialect
324
+ )
318
325
 
319
326
  @abstractmethod
320
327
  def get_dataset_sources(
@@ -839,6 +839,10 @@ class DataChain(DatasetQuery):
839
839
  def mutate(self, **kwargs) -> "Self":
840
840
  """Create new signals based on existing signals.
841
841
 
842
+ This method cannot modify existing columns. If you need to modify an
843
+ existing column, use a different name for the new column and then use
844
+ `select()` to choose which columns to keep.
845
+
842
846
  This method is vectorized and more efficient compared to map(), and it does not
843
847
  extract or download any data from the internal database. However, it can only
844
848
  utilize predefined built-in functions and their combinations.
@@ -859,7 +863,26 @@ class DataChain(DatasetQuery):
859
863
  dist=cosine_distance(embedding_text, embedding_image)
860
864
  )
861
865
  ```
866
+
867
+ This method can be also used to rename signals. If the Column("name") provided
868
+ as value for the new signal - the old column will be dropped. Otherwise a new
869
+ column is created.
870
+
871
+ Example:
872
+ ```py
873
+ dc.mutate(
874
+ newkey=Column("oldkey")
875
+ )
876
+ ```
862
877
  """
878
+ existing_columns = set(self.signals_schema.values.keys())
879
+ for col_name in kwargs:
880
+ if col_name in existing_columns:
881
+ raise DataChainColumnError(
882
+ col_name,
883
+ "Cannot modify existing column with mutate(). "
884
+ "Use a different name for the new column.",
885
+ )
863
886
  for col_name, expr in kwargs.items():
864
887
  if not isinstance(expr, Column) and isinstance(expr.type, NullType):
865
888
  raise DataChainColumnError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.5
3
+ Version: 0.3.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -8,10 +8,11 @@ import pandas as pd
8
8
  import pytest
9
9
  import pytz
10
10
  from PIL import Image
11
+ from sqlalchemy import Column
11
12
 
12
13
  from datachain.data_storage.sqlite import SQLiteWarehouse
13
14
  from datachain.dataset import DatasetStats
14
- from datachain.lib.dc import DataChain
15
+ from datachain.lib.dc import DataChain, DataChainColumnError
15
16
  from datachain.lib.file import File, ImageFile
16
17
  from tests.utils import images_equal
17
18
 
@@ -314,3 +315,16 @@ def test_from_storage_check_rows(tmp_dir, test_session):
314
315
  location=None,
315
316
  vtype="",
316
317
  )
318
+
319
+
320
+ def test_mutate_existing_column(catalog):
321
+ ds = DataChain.from_values(ids=[1, 2, 3])
322
+
323
+ with pytest.raises(DataChainColumnError) as excinfo:
324
+ ds.mutate(ids=Column("ids") + 1)
325
+
326
+ assert (
327
+ str(excinfo.value)
328
+ == "Error for column ids: Cannot modify existing column with mutate()."
329
+ " Use a different name for the new column."
330
+ )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes