datachain 0.2.17__tar.gz → 0.2.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (258)
  1. {datachain-0.2.17 → datachain-0.2.18}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.2.17/src/datachain.egg-info → datachain-0.2.18}/PKG-INFO +1 -1
  3. datachain-0.2.18/src/datachain/lib/convert/sql_to_python.py +18 -0
  4. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/dc.py +24 -0
  5. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/__init__.py +3 -2
  6. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/array.py +8 -0
  7. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/base.py +5 -0
  8. {datachain-0.2.17 → datachain-0.2.18/src/datachain.egg-info}/PKG-INFO +1 -1
  9. {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/SOURCES.txt +1 -0
  10. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_datachain.py +94 -6
  11. datachain-0.2.18/tests/unit/lib/test_sql_to_python.py +28 -0
  12. datachain-0.2.17/src/datachain/lib/convert/sql_to_python.py +0 -23
  13. {datachain-0.2.17 → datachain-0.2.18}/.cruft.json +0 -0
  14. {datachain-0.2.17 → datachain-0.2.18}/.gitattributes +0 -0
  15. {datachain-0.2.17 → datachain-0.2.18}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  16. {datachain-0.2.17 → datachain-0.2.18}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  17. {datachain-0.2.17 → datachain-0.2.18}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  18. {datachain-0.2.17 → datachain-0.2.18}/.github/codecov.yaml +0 -0
  19. {datachain-0.2.17 → datachain-0.2.18}/.github/dependabot.yml +0 -0
  20. {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/benchmarks.yml +0 -0
  21. {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/release.yml +0 -0
  22. {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/tests.yml +0 -0
  23. {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/update-template.yaml +0 -0
  24. {datachain-0.2.17 → datachain-0.2.18}/.gitignore +0 -0
  25. {datachain-0.2.17 → datachain-0.2.18}/CODE_OF_CONDUCT.rst +0 -0
  26. {datachain-0.2.17 → datachain-0.2.18}/CONTRIBUTING.rst +0 -0
  27. {datachain-0.2.17 → datachain-0.2.18}/LICENSE +0 -0
  28. {datachain-0.2.17 → datachain-0.2.18}/README.rst +0 -0
  29. {datachain-0.2.17 → datachain-0.2.18}/docs/assets/captioned_cartoons.png +0 -0
  30. {datachain-0.2.17 → datachain-0.2.18}/docs/assets/datachain.png +0 -0
  31. {datachain-0.2.17 → datachain-0.2.18}/docs/assets/flowchart.png +0 -0
  32. {datachain-0.2.17 → datachain-0.2.18}/docs/index.md +0 -0
  33. {datachain-0.2.17 → datachain-0.2.18}/docs/references/datachain.md +0 -0
  34. {datachain-0.2.17 → datachain-0.2.18}/docs/references/datatype.md +0 -0
  35. {datachain-0.2.17 → datachain-0.2.18}/docs/references/file.md +0 -0
  36. {datachain-0.2.17 → datachain-0.2.18}/docs/references/index.md +0 -0
  37. {datachain-0.2.17 → datachain-0.2.18}/docs/references/sql.md +0 -0
  38. {datachain-0.2.17 → datachain-0.2.18}/docs/references/torch.md +0 -0
  39. {datachain-0.2.17 → datachain-0.2.18}/docs/references/udf.md +0 -0
  40. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/blip2_image_desc_lib.py +0 -0
  41. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/.gitignore +0 -0
  42. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/1-quick-start.ipynb +0 -0
  43. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/2-working-with-image-datachains.ipynb +0 -0
  44. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/3-train-model.ipynb +0 -0
  45. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/4-inference.ipynb +0 -0
  46. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/README.md +0 -0
  47. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/requirements.txt +0 -0
  48. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/1-quick-start.py +0 -0
  49. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/2-basic-operations.py +0 -0
  50. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/2-embeddings.py +0 -0
  51. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/3-split-train-test.py +0 -0
  52. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/3-train-model.py +0 -0
  53. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/src/clustering.py +0 -0
  54. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/src/train.py +0 -0
  55. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/basic-operations.png +0 -0
  56. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/core-concepts.png +0 -0
  57. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/datachain-logo.png +0 -0
  58. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/datachain-overview.png +0 -0
  59. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/dataset-1.png +0 -0
  60. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/dataset-2.png +0 -0
  61. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/dataset-3.png +0 -0
  62. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/studio.png +0 -0
  63. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  64. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  65. {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/openimage-detect.py +0 -0
  66. {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/common_sql_functions.py +0 -0
  67. {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/json-csv-reader.py +0 -0
  68. {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/json-metadata-tutorial.ipynb +0 -0
  69. {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/torch-loader.py +0 -0
  70. {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/udfs/parallel.py +0 -0
  71. {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/udfs/simple.py +0 -0
  72. {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/udfs/stateful.py +0 -0
  73. {datachain-0.2.17 → datachain-0.2.18}/examples/llm/llm_chatbot_evaluation.ipynb +0 -0
  74. {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/llm-claude-aggregate-query.py +0 -0
  75. {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/llm-claude-simple-query.py +0 -0
  76. {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/llm-claude.py +0 -0
  77. {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/unstructured-text.py +0 -0
  78. {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/clip_fine_tuning.ipynb +0 -0
  79. {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/clip_inference.py +0 -0
  80. {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/hf_pipeline.py +0 -0
  81. {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/openai_image_desc_lib.py +0 -0
  82. {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/wds.py +0 -0
  83. {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/wds_filtered.py +0 -0
  84. {datachain-0.2.17 → datachain-0.2.18}/mkdocs.yml +0 -0
  85. {datachain-0.2.17 → datachain-0.2.18}/noxfile.py +0 -0
  86. {datachain-0.2.17 → datachain-0.2.18}/pyproject.toml +0 -0
  87. {datachain-0.2.17 → datachain-0.2.18}/setup.cfg +0 -0
  88. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/__init__.py +0 -0
  89. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/__main__.py +0 -0
  90. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/asyn.py +0 -0
  91. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/cache.py +0 -0
  92. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/__init__.py +0 -0
  93. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/catalog.py +0 -0
  94. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/datasource.py +0 -0
  95. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/loader.py +0 -0
  96. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/subclass.py +0 -0
  97. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/cli.py +0 -0
  98. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/cli_utils.py +0 -0
  99. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/__init__.py +0 -0
  100. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/azure.py +0 -0
  101. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/fileslice.py +0 -0
  102. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/fsspec.py +0 -0
  103. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/gcs.py +0 -0
  104. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/local.py +0 -0
  105. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/s3.py +0 -0
  106. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/config.py +0 -0
  107. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/__init__.py +0 -0
  108. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/db_engine.py +0 -0
  109. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/id_generator.py +0 -0
  110. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/job.py +0 -0
  111. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/metastore.py +0 -0
  112. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/schema.py +0 -0
  113. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/serializer.py +0 -0
  114. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/sqlite.py +0 -0
  115. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/warehouse.py +0 -0
  116. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/dataset.py +0 -0
  117. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/error.py +0 -0
  118. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/job.py +0 -0
  119. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/__init__.py +0 -0
  120. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/arrow.py +0 -0
  121. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/clip.py +0 -0
  122. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/__init__.py +0 -0
  123. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/flatten.py +0 -0
  124. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/python_to_sql.py +0 -0
  125. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/unflatten.py +0 -0
  126. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  127. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/data_model.py +0 -0
  128. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/dataset_info.py +0 -0
  129. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/file.py +0 -0
  130. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/image.py +0 -0
  131. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/meta_formats.py +0 -0
  132. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/model_store.py +0 -0
  133. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/pytorch.py +0 -0
  134. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/settings.py +0 -0
  135. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/signal_schema.py +0 -0
  136. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/text.py +0 -0
  137. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/udf.py +0 -0
  138. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/udf_signature.py +0 -0
  139. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/utils.py +0 -0
  140. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/vfile.py +0 -0
  141. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/webdataset.py +0 -0
  142. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/webdataset_laion.py +0 -0
  143. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/listing.py +0 -0
  144. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/node.py +0 -0
  145. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/nodes_fetcher.py +0 -0
  146. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/nodes_thread_pool.py +0 -0
  147. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/progress.py +0 -0
  148. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/py.typed +0 -0
  149. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/__init__.py +0 -0
  150. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/batch.py +0 -0
  151. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/builtins.py +0 -0
  152. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/dataset.py +0 -0
  153. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/dispatch.py +0 -0
  154. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/metrics.py +0 -0
  155. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/params.py +0 -0
  156. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/schema.py +0 -0
  157. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/session.py +0 -0
  158. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/udf.py +0 -0
  159. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/remote/__init__.py +0 -0
  160. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/remote/studio.py +0 -0
  161. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/__init__.py +0 -0
  162. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/default/__init__.py +0 -0
  163. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/default/base.py +0 -0
  164. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/conditional.py +0 -0
  165. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/path.py +0 -0
  166. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/random.py +0 -0
  167. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/string.py +0 -0
  168. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/selectable.py +0 -0
  169. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/__init__.py +0 -0
  170. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/types.py +0 -0
  171. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/vector.py +0 -0
  172. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/types.py +0 -0
  173. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/utils.py +0 -0
  174. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/storage.py +0 -0
  175. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/torch/__init__.py +0 -0
  176. {datachain-0.2.17 → datachain-0.2.18}/src/datachain/utils.py +0 -0
  177. {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/dependency_links.txt +0 -0
  178. {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/entry_points.txt +0 -0
  179. {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/requires.txt +0 -0
  180. {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/top_level.txt +0 -0
  181. {datachain-0.2.17 → datachain-0.2.18}/tests/__init__.py +0 -0
  182. {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/__init__.py +0 -0
  183. {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/conftest.py +0 -0
  184. {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/test_ls.py +0 -0
  185. {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/test_version.py +0 -0
  186. {datachain-0.2.17 → datachain-0.2.18}/tests/conftest.py +0 -0
  187. {datachain-0.2.17 → datachain-0.2.18}/tests/data.py +0 -0
  188. {datachain-0.2.17 → datachain-0.2.18}/tests/examples/__init__.py +0 -0
  189. {datachain-0.2.17 → datachain-0.2.18}/tests/examples/test_wds_e2e.py +0 -0
  190. {datachain-0.2.17 → datachain-0.2.18}/tests/examples/wds_data.py +0 -0
  191. {datachain-0.2.17 → datachain-0.2.18}/tests/func/__init__.py +0 -0
  192. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_catalog.py +0 -0
  193. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_client.py +0 -0
  194. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_datachain.py +0 -0
  195. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_dataset_query.py +0 -0
  196. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_datasets.py +0 -0
  197. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_feature_pickling.py +0 -0
  198. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_ls.py +0 -0
  199. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_pull.py +0 -0
  200. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_pytorch.py +0 -0
  201. {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_query.py +0 -0
  202. {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/feature_class.py +0 -0
  203. {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/feature_class_parallel.py +0 -0
  204. {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  205. {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/name_len_normal.py +0 -0
  206. {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/name_len_slow.py +0 -0
  207. {datachain-0.2.17 → datachain-0.2.18}/tests/test_cli_e2e.py +0 -0
  208. {datachain-0.2.17 → datachain-0.2.18}/tests/test_query_e2e.py +0 -0
  209. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/__init__.py +0 -0
  210. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/__init__.py +0 -0
  211. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/conftest.py +0 -0
  212. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_arrow.py +0 -0
  213. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_clip.py +0 -0
  214. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  215. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_datachain_merge.py +0 -0
  216. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_feature.py +0 -0
  217. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_feature_utils.py +0 -0
  218. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_file.py +0 -0
  219. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_image.py +0 -0
  220. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_signal_schema.py +0 -0
  221. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_text.py +0 -0
  222. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_udf_signature.py +0 -0
  223. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_utils.py +0 -0
  224. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_webdataset.py +0 -0
  225. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/__init__.py +0 -0
  226. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/sqlite/__init__.py +0 -0
  227. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/sqlite/test_utils.py +0 -0
  228. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_array.py +0 -0
  229. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_conditional.py +0 -0
  230. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_path.py +0 -0
  231. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_random.py +0 -0
  232. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_selectable.py +0 -0
  233. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_string.py +0 -0
  234. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_asyn.py +0 -0
  235. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_cache.py +0 -0
  236. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_catalog.py +0 -0
  237. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_catalog_loader.py +0 -0
  238. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_cli_parsing.py +0 -0
  239. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_client.py +0 -0
  240. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_client_s3.py +0 -0
  241. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_data_storage.py +0 -0
  242. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_database_engine.py +0 -0
  243. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_dataset.py +0 -0
  244. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_dispatch.py +0 -0
  245. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_fileslice.py +0 -0
  246. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_id_generator.py +0 -0
  247. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_listing.py +0 -0
  248. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_metastore.py +0 -0
  249. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_module_exports.py +0 -0
  250. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_query_metrics.py +0 -0
  251. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_query_params.py +0 -0
  252. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_serializer.py +0 -0
  253. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_session.py +0 -0
  254. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_storage.py +0 -0
  255. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_udf.py +0 -0
  256. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_utils.py +0 -0
  257. {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_warehouse.py +0 -0
  258. {datachain-0.2.17 → datachain-0.2.18}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.5.5'
27
+ rev: 'v0.5.6'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.17
3
+ Version: 0.2.18
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -0,0 +1,18 @@
1
+ from decimal import Decimal
2
+ from typing import Any
3
+
4
+ from sqlalchemy import ColumnElement
5
+
6
+
7
+ def sql_to_python(args_map: dict[str, ColumnElement]) -> dict[str, Any]:
8
+ res = {}
9
+ for name, sql_exp in args_map.items():
10
+ try:
11
+ type_ = sql_exp.type.python_type
12
+ if type_ == Decimal:
13
+ type_ = float
14
+ except NotImplementedError:
15
+ type_ = str
16
+ res[name] = type_
17
+
18
+ return res
@@ -20,8 +20,10 @@ import pandas as pd
20
20
  import sqlalchemy
21
21
  from pydantic import BaseModel, create_model
22
22
  from sqlalchemy.sql.functions import GenericFunction
23
+ from sqlalchemy.sql.sqltypes import NullType
23
24
 
24
25
  from datachain import DataModel
26
+ from datachain.lib.convert.python_to_sql import python_to_sql
25
27
  from datachain.lib.convert.values_to_tuples import values_to_tuples
26
28
  from datachain.lib.data_model import DataType
27
29
  from datachain.lib.dataset_info import DatasetInfo
@@ -110,6 +112,11 @@ class DatasetMergeError(DataChainParamsError): # noqa: D101
110
112
  super().__init__(f"Merge error on='{on_str}'{right_on_str}: {msg}")
111
113
 
112
114
 
115
+ class DataChainColumnError(DataChainParamsError): # noqa: D101
116
+ def __init__(self, col_name, msg): # noqa: D107
117
+ super().__init__(f"Error for column {col_name}: {msg}")
118
+
119
+
113
120
  OutputType = Union[None, DataType, Sequence[str], dict[str, DataType]]
114
121
 
115
122
 
@@ -225,6 +232,17 @@ class DataChain(DatasetQuery):
225
232
  """Get schema of the chain."""
226
233
  return self._effective_signals_schema.values
227
234
 
235
+ def column(self, name: str) -> Column:
236
+ """Returns Column instance with a type if name is found in current schema,
237
+ otherwise raises an exception.
238
+ """
239
+ name_path = name.split(".")
240
+ for path, type_, _, _ in self.signals_schema.get_flat_tree():
241
+ if path == name_path:
242
+ return Column(name, python_to_sql(type_))
243
+
244
+ raise ValueError(f"Column with name {name} not found in the schema")
245
+
228
246
  def print_schema(self) -> None:
229
247
  """Print schema of the chain."""
230
248
  self._effective_signals_schema.print_tree()
@@ -829,6 +847,12 @@ class DataChain(DatasetQuery):
829
847
  )
830
848
  ```
831
849
  """
850
+ for col_name, expr in kwargs.items():
851
+ if not isinstance(expr, Column) and isinstance(expr.type, NullType):
852
+ raise DataChainColumnError(
853
+ col_name, f"Cannot infer type with expression {expr}"
854
+ )
855
+
832
856
  mutated = {}
833
857
  schema = self.signals_schema
834
858
  for name, value in kwargs.items():
@@ -1,16 +1,17 @@
1
1
  from sqlalchemy.sql.expression import func
2
2
 
3
- from . import path, string
3
+ from . import array, path, string
4
+ from .array import avg
4
5
  from .conditional import greatest, least
5
6
  from .random import rand
6
7
 
7
8
  count = func.count
8
9
  sum = func.sum
9
- avg = func.avg
10
10
  min = func.min
11
11
  max = func.max
12
12
 
13
13
  __all__ = [
14
+ "array",
14
15
  "avg",
15
16
  "count",
16
17
  "func",
@@ -44,7 +44,15 @@ class sip_hash_64(GenericFunction): # noqa: N801
44
44
  inherit_cache = True
45
45
 
46
46
 
47
+ class avg(GenericFunction): # noqa: N801
48
+ type = Float()
49
+ package = "array"
50
+ name = "avg"
51
+ inherit_cache = True
52
+
53
+
47
54
  compiler_not_implemented(cosine_distance)
48
55
  compiler_not_implemented(euclidean_distance)
49
56
  compiler_not_implemented(length)
50
57
  compiler_not_implemented(sip_hash_64)
58
+ compiler_not_implemented(avg)
@@ -78,6 +78,7 @@ def setup():
78
78
  compiles(conditional.least, "sqlite")(compile_least)
79
79
  compiles(Values, "sqlite")(compile_values)
80
80
  compiles(random.rand, "sqlite")(compile_rand)
81
+ compiles(array.avg, "sqlite")(compile_avg)
81
82
 
82
83
  if load_usearch_extension(sqlite3.connect(":memory:")):
83
84
  compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
@@ -349,6 +350,10 @@ def compile_rand(element, compiler, **kwargs):
349
350
  return compiler.process(func.random(), **kwargs)
350
351
 
351
352
 
353
+ def compile_avg(element, compiler, **kwargs):
354
+ return compiler.process(func.avg(*element.clauses.clauses), **kwargs)
355
+
356
+
352
357
  def load_usearch_extension(conn) -> bool:
353
358
  try:
354
359
  # usearch is part of the vector optional dependencies
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.17
3
+ Version: 0.2.18
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -239,6 +239,7 @@ tests/unit/lib/test_feature_utils.py
239
239
  tests/unit/lib/test_file.py
240
240
  tests/unit/lib/test_image.py
241
241
  tests/unit/lib/test_signal_schema.py
242
+ tests/unit/lib/test_sql_to_python.py
242
243
  tests/unit/lib/test_text.py
243
244
  tests/unit/lib/test_udf_signature.py
244
245
  tests/unit/lib/test_utils.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel
10
10
 
11
11
  from datachain import Column
12
12
  from datachain.lib.data_model import DataModel
13
- from datachain.lib.dc import C, DataChain, Sys
13
+ from datachain.lib.dc import C, DataChain, DataChainColumnError, Sys
14
14
  from datachain.lib.file import File
15
15
  from datachain.lib.signal_schema import (
16
16
  SignalResolvingError,
@@ -19,6 +19,8 @@ from datachain.lib.signal_schema import (
19
19
  )
20
20
  from datachain.lib.udf_signature import UdfSignatureError
21
21
  from datachain.lib.utils import DataChainParamsError
22
+ from datachain.sql import functions as func
23
+ from datachain.sql.types import Float, Int64, String
22
24
  from tests.utils import skip_if_not_sqlite
23
25
 
24
26
  DF_DATA = {
@@ -1254,14 +1256,20 @@ def test_column_math(test_session):
1254
1256
  fib = [1, 1, 2, 3, 5, 8]
1255
1257
  chain = DataChain.from_values(num=fib, session=test_session)
1256
1258
 
1257
- ch = chain.mutate(add2=Column("num") + 2)
1259
+ ch = chain.mutate(add2=chain.column("num") + 2)
1258
1260
  assert list(ch.collect("add2")) == [x + 2 for x in fib]
1259
1261
 
1260
- ch = chain.mutate(div2=Column("num") / 2.0)
1261
- assert list(ch.collect("div2")) == [x / 2.0 for x in fib]
1262
+ ch2 = ch.mutate(x=1 - ch.column("add2"))
1263
+ assert list(ch2.collect("x")) == [1 - (x + 2.0) for x in fib]
1264
+
1265
+
1266
+ def test_column_math_division(test_session):
1267
+ skip_if_not_sqlite()
1268
+ fib = [1, 1, 2, 3, 5, 8]
1269
+ chain = DataChain.from_values(num=fib, session=test_session)
1262
1270
 
1263
- ch2 = ch.mutate(x=1 - Column("div2"))
1264
- assert list(ch2.collect("x")) == [1 - (x / 2.0) for x in fib]
1271
+ ch = chain.mutate(div2=chain.column("num") / 2.0)
1272
+ assert list(ch.collect("div2")) == [x / 2.0 for x in fib]
1265
1273
 
1266
1274
 
1267
1275
  def test_from_values_array_of_floats(test_session):
@@ -1409,3 +1417,83 @@ def test_rename_object_name_with_mutate(catalog):
1409
1417
  assert ds.signals_schema.values.get("ids") is int
1410
1418
  assert "file" not in ds.signals_schema.values
1411
1419
  assert list(ds.order_by("my_file.name").collect("my_file.name")) == ["a", "b", "c"]
1420
+
1421
+
1422
+ def test_column(catalog):
1423
+ ds = DataChain.from_values(
1424
+ ints=[1, 2], floats=[0.5, 0.5], file=[File(name="a"), File(name="b")]
1425
+ )
1426
+
1427
+ c = ds.column("ints")
1428
+ assert isinstance(c, Column)
1429
+ assert c.name == "ints"
1430
+ assert isinstance(c.type, Int64)
1431
+
1432
+ c = ds.column("floats")
1433
+ assert isinstance(c, Column)
1434
+ assert c.name == "floats"
1435
+ assert isinstance(c.type, Float)
1436
+
1437
+ c = ds.column("file.name")
1438
+ assert isinstance(c, Column)
1439
+ assert c.name == "file__name"
1440
+ assert isinstance(c.type, String)
1441
+
1442
+ with pytest.raises(ValueError):
1443
+ c = ds.column("missing")
1444
+
1445
+
1446
+ def test_mutate_with_subtraction():
1447
+ ds = DataChain.from_values(id=[1, 2])
1448
+ assert ds.mutate(new=ds.column("id") - 1).signals_schema.values["new"] is int
1449
+
1450
+
1451
+ def test_mutate_with_addition():
1452
+ ds = DataChain.from_values(id=[1, 2])
1453
+ assert ds.mutate(new=ds.column("id") + 1).signals_schema.values["new"] is int
1454
+
1455
+
1456
+ def test_mutate_with_division():
1457
+ ds = DataChain.from_values(id=[1, 2])
1458
+ assert ds.mutate(new=ds.column("id") / 10).signals_schema.values["new"] is float
1459
+
1460
+
1461
+ def test_mutate_with_multiplication():
1462
+ ds = DataChain.from_values(id=[1, 2])
1463
+ assert ds.mutate(new=ds.column("id") * 10).signals_schema.values["new"] is int
1464
+
1465
+
1466
+ def test_mutate_with_func():
1467
+ ds = DataChain.from_values(id=[1, 2])
1468
+ assert (
1469
+ ds.mutate(new=func.avg(ds.column("id"))).signals_schema.values["new"] is float
1470
+ )
1471
+
1472
+
1473
+ def test_mutate_with_complex_expression():
1474
+ ds = DataChain.from_values(id=[1, 2], name=["Jim", "Jon"])
1475
+ assert (
1476
+ ds.mutate(
1477
+ new=(func.sum(ds.column("id"))) * (5 - func.min(ds.column("id")))
1478
+ ).signals_schema.values["new"]
1479
+ is int
1480
+ )
1481
+
1482
+
1483
+ def test_mutate_with_saving():
1484
+ skip_if_not_sqlite()
1485
+ ds = DataChain.from_values(id=[1, 2])
1486
+ ds = ds.mutate(new=ds.column("id") / 2).save("mutated")
1487
+
1488
+ ds = DataChain(name="mutated")
1489
+ assert ds.signals_schema.values["new"] is float
1490
+ assert list(ds.collect("new")) == [0.5, 1.0]
1491
+
1492
+
1493
+ def test_mutate_with_expression_without_type(catalog):
1494
+ with pytest.raises(DataChainColumnError) as excinfo:
1495
+ DataChain.from_values(id=[1, 2]).mutate(new=(Column("id") - 1)).save()
1496
+
1497
+ assert str(excinfo.value) == (
1498
+ "Error for column new: Cannot infer type with expression id - :id_1"
1499
+ )
@@ -0,0 +1,28 @@
1
+ from sqlalchemy.sql.sqltypes import NullType
2
+
3
+ from datachain import Column
4
+ from datachain.lib.convert.sql_to_python import sql_to_python
5
+ from datachain.sql import functions as func
6
+ from datachain.sql.types import Float, Int64, String
7
+
8
+
9
+ def test_sql_columns_to_python_types():
10
+ assert sql_to_python(
11
+ {
12
+ "name": Column("name", String),
13
+ "age": Column("age", Int64),
14
+ "score": Column("score", Float),
15
+ }
16
+ ) == {"name": str, "age": int, "score": float}
17
+
18
+
19
+ def test_sql_expression_to_python_types():
20
+ assert sql_to_python({"age": Column("age", Int64) - 2}) == {"age": int}
21
+
22
+
23
+ def test_sql_function_to_python_types():
24
+ assert sql_to_python({"age": func.avg(Column("age", Int64))}) == {"age": float}
25
+
26
+
27
+ def test_sql_to_python_types_default_type():
28
+ assert sql_to_python({"null": Column("null", NullType)}) == {"null": str}
@@ -1,23 +0,0 @@
1
- from datetime import datetime
2
- from typing import Any
3
-
4
- from sqlalchemy import ARRAY, JSON, Boolean, DateTime, Float, Integer, String
5
-
6
- from datachain.data_storage.sqlite import Column
7
-
8
- SQL_TO_PYTHON = {
9
- String: str,
10
- Integer: int,
11
- Float: float,
12
- Boolean: bool,
13
- DateTime: datetime,
14
- ARRAY: list,
15
- JSON: dict,
16
- }
17
-
18
-
19
- def sql_to_python(args_map: dict[str, Column]) -> dict[str, Any]:
20
- return {
21
- k: SQL_TO_PYTHON.get(type(v.type), str) # type: ignore[union-attr]
22
- for k, v in args_map.items()
23
- }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes