datachain 0.5.1__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (257)
  1. {datachain-0.5.1 → datachain-0.6.1}/.pre-commit-config.yaml +2 -2
  2. {datachain-0.5.1/src/datachain.egg-info → datachain-0.6.1}/PKG-INFO +2 -2
  3. {datachain-0.5.1 → datachain-0.6.1}/pyproject.toml +1 -1
  4. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/__init__.py +2 -0
  5. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/catalog/catalog.py +1 -9
  6. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/sqlite.py +8 -0
  7. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/warehouse.py +0 -4
  8. datachain-0.6.1/src/datachain/lib/convert/sql_to_python.py +14 -0
  9. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/convert/values_to_tuples.py +2 -2
  10. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/data_model.py +1 -1
  11. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/dc.py +82 -30
  12. datachain-0.6.1/src/datachain/lib/func/__init__.py +14 -0
  13. datachain-0.6.1/src/datachain/lib/func/aggregate.py +42 -0
  14. datachain-0.6.1/src/datachain/lib/func/func.py +64 -0
  15. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/signal_schema.py +15 -9
  16. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/udf.py +177 -151
  17. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/utils.py +5 -0
  18. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/__init__.py +1 -2
  19. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/batch.py +0 -11
  20. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/dataset.py +23 -44
  21. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/dispatch.py +0 -12
  22. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/schema.py +1 -61
  23. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/session.py +33 -25
  24. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/functions/__init__.py +1 -1
  25. datachain-0.6.1/src/datachain/sql/functions/aggregate.py +47 -0
  26. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/functions/array.py +0 -8
  27. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/functions/string.py +12 -0
  28. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/sqlite/base.py +30 -7
  29. {datachain-0.5.1 → datachain-0.6.1/src/datachain.egg-info}/PKG-INFO +2 -2
  30. {datachain-0.5.1 → datachain-0.6.1}/src/datachain.egg-info/SOURCES.txt +4 -0
  31. {datachain-0.5.1 → datachain-0.6.1}/src/datachain.egg-info/requires.txt +1 -1
  32. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_datachain.py +61 -8
  33. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_dataset_query.py +0 -34
  34. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_datasets.py +33 -0
  35. datachain-0.6.1/tests/scripts/feature_class_exception.py +34 -0
  36. {datachain-0.5.1 → datachain-0.6.1}/tests/test_atomicity.py +10 -4
  37. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_datachain.py +361 -19
  38. datachain-0.6.1/tests/unit/lib/test_sql_to_python.py +25 -0
  39. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/test_string.py +15 -0
  40. {datachain-0.5.1 → datachain-0.6.1}/tests/utils.py +20 -0
  41. datachain-0.5.1/src/datachain/lib/convert/sql_to_python.py +0 -18
  42. datachain-0.5.1/tests/scripts/feature_class_exception.py +0 -24
  43. datachain-0.5.1/tests/unit/lib/test_sql_to_python.py +0 -28
  44. {datachain-0.5.1 → datachain-0.6.1}/.cruft.json +0 -0
  45. {datachain-0.5.1 → datachain-0.6.1}/.gitattributes +0 -0
  46. {datachain-0.5.1 → datachain-0.6.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  47. {datachain-0.5.1 → datachain-0.6.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  48. {datachain-0.5.1 → datachain-0.6.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  49. {datachain-0.5.1 → datachain-0.6.1}/.github/codecov.yaml +0 -0
  50. {datachain-0.5.1 → datachain-0.6.1}/.github/dependabot.yml +0 -0
  51. {datachain-0.5.1 → datachain-0.6.1}/.github/workflows/benchmarks.yml +0 -0
  52. {datachain-0.5.1 → datachain-0.6.1}/.github/workflows/release.yml +0 -0
  53. {datachain-0.5.1 → datachain-0.6.1}/.github/workflows/tests-studio.yml +0 -0
  54. {datachain-0.5.1 → datachain-0.6.1}/.github/workflows/tests.yml +0 -0
  55. {datachain-0.5.1 → datachain-0.6.1}/.github/workflows/update-template.yaml +0 -0
  56. {datachain-0.5.1 → datachain-0.6.1}/.gitignore +0 -0
  57. {datachain-0.5.1 → datachain-0.6.1}/CODE_OF_CONDUCT.rst +0 -0
  58. {datachain-0.5.1 → datachain-0.6.1}/CONTRIBUTING.rst +0 -0
  59. {datachain-0.5.1 → datachain-0.6.1}/LICENSE +0 -0
  60. {datachain-0.5.1 → datachain-0.6.1}/README.rst +0 -0
  61. {datachain-0.5.1 → datachain-0.6.1}/docs/assets/captioned_cartoons.png +0 -0
  62. {datachain-0.5.1 → datachain-0.6.1}/docs/assets/datachain-white.svg +0 -0
  63. {datachain-0.5.1 → datachain-0.6.1}/docs/assets/datachain.svg +0 -0
  64. {datachain-0.5.1 → datachain-0.6.1}/docs/assets/flowchart.png +0 -0
  65. {datachain-0.5.1 → datachain-0.6.1}/docs/index.md +0 -0
  66. {datachain-0.5.1 → datachain-0.6.1}/docs/references/datachain.md +0 -0
  67. {datachain-0.5.1 → datachain-0.6.1}/docs/references/datatype.md +0 -0
  68. {datachain-0.5.1 → datachain-0.6.1}/docs/references/file.md +0 -0
  69. {datachain-0.5.1 → datachain-0.6.1}/docs/references/index.md +0 -0
  70. {datachain-0.5.1 → datachain-0.6.1}/docs/references/sql.md +0 -0
  71. {datachain-0.5.1 → datachain-0.6.1}/docs/references/torch.md +0 -0
  72. {datachain-0.5.1 → datachain-0.6.1}/docs/references/udf.md +0 -0
  73. {datachain-0.5.1 → datachain-0.6.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  74. {datachain-0.5.1 → datachain-0.6.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  75. {datachain-0.5.1 → datachain-0.6.1}/examples/computer_vision/openimage-detect.py +0 -0
  76. {datachain-0.5.1 → datachain-0.6.1}/examples/get_started/common_sql_functions.py +0 -0
  77. {datachain-0.5.1 → datachain-0.6.1}/examples/get_started/json-csv-reader.py +0 -0
  78. {datachain-0.5.1 → datachain-0.6.1}/examples/get_started/torch-loader.py +0 -0
  79. {datachain-0.5.1 → datachain-0.6.1}/examples/get_started/udfs/parallel.py +0 -0
  80. {datachain-0.5.1 → datachain-0.6.1}/examples/get_started/udfs/simple.py +0 -0
  81. {datachain-0.5.1 → datachain-0.6.1}/examples/get_started/udfs/stateful.py +0 -0
  82. {datachain-0.5.1 → datachain-0.6.1}/examples/llm_and_nlp/claude-query.py +0 -0
  83. {datachain-0.5.1 → datachain-0.6.1}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  84. {datachain-0.5.1 → datachain-0.6.1}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  85. {datachain-0.5.1 → datachain-0.6.1}/examples/multimodal/clip_inference.py +0 -0
  86. {datachain-0.5.1 → datachain-0.6.1}/examples/multimodal/hf_pipeline.py +0 -0
  87. {datachain-0.5.1 → datachain-0.6.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  88. {datachain-0.5.1 → datachain-0.6.1}/examples/multimodal/wds.py +0 -0
  89. {datachain-0.5.1 → datachain-0.6.1}/examples/multimodal/wds_filtered.py +0 -0
  90. {datachain-0.5.1 → datachain-0.6.1}/mkdocs.yml +0 -0
  91. {datachain-0.5.1 → datachain-0.6.1}/noxfile.py +0 -0
  92. {datachain-0.5.1 → datachain-0.6.1}/overrides/main.html +0 -0
  93. {datachain-0.5.1 → datachain-0.6.1}/setup.cfg +0 -0
  94. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/__main__.py +0 -0
  95. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/asyn.py +0 -0
  96. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/cache.py +0 -0
  97. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/catalog/__init__.py +0 -0
  98. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/catalog/datasource.py +0 -0
  99. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/catalog/loader.py +0 -0
  100. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/cli.py +0 -0
  101. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/cli_utils.py +0 -0
  102. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/__init__.py +0 -0
  103. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/azure.py +0 -0
  104. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/fileslice.py +0 -0
  105. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/fsspec.py +0 -0
  106. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/gcs.py +0 -0
  107. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/hf.py +0 -0
  108. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/local.py +0 -0
  109. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/client/s3.py +0 -0
  110. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/config.py +0 -0
  111. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/__init__.py +0 -0
  112. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/db_engine.py +0 -0
  113. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/id_generator.py +0 -0
  114. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/job.py +0 -0
  115. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/metastore.py +0 -0
  116. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/schema.py +0 -0
  117. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/data_storage/serializer.py +0 -0
  118. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/dataset.py +0 -0
  119. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/error.py +0 -0
  120. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/job.py +0 -0
  121. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/__init__.py +0 -0
  122. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/arrow.py +0 -0
  123. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/clip.py +0 -0
  124. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/convert/__init__.py +0 -0
  125. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/convert/flatten.py +0 -0
  126. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
  127. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/convert/unflatten.py +0 -0
  128. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/dataset_info.py +0 -0
  129. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/file.py +0 -0
  130. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/hf.py +0 -0
  131. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/image.py +0 -0
  132. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/listing.py +0 -0
  133. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/listing_info.py +0 -0
  134. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/meta_formats.py +0 -0
  135. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/model_store.py +0 -0
  136. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/pytorch.py +0 -0
  137. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/settings.py +0 -0
  138. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/tar.py +0 -0
  139. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/text.py +0 -0
  140. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/udf_signature.py +0 -0
  141. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/vfile.py +0 -0
  142. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/webdataset.py +0 -0
  143. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/lib/webdataset_laion.py +0 -0
  144. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/listing.py +0 -0
  145. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/node.py +0 -0
  146. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/nodes_fetcher.py +0 -0
  147. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/nodes_thread_pool.py +0 -0
  148. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/progress.py +0 -0
  149. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/py.typed +0 -0
  150. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/metrics.py +0 -0
  151. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/params.py +0 -0
  152. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/query/queue.py +0 -0
  153. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/remote/__init__.py +0 -0
  154. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/remote/studio.py +0 -0
  155. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/__init__.py +0 -0
  156. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/default/__init__.py +0 -0
  157. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/default/base.py +0 -0
  158. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/functions/conditional.py +0 -0
  159. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/functions/path.py +0 -0
  160. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/functions/random.py +0 -0
  161. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/selectable.py +0 -0
  162. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  163. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/sqlite/types.py +0 -0
  164. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/sqlite/vector.py +0 -0
  165. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/types.py +0 -0
  166. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/sql/utils.py +0 -0
  167. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/storage.py +0 -0
  168. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/telemetry.py +0 -0
  169. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/torch/__init__.py +0 -0
  170. {datachain-0.5.1 → datachain-0.6.1}/src/datachain/utils.py +0 -0
  171. {datachain-0.5.1 → datachain-0.6.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  172. {datachain-0.5.1 → datachain-0.6.1}/src/datachain.egg-info/entry_points.txt +0 -0
  173. {datachain-0.5.1 → datachain-0.6.1}/src/datachain.egg-info/top_level.txt +0 -0
  174. {datachain-0.5.1 → datachain-0.6.1}/tests/__init__.py +0 -0
  175. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/__init__.py +0 -0
  176. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/conftest.py +0 -0
  177. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  178. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  179. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/datasets/.gitignore +0 -0
  180. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  181. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/test_datachain.py +0 -0
  182. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/test_ls.py +0 -0
  183. {datachain-0.5.1 → datachain-0.6.1}/tests/benchmarks/test_version.py +0 -0
  184. {datachain-0.5.1 → datachain-0.6.1}/tests/conftest.py +0 -0
  185. {datachain-0.5.1 → datachain-0.6.1}/tests/data.py +0 -0
  186. {datachain-0.5.1 → datachain-0.6.1}/tests/examples/__init__.py +0 -0
  187. {datachain-0.5.1 → datachain-0.6.1}/tests/examples/test_examples.py +0 -0
  188. {datachain-0.5.1 → datachain-0.6.1}/tests/examples/test_wds_e2e.py +0 -0
  189. {datachain-0.5.1 → datachain-0.6.1}/tests/examples/wds_data.py +0 -0
  190. {datachain-0.5.1 → datachain-0.6.1}/tests/func/__init__.py +0 -0
  191. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_catalog.py +0 -0
  192. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_client.py +0 -0
  193. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_feature_pickling.py +0 -0
  194. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_listing.py +0 -0
  195. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_ls.py +0 -0
  196. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_meta_formats.py +0 -0
  197. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_metrics.py +0 -0
  198. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_pull.py +0 -0
  199. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_pytorch.py +0 -0
  200. {datachain-0.5.1 → datachain-0.6.1}/tests/func/test_query.py +0 -0
  201. {datachain-0.5.1 → datachain-0.6.1}/tests/scripts/feature_class.py +0 -0
  202. {datachain-0.5.1 → datachain-0.6.1}/tests/scripts/feature_class_parallel.py +0 -0
  203. {datachain-0.5.1 → datachain-0.6.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  204. {datachain-0.5.1 → datachain-0.6.1}/tests/scripts/name_len_slow.py +0 -0
  205. {datachain-0.5.1 → datachain-0.6.1}/tests/test_cli_e2e.py +0 -0
  206. {datachain-0.5.1 → datachain-0.6.1}/tests/test_query_e2e.py +0 -0
  207. {datachain-0.5.1 → datachain-0.6.1}/tests/test_telemetry.py +0 -0
  208. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/__init__.py +0 -0
  209. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/__init__.py +0 -0
  210. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/conftest.py +0 -0
  211. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_arrow.py +0 -0
  212. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_clip.py +0 -0
  213. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  214. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  215. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_feature.py +0 -0
  216. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_feature_utils.py +0 -0
  217. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_file.py +0 -0
  218. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_hf.py +0 -0
  219. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_image.py +0 -0
  220. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_schema.py +0 -0
  221. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_signal_schema.py +0 -0
  222. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_text.py +0 -0
  223. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_udf_signature.py +0 -0
  224. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_utils.py +0 -0
  225. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/lib/test_webdataset.py +0 -0
  226. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/__init__.py +0 -0
  227. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  228. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  229. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/test_array.py +0 -0
  230. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/test_conditional.py +0 -0
  231. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/test_path.py +0 -0
  232. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/test_random.py +0 -0
  233. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/sql/test_selectable.py +0 -0
  234. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_asyn.py +0 -0
  235. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_cache.py +0 -0
  236. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_catalog.py +0 -0
  237. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_catalog_loader.py +0 -0
  238. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_cli_parsing.py +0 -0
  239. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_client.py +0 -0
  240. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_client_s3.py +0 -0
  241. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_data_storage.py +0 -0
  242. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_database_engine.py +0 -0
  243. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_dataset.py +0 -0
  244. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_dispatch.py +0 -0
  245. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_fileslice.py +0 -0
  246. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_id_generator.py +0 -0
  247. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_listing.py +0 -0
  248. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_metastore.py +0 -0
  249. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_module_exports.py +0 -0
  250. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_query.py +0 -0
  251. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_query_metrics.py +0 -0
  252. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_query_params.py +0 -0
  253. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_serializer.py +0 -0
  254. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_session.py +0 -0
  255. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_storage.py +0 -0
  256. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_utils.py +0 -0
  257. {datachain-0.5.1 → datachain-0.6.1}/tests/unit/test_warehouse.py +0 -0
@@ -4,7 +4,7 @@ ci:
4
4
  skip: [mypy]
5
5
  repos:
6
6
  - repo: https://github.com/pre-commit/pre-commit-hooks
7
- rev: v4.6.0
7
+ rev: v5.0.0
8
8
  hooks:
9
9
  - id: check-added-large-files
10
10
  exclude: '^tests/examples/data/'
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.6.8'
27
+ rev: 'v0.6.9'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.5.1
3
+ Version: 0.6.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -81,7 +81,7 @@ Requires-Dist: requests-mock; extra == "tests"
81
81
  Requires-Dist: scipy; extra == "tests"
82
82
  Provides-Extra: dev
83
83
  Requires-Dist: datachain[docs,tests]; extra == "dev"
84
- Requires-Dist: mypy==1.11.2; extra == "dev"
84
+ Requires-Dist: mypy==1.12.0; extra == "dev"
85
85
  Requires-Dist: types-python-dateutil; extra == "dev"
86
86
  Requires-Dist: types-pytz; extra == "dev"
87
87
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -93,7 +93,7 @@ tests = [
93
93
  ]
94
94
  dev = [
95
95
  "datachain[docs,tests]",
96
- "mypy==1.11.2",
96
+ "mypy==1.12.0",
97
97
  "types-python-dateutil",
98
98
  "types-pytz",
99
99
  "types-PyYAML",
@@ -1,3 +1,4 @@
1
+ from datachain.lib import func
1
2
  from datachain.lib.data_model import DataModel, DataType, is_chain_type
2
3
  from datachain.lib.dc import C, Column, DataChain, Sys
3
4
  from datachain.lib.file import (
@@ -34,6 +35,7 @@ __all__ = [
34
35
  "Sys",
35
36
  "TarVFile",
36
37
  "TextFile",
38
+ "func",
37
39
  "is_chain_type",
38
40
  "metrics",
39
41
  "param",
@@ -989,13 +989,6 @@ class Catalog:
989
989
  c.name: c.type.to_dict() for c in columns if isinstance(c.type, SQLType)
990
990
  }
991
991
 
992
- job_id = job_id or os.getenv("DATACHAIN_JOB_ID")
993
- if not job_id:
994
- from datachain.query.session import Session
995
-
996
- session = Session.get(catalog=self)
997
- job_id = session.job_id
998
-
999
992
  dataset = self.metastore.create_dataset_version(
1000
993
  dataset,
1001
994
  version,
@@ -1218,6 +1211,7 @@ class Catalog:
1218
1211
  preview=dataset_version.preview,
1219
1212
  job_id=dataset_version.job_id,
1220
1213
  )
1214
+
1221
1215
  # to avoid re-creating rows table, we are just renaming it for a new version
1222
1216
  # of target dataset
1223
1217
  self.warehouse.rename_dataset_table(
@@ -1325,8 +1319,6 @@ class Catalog:
1325
1319
  if offset:
1326
1320
  q = q.offset(offset)
1327
1321
 
1328
- q = q.order_by("sys__id")
1329
-
1330
1322
  return q.to_db_records()
1331
1323
 
1332
1324
  def signed_url(self, source: str, path: str, client_config=None) -> str:
@@ -763,6 +763,14 @@ class SQLiteWarehouse(AbstractWarehouse):
763
763
  query: Select,
764
764
  progress_cb: Optional[Callable[[int], None]] = None,
765
765
  ) -> None:
766
+ if len(query._group_by_clause) > 0:
767
+ select_q = query.with_only_columns(
768
+ *[c for c in query.selected_columns if c.name != "sys__id"]
769
+ )
770
+ q = table.insert().from_select(list(select_q.selected_columns), select_q)
771
+ self.db.execute(q)
772
+ return
773
+
766
774
  if "sys__id" in query.selected_columns:
767
775
  col_id = query.selected_columns.sys__id
768
776
  else:
@@ -215,10 +215,6 @@ class AbstractWarehouse(ABC, Serializable):
215
215
  limit = query._limit
216
216
  paginated_query = query.limit(page_size)
217
217
 
218
- if not paginated_query._order_by_clauses:
219
- # default order by is order by `sys__id`
220
- paginated_query = paginated_query.order_by(query.selected_columns.sys__id)
221
-
222
218
  results = None
223
219
  offset = 0
224
220
  num_yielded = 0
@@ -0,0 +1,14 @@
1
+ from decimal import Decimal
2
+ from typing import Any
3
+
4
+ from sqlalchemy import ColumnElement
5
+
6
+
7
+ def sql_to_python(sql_exp: ColumnElement) -> Any:
8
+ try:
9
+ type_ = sql_exp.type.python_type
10
+ if type_ == Decimal:
11
+ type_ = float
12
+ except NotImplementedError:
13
+ type_ = str
14
+ return type_
@@ -4,7 +4,7 @@ from typing import Any, Union
4
4
  from datachain.lib.data_model import (
5
5
  DataType,
6
6
  DataTypeNames,
7
- DataValuesType,
7
+ DataValue,
8
8
  is_chain_type,
9
9
  )
10
10
  from datachain.lib.utils import DataChainParamsError
@@ -20,7 +20,7 @@ class ValuesToTupleError(DataChainParamsError):
20
20
  def values_to_tuples( # noqa: C901, PLR0912
21
21
  ds_name: str = "",
22
22
  output: Union[None, DataType, Sequence[str], dict[str, DataType]] = None,
23
- **fr_map: Sequence[DataValuesType],
23
+ **fr_map: Sequence[DataValue],
24
24
  ) -> tuple[Any, Any, Any]:
25
25
  if output:
26
26
  if not isinstance(output, (Sequence, str, dict)):
@@ -18,7 +18,7 @@ StandardType = Union[
18
18
  ]
19
19
  DataType = Union[type[BaseModel], StandardType]
20
20
  DataTypeNames = "BaseModel, int, str, float, bool, list, dict, bytes, datetime"
21
- DataValuesType = Union[BaseModel, int, str, float, bool, list, dict, bytes, datetime]
21
+ DataValue = Union[BaseModel, int, str, float, bool, list, dict, bytes, datetime]
22
22
 
23
23
 
24
24
  class DataModel(BaseModel):
@@ -29,6 +29,7 @@ from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
29
29
  from datachain.lib.dataset_info import DatasetInfo
30
30
  from datachain.lib.file import ArrowRow, File, get_file_type
31
31
  from datachain.lib.file import ExportPlacement as FileExportPlacement
32
+ from datachain.lib.func import Func
32
33
  from datachain.lib.listing import (
33
34
  is_listing_dataset,
34
35
  is_listing_expired,
@@ -42,26 +43,18 @@ from datachain.lib.meta_formats import read_meta, read_schema
42
43
  from datachain.lib.model_store import ModelStore
43
44
  from datachain.lib.settings import Settings
44
45
  from datachain.lib.signal_schema import SignalSchema
45
- from datachain.lib.udf import (
46
- Aggregator,
47
- BatchMapper,
48
- Generator,
49
- Mapper,
50
- UDFBase,
51
- )
46
+ from datachain.lib.udf import Aggregator, BatchMapper, Generator, Mapper, UDFBase
52
47
  from datachain.lib.udf_signature import UdfSignature
53
- from datachain.lib.utils import DataChainParamsError
48
+ from datachain.lib.utils import DataChainColumnError, DataChainParamsError
54
49
  from datachain.query import Session
55
- from datachain.query.dataset import (
56
- DatasetQuery,
57
- PartitionByType,
58
- )
59
- from datachain.query.schema import DEFAULT_DELIMITER, Column, DatasetRow
50
+ from datachain.query.dataset import DatasetQuery, PartitionByType
51
+ from datachain.query.schema import DEFAULT_DELIMITER, Column, ColumnMeta
60
52
  from datachain.sql.functions import path as pathfunc
61
53
  from datachain.telemetry import telemetry
62
54
  from datachain.utils import batched_it, inside_notebook
63
55
 
64
56
  if TYPE_CHECKING:
57
+ from pyarrow import DataType as ArrowDataType
65
58
  from typing_extensions import Concatenate, ParamSpec, Self
66
59
 
67
60
  from datachain.lib.hf import HFDatasetType
@@ -148,11 +141,6 @@ class DatasetMergeError(DataChainParamsError): # noqa: D101
148
141
  super().__init__(f"Merge error on='{on_str}'{right_on_str}: {msg}")
149
142
 
150
143
 
151
- class DataChainColumnError(DataChainParamsError): # noqa: D101
152
- def __init__(self, col_name, msg): # noqa: D107
153
- super().__init__(f"Error for column {col_name}: {msg}")
154
-
155
-
156
144
  OutputType = Union[None, DataType, Sequence[str], dict[str, DataType]]
157
145
 
158
146
 
@@ -981,10 +969,9 @@ class DataChain:
981
969
  row is left in the result set.
982
970
 
983
971
  Example:
984
- ```py
985
- dc.distinct("file.parent", "file.name")
986
- )
987
- ```
972
+ ```py
973
+ dc.distinct("file.parent", "file.name")
974
+ ```
988
975
  """
989
976
  return self._evolve(
990
977
  query=self._query.distinct(
@@ -1010,6 +997,60 @@ class DataChain:
1010
997
  query=self._query.select(*columns), signal_schema=new_schema
1011
998
  )
1012
999
 
1000
+ def group_by(
1001
+ self,
1002
+ *,
1003
+ partition_by: Union[str, Sequence[str]],
1004
+ **kwargs: Func,
1005
+ ) -> "Self":
1006
+ """Group rows by specified set of signals and return new signals
1007
+ with aggregated values.
1008
+
1009
+ Example:
1010
+ ```py
1011
+ chain = chain.group_by(
1012
+ cnt=func.count(),
1013
+ partition_by=("file_source", "file_ext"),
1014
+ )
1015
+ ```
1016
+ """
1017
+ if isinstance(partition_by, str):
1018
+ partition_by = [partition_by]
1019
+ if not partition_by:
1020
+ raise ValueError("At least one column should be provided for partition_by")
1021
+
1022
+ if not kwargs:
1023
+ raise ValueError("At least one column should be provided for group_by")
1024
+ for col_name, func in kwargs.items():
1025
+ if not isinstance(func, Func):
1026
+ raise DataChainColumnError(
1027
+ col_name,
1028
+ f"Column {col_name} has type {type(func)} but expected Func object",
1029
+ )
1030
+
1031
+ partition_by_columns: list[Column] = []
1032
+ signal_columns: list[Column] = []
1033
+ schema_fields: dict[str, DataType] = {}
1034
+
1035
+ # validate partition_by columns and add them to the schema
1036
+ for col_name in partition_by:
1037
+ col_db_name = ColumnMeta.to_db_name(col_name)
1038
+ col_type = self.signals_schema.get_column_type(col_db_name)
1039
+ col = Column(col_db_name, python_to_sql(col_type))
1040
+ partition_by_columns.append(col)
1041
+ schema_fields[col_db_name] = col_type
1042
+
1043
+ # validate signal columns and add them to the schema
1044
+ for col_name, func in kwargs.items():
1045
+ col = func.get_column(self.signals_schema, label=col_name)
1046
+ signal_columns.append(col)
1047
+ schema_fields[col_name] = func.get_result_type(self.signals_schema)
1048
+
1049
+ return self._evolve(
1050
+ query=self._query.group_by(signal_columns, partition_by_columns),
1051
+ signal_schema=SignalSchema(schema_fields),
1052
+ )
1053
+
1013
1054
  def mutate(self, **kwargs) -> "Self":
1014
1055
  """Create new signals based on existing signals.
1015
1056
 
@@ -1024,7 +1065,7 @@ class DataChain:
1024
1065
  The supported functions:
1025
1066
  Numerical: +, -, *, /, rand(), avg(), count(), func(),
1026
1067
  greatest(), least(), max(), min(), sum()
1027
- String: length(), split()
1068
+ String: length(), split(), replace(), regexp_replace()
1028
1069
  Filename: name(), parent(), file_stem(), file_ext()
1029
1070
  Array: length(), sip_hash_64(), euclidean_distance(),
1030
1071
  cosine_distance()
@@ -1476,12 +1517,6 @@ class DataChain:
1476
1517
  fr_map = {col.lower(): df[col].tolist() for col in df.columns}
1477
1518
 
1478
1519
  for column in fr_map:
1479
- if column in DatasetRow.schema:
1480
- raise DatasetPrepareError(
1481
- name,
1482
- f"import from pandas error - column '{column}' conflicts with"
1483
- " default schema",
1484
- )
1485
1520
  if not column.isidentifier():
1486
1521
  raise DatasetPrepareError(
1487
1522
  name,
@@ -1709,6 +1744,7 @@ class DataChain:
1709
1744
  nrows=None,
1710
1745
  session: Optional[Session] = None,
1711
1746
  settings: Optional[dict] = None,
1747
+ column_types: Optional[dict[str, "Union[str, ArrowDataType]"]] = None,
1712
1748
  **kwargs,
1713
1749
  ) -> "DataChain":
1714
1750
  """Generate chain from csv files.
@@ -1727,6 +1763,9 @@ class DataChain:
1727
1763
  nrows : Optional row limit.
1728
1764
  session : Session to use for the chain.
1729
1765
  settings : Settings to use for the chain.
1766
+ column_types : Dictionary of column names and their corresponding types.
1767
+ It is passed to CSV reader and for each column specified type auto
1768
+ inference is disabled.
1730
1769
 
1731
1770
  Example:
1732
1771
  Reading a csv file:
@@ -1742,6 +1781,15 @@ class DataChain:
1742
1781
  from pandas.io.parsers.readers import STR_NA_VALUES
1743
1782
  from pyarrow.csv import ConvertOptions, ParseOptions, ReadOptions
1744
1783
  from pyarrow.dataset import CsvFileFormat
1784
+ from pyarrow.lib import type_for_alias
1785
+
1786
+ if column_types:
1787
+ column_types = {
1788
+ name: type_for_alias(typ) if isinstance(typ, str) else typ
1789
+ for name, typ in column_types.items()
1790
+ }
1791
+ else:
1792
+ column_types = {}
1745
1793
 
1746
1794
  chain = DataChain.from_storage(
1747
1795
  path, session=session, settings=settings, **kwargs
@@ -1767,7 +1815,9 @@ class DataChain:
1767
1815
  parse_options = ParseOptions(delimiter=delimiter)
1768
1816
  read_options = ReadOptions(column_names=column_names)
1769
1817
  convert_options = ConvertOptions(
1770
- strings_can_be_null=True, null_values=STR_NA_VALUES
1818
+ strings_can_be_null=True,
1819
+ null_values=STR_NA_VALUES,
1820
+ column_types=column_types,
1771
1821
  )
1772
1822
  format = CsvFileFormat(
1773
1823
  parse_options=parse_options,
@@ -1978,6 +2028,8 @@ class DataChain:
1978
2028
  ),
1979
2029
  )
1980
2030
 
2031
+ session.add_dataset_version(dsr, dsr.latest_version)
2032
+
1981
2033
  if isinstance(to_insert, dict):
1982
2034
  to_insert = [to_insert]
1983
2035
  elif not to_insert:
@@ -0,0 +1,14 @@
1
+ from .aggregate import any_value, avg, collect, concat, count, max, min, sum
2
+ from .func import Func
3
+
4
+ __all__ = [
5
+ "Func",
6
+ "any_value",
7
+ "avg",
8
+ "collect",
9
+ "concat",
10
+ "count",
11
+ "max",
12
+ "min",
13
+ "sum",
14
+ ]
@@ -0,0 +1,42 @@
1
+ from typing import Optional
2
+
3
+ from sqlalchemy import func as sa_func
4
+
5
+ from datachain.sql import functions as dc_func
6
+
7
+ from .func import Func
8
+
9
+
10
def count(col: Optional[str] = None) -> Func:
    """Build a counting aggregate.

    Args:
        col: Optional signal name to count. When omitted, the underlying
            SQL function is invoked without arguments.

    Returns:
        A `Func` wrapping `sqlalchemy.func.count` with an `int` result type.
    """
    counter = Func(inner=sa_func.count, col=col, result_type=int)
    return counter
12
+
13
+
14
def sum(col: str) -> Func:
    """Build a summing aggregate over the signal `col`.

    No explicit result type is passed, so the result type is taken from the
    column itself.
    """
    total = Func(inner=sa_func.sum, col=col)
    return total
16
+
17
+
18
def avg(col: str) -> Func:
    """Build an averaging aggregate over the signal `col`.

    Args:
        col: Name of the signal to average.

    Returns:
        A `Func` wrapping the datachain `avg` aggregate with a `float`
        result type.
    """
    # An SQL average is a floating point value even for integer input, so
    # the result type must not be inherited from the column type.
    return Func(inner=dc_func.aggregate.avg, col=col, result_type=float)
20
+
21
+
22
def min(col: str) -> Func:
    """Build a minimum aggregate over the signal `col`.

    No explicit result type is passed, so the result type is taken from the
    column itself.
    """
    smallest = Func(inner=sa_func.min, col=col)
    return smallest
24
+
25
+
26
def max(col: str) -> Func:
    """Build a maximum aggregate over the signal `col`.

    No explicit result type is passed, so the result type is taken from the
    column itself.
    """
    largest = Func(inner=sa_func.max, col=col)
    return largest
28
+
29
+
30
def any_value(col: str) -> Func:
    """Build an `any_value` aggregate over the signal `col`.

    No explicit result type is passed, so the result type is taken from the
    column itself.
    """
    picked = Func(inner=dc_func.aggregate.any_value, col=col)
    return picked
32
+
33
+
34
def collect(col: str) -> Func:
    """Build a `collect` aggregate over the signal `col`.

    The function is flagged with `is_array=True`, so its derived result type
    is a list of the column type rather than the column type itself.
    """
    gathered = Func(inner=dc_func.aggregate.collect, col=col, is_array=True)
    return gathered
36
+
37
+
38
def concat(col: str, separator="") -> Func:
    """Build a string-concatenating aggregate over the signal `col`.

    Args:
        col: Name of the signal whose values are concatenated.
        separator: Value forwarded as the second argument of
            `group_concat`; defaults to the empty string.

    Returns:
        A `Func` with a `str` result type.
    """

    def _group_concat(column):
        # Bind the separator so the wrapper has the one-argument shape that
        # `Func` calls with the resolved column.
        return dc_func.aggregate.group_concat(column, separator)

    return Func(inner=_group_concat, col=col, result_type=str)
@@ -0,0 +1,64 @@
1
+ from typing import TYPE_CHECKING, Callable, Optional
2
+
3
+ from datachain.lib.convert.python_to_sql import python_to_sql
4
+ from datachain.lib.utils import DataChainColumnError
5
+ from datachain.query.schema import Column, ColumnMeta
6
+
7
+ if TYPE_CHECKING:
8
+ from datachain import DataType
9
+ from datachain.lib.signal_schema import SignalSchema
10
+
11
+
12
class Func:
    """Lazily-built SQL function bound to an optional signal column.

    A `Func` stores the callable that produces the SQL expression together
    with the signal it operates on. The expression itself is only created in
    `get_column`, once a signal schema is available to resolve column types.
    """

    def __init__(
        self,
        inner: Callable,
        col: Optional[str] = None,
        result_type: Optional["DataType"] = None,
        is_array: bool = False,
    ) -> None:
        """Store the function description.

        Args:
            inner: Callable producing the SQL expression. It is called with
                the resolved column when `col` is set and with no arguments
                otherwise.
            col: Name of the signal the function is applied to, if any.
            result_type: Explicit result type; when omitted the result type
                is derived from the column type.
            is_array: When true, the derived type is `list[<column type>]`
                instead of the column type itself.
        """
        self.inner = inner
        self.col = col
        self.result_type = result_type
        self.is_array = is_array

    @property
    def db_col(self) -> Optional[str]:
        """Database name of `col`, or `None` when no column is set."""
        return ColumnMeta.to_db_name(self.col) if self.col else None

    def db_col_type(self, signals_schema: "SignalSchema") -> Optional["DataType"]:
        """Return the type derived from the column, or `None` without a column.

        The column type is looked up in `signals_schema`; it is wrapped in
        `list[...]` when `is_array` is set.
        """
        db_col = self.db_col
        if not db_col:
            return None
        col_type: type = signals_schema.get_column_type(db_col)
        return list[col_type] if self.is_array else col_type  # type: ignore[valid-type]

    def get_result_type(self, signals_schema: "SignalSchema") -> "DataType":
        """Return the type of the value produced by the function.

        An explicit `result_type` wins over the type derived from the column.

        Raises:
            DataChainColumnError: If neither an explicit result type nor a
                column to derive it from is available.
        """
        # Resolve the column first so that an unknown column name is reported
        # by the schema lookup even when an explicit result type is set.
        col_type = self.db_col_type(signals_schema)

        if self.result_type:
            return self.result_type

        if col_type:
            return col_type

        raise DataChainColumnError(
            str(self.inner),
            "Column name is required to infer result type",
        )

    def get_column(
        self, signals_schema: "SignalSchema", label: Optional[str] = None
    ) -> "Column":
        """Build the SQL expression for this function.

        Args:
            signals_schema: Schema used to resolve the column type.
            label: Optional label applied to the resulting expression.

        Returns:
            The expression returned by `inner`, labelled when `label` is
            given.
        """
        if self.col:
            # The column handed to `inner` is declared with the function's
            # result type.
            col_type = self.get_result_type(signals_schema)
            col = Column(self.db_col, python_to_sql(col_type))
            func_col = self.inner(col)
        else:
            func_col = self.inner()

        if label:
            func_col = func_col.label(label)

        return func_col
@@ -25,7 +25,7 @@ from typing_extensions import Literal as LiteralEx
25
25
  from datachain.lib.convert.python_to_sql import python_to_sql
26
26
  from datachain.lib.convert.sql_to_python import sql_to_python
27
27
  from datachain.lib.convert.unflatten import unflatten_to_json_pos
28
- from datachain.lib.data_model import DataModel, DataType
28
+ from datachain.lib.data_model import DataModel, DataType, DataValue
29
29
  from datachain.lib.file import File
30
30
  from datachain.lib.model_store import ModelStore
31
31
  from datachain.lib.utils import DataChainParamsError
@@ -110,7 +110,7 @@ class SignalSchema:
110
110
  values: dict[str, DataType]
111
111
  tree: dict[str, Any]
112
112
  setup_func: dict[str, Callable]
113
- setup_values: Optional[dict[str, Callable]]
113
+ setup_values: Optional[dict[str, Any]]
114
114
 
115
115
  def __init__(
116
116
  self,
@@ -333,21 +333,21 @@ class SignalSchema:
333
333
  res[db_name] = python_to_sql(type_)
334
334
  return res
335
335
 
336
- def row_to_objs(self, row: Sequence[Any]) -> list[DataType]:
336
+ def row_to_objs(self, row: Sequence[Any]) -> list[DataValue]:
337
337
  self._init_setup_values()
338
338
 
339
- objs = []
339
+ objs: list[DataValue] = []
340
340
  pos = 0
341
341
  for name, fr_type in self.values.items():
342
342
  if self.setup_values and (val := self.setup_values.get(name, None)):
343
343
  objs.append(val)
344
344
  elif (fr := ModelStore.to_pydantic(fr_type)) is not None:
345
345
  j, pos = unflatten_to_json_pos(fr, row, pos)
346
- objs.append(fr(**j)) # type: ignore[arg-type]
346
+ objs.append(fr(**j))
347
347
  else:
348
348
  objs.append(row[pos])
349
349
  pos += 1
350
- return objs # type: ignore[return-value]
350
+ return objs
351
351
 
352
352
  def contains_file(self) -> bool:
353
353
  for type_ in self.values.values():
@@ -400,6 +400,12 @@ class SignalSchema:
400
400
  if ModelStore.is_pydantic(finfo.annotation):
401
401
  SignalSchema._set_file_stream(getattr(obj, field), catalog, cache)
402
402
 
403
def get_column_type(self, col_name: str) -> DataType:
    """Return the type of the leaf signal whose flattened name is `col_name`.

    The flat tree is scanned for an entry without a subtree whose path,
    joined with `DEFAULT_DELIMITER`, equals `col_name`.

    Raises:
        SignalResolvingError: If no such leaf entry exists.
    """
    for parts, leaf_type, is_branch, _depth in self.get_flat_tree():
        if is_branch:
            # Only leaf entries are candidates for a column match.
            continue
        if DEFAULT_DELIMITER.join(parts) == col_name:
            return leaf_type
    raise SignalResolvingError([col_name], "is not found")
408
+
403
409
  def db_signals(
404
410
  self, name: Optional[str] = None, as_columns=False
405
411
  ) -> Union[list[str], list[Column]]:
@@ -490,7 +496,7 @@ class SignalSchema:
490
496
  new_values[name] = args_map[name]
491
497
  else:
492
498
  # adding new signal
493
- new_values.update(sql_to_python({name: value}))
499
+ new_values[name] = sql_to_python(value)
494
500
 
495
501
  return SignalSchema(new_values)
496
502
 
@@ -534,12 +540,12 @@ class SignalSchema:
534
540
  for name, val in values.items()
535
541
  }
536
542
 
537
- def get_flat_tree(self) -> Iterator[tuple[list[str], type, bool, int]]:
543
+ def get_flat_tree(self) -> Iterator[tuple[list[str], DataType, bool, int]]:
538
544
  yield from self._get_flat_tree(self.tree, [], 0)
539
545
 
540
546
  def _get_flat_tree(
541
547
  self, tree: dict, prefix: list[str], depth: int
542
- ) -> Iterator[tuple[list[str], type, bool, int]]:
548
+ ) -> Iterator[tuple[list[str], DataType, bool, int]]:
543
549
  for name, (type_, substree) in tree.items():
544
550
  suffix = name.split(".")
545
551
  new_prefix = prefix + suffix