datachain 0.10.0__tar.gz → 0.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (324)
  1. {datachain-0.10.0 → datachain-0.11.0}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.10.0 → datachain-0.11.0}/PKG-INFO +3 -2
  3. {datachain-0.10.0 → datachain-0.11.0}/pyproject.toml +3 -2
  4. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/file.py +6 -2
  5. datachain-0.11.0/src/datachain/script_meta.py +147 -0
  6. {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/PKG-INFO +3 -2
  7. {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/SOURCES.txt +2 -0
  8. {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/requires.txt +4 -1
  9. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_datachain.py +21 -0
  10. datachain-0.11.0/tests/unit/test_script_meta.py +119 -0
  11. {datachain-0.10.0 → datachain-0.11.0}/.cruft.json +0 -0
  12. {datachain-0.10.0 → datachain-0.11.0}/.gitattributes +0 -0
  13. {datachain-0.10.0 → datachain-0.11.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  14. {datachain-0.10.0 → datachain-0.11.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  15. {datachain-0.10.0 → datachain-0.11.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  16. {datachain-0.10.0 → datachain-0.11.0}/.github/codecov.yaml +0 -0
  17. {datachain-0.10.0 → datachain-0.11.0}/.github/dependabot.yml +0 -0
  18. {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/benchmarks.yml +0 -0
  19. {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/release.yml +0 -0
  20. {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/tests-studio.yml +0 -0
  21. {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/tests.yml +0 -0
  22. {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/update-template.yaml +0 -0
  23. {datachain-0.10.0 → datachain-0.11.0}/.gitignore +0 -0
  24. {datachain-0.10.0 → datachain-0.11.0}/CODE_OF_CONDUCT.rst +0 -0
  25. {datachain-0.10.0 → datachain-0.11.0}/LICENSE +0 -0
  26. {datachain-0.10.0 → datachain-0.11.0}/README.rst +0 -0
  27. {datachain-0.10.0 → datachain-0.11.0}/docs/assets/captioned_cartoons.png +0 -0
  28. {datachain-0.10.0 → datachain-0.11.0}/docs/assets/datachain-white.svg +0 -0
  29. {datachain-0.10.0 → datachain-0.11.0}/docs/assets/datachain.svg +0 -0
  30. {datachain-0.10.0 → datachain-0.11.0}/docs/contributing.md +0 -0
  31. {datachain-0.10.0 → datachain-0.11.0}/docs/css/github-permalink-style.css +0 -0
  32. {datachain-0.10.0 → datachain-0.11.0}/docs/examples.md +0 -0
  33. {datachain-0.10.0 → datachain-0.11.0}/docs/index.md +0 -0
  34. {datachain-0.10.0 → datachain-0.11.0}/docs/overrides/main.html +0 -0
  35. {datachain-0.10.0 → datachain-0.11.0}/docs/quick-start.md +0 -0
  36. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/arrowrow.md +0 -0
  37. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/bbox.md +0 -0
  38. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/file.md +0 -0
  39. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/imagefile.md +0 -0
  40. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/index.md +0 -0
  41. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/pose.md +0 -0
  42. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/segment.md +0 -0
  43. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/tarvfile.md +0 -0
  44. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/textfile.md +0 -0
  45. {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/videofile.md +0 -0
  46. {datachain-0.10.0 → datachain-0.11.0}/docs/references/datachain.md +0 -0
  47. {datachain-0.10.0 → datachain-0.11.0}/docs/references/func.md +0 -0
  48. {datachain-0.10.0 → datachain-0.11.0}/docs/references/index.md +0 -0
  49. {datachain-0.10.0 → datachain-0.11.0}/docs/references/toolkit.md +0 -0
  50. {datachain-0.10.0 → datachain-0.11.0}/docs/references/torch.md +0 -0
  51. {datachain-0.10.0 → datachain-0.11.0}/docs/references/udf.md +0 -0
  52. {datachain-0.10.0 → datachain-0.11.0}/docs/tutorials.md +0 -0
  53. {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  54. {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  55. {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/openimage-detect.py +0 -0
  56. {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  57. {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  58. {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  59. {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/common_sql_functions.py +0 -0
  60. {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/json-csv-reader.py +0 -0
  61. {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/torch-loader.py +0 -0
  62. {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/udfs/parallel.py +0 -0
  63. {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/udfs/simple.py +0 -0
  64. {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/udfs/stateful.py +0 -0
  65. {datachain-0.10.0 → datachain-0.11.0}/examples/llm_and_nlp/claude-query.py +0 -0
  66. {datachain-0.10.0 → datachain-0.11.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  67. {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/clip_inference.py +0 -0
  68. {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/hf_pipeline.py +0 -0
  69. {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  70. {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/wds.py +0 -0
  71. {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/wds_filtered.py +0 -0
  72. {datachain-0.10.0 → datachain-0.11.0}/mkdocs.yml +0 -0
  73. {datachain-0.10.0 → datachain-0.11.0}/noxfile.py +0 -0
  74. {datachain-0.10.0 → datachain-0.11.0}/setup.cfg +0 -0
  75. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/__init__.py +0 -0
  76. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/__main__.py +0 -0
  77. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/asyn.py +0 -0
  78. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cache.py +0 -0
  79. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/__init__.py +0 -0
  80. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/catalog.py +0 -0
  81. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/datasource.py +0 -0
  82. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/loader.py +0 -0
  83. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/__init__.py +0 -0
  84. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/__init__.py +0 -0
  85. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/datasets.py +0 -0
  86. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/du.py +0 -0
  87. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/index.py +0 -0
  88. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/ls.py +0 -0
  89. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/misc.py +0 -0
  90. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/query.py +0 -0
  91. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/show.py +0 -0
  92. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/__init__.py +0 -0
  93. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/job.py +0 -0
  94. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/studio.py +0 -0
  95. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/utils.py +0 -0
  96. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/utils.py +0 -0
  97. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/__init__.py +0 -0
  98. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/azure.py +0 -0
  99. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/fileslice.py +0 -0
  100. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/fsspec.py +0 -0
  101. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/gcs.py +0 -0
  102. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/hf.py +0 -0
  103. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/local.py +0 -0
  104. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/s3.py +0 -0
  105. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/config.py +0 -0
  106. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/__init__.py +0 -0
  107. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/db_engine.py +0 -0
  108. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/job.py +0 -0
  109. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/metastore.py +0 -0
  110. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/schema.py +0 -0
  111. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/serializer.py +0 -0
  112. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/sqlite.py +0 -0
  113. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/warehouse.py +0 -0
  114. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/dataset.py +0 -0
  115. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/diff/__init__.py +0 -0
  116. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/error.py +0 -0
  117. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/fs/__init__.py +0 -0
  118. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/fs/reference.py +0 -0
  119. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/__init__.py +0 -0
  120. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/aggregate.py +0 -0
  121. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/array.py +0 -0
  122. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/base.py +0 -0
  123. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/conditional.py +0 -0
  124. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/func.py +0 -0
  125. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/numeric.py +0 -0
  126. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/path.py +0 -0
  127. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/random.py +0 -0
  128. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/string.py +0 -0
  129. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/window.py +0 -0
  130. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/job.py +0 -0
  131. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/__init__.py +0 -0
  132. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/arrow.py +0 -0
  133. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/clip.py +0 -0
  134. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/__init__.py +0 -0
  135. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/flatten.py +0 -0
  136. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  137. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  138. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/unflatten.py +0 -0
  139. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  140. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/data_model.py +0 -0
  141. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/dataset_info.py +0 -0
  142. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/dc.py +0 -0
  143. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/hf.py +0 -0
  144. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/image.py +0 -0
  145. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/listing.py +0 -0
  146. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/listing_info.py +0 -0
  147. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/meta_formats.py +0 -0
  148. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/model_store.py +0 -0
  149. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/pytorch.py +0 -0
  150. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/settings.py +0 -0
  151. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/signal_schema.py +0 -0
  152. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/tar.py +0 -0
  153. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/text.py +0 -0
  154. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/udf.py +0 -0
  155. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/udf_signature.py +0 -0
  156. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/utils.py +0 -0
  157. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/video.py +0 -0
  158. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/webdataset.py +0 -0
  159. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/webdataset_laion.py +0 -0
  160. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/listing.py +0 -0
  161. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/__init__.py +0 -0
  162. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/bbox.py +0 -0
  163. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/pose.py +0 -0
  164. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/segment.py +0 -0
  165. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  166. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  167. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/pose.py +0 -0
  168. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/segment.py +0 -0
  169. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/node.py +0 -0
  170. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/nodes_fetcher.py +0 -0
  171. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/nodes_thread_pool.py +0 -0
  172. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/progress.py +0 -0
  173. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/py.typed +0 -0
  174. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/__init__.py +0 -0
  175. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/batch.py +0 -0
  176. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/dataset.py +0 -0
  177. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/dispatch.py +0 -0
  178. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/metrics.py +0 -0
  179. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/params.py +0 -0
  180. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/queue.py +0 -0
  181. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/schema.py +0 -0
  182. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/session.py +0 -0
  183. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/udf.py +0 -0
  184. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/utils.py +0 -0
  185. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/remote/__init__.py +0 -0
  186. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/remote/studio.py +0 -0
  187. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/__init__.py +0 -0
  188. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/default/__init__.py +0 -0
  189. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/default/base.py +0 -0
  190. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/__init__.py +0 -0
  191. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/aggregate.py +0 -0
  192. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/array.py +0 -0
  193. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/conditional.py +0 -0
  194. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/numeric.py +0 -0
  195. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/path.py +0 -0
  196. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/random.py +0 -0
  197. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/string.py +0 -0
  198. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/selectable.py +0 -0
  199. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  200. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/base.py +0 -0
  201. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/types.py +0 -0
  202. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/vector.py +0 -0
  203. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/types.py +0 -0
  204. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/utils.py +0 -0
  205. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/studio.py +0 -0
  206. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/telemetry.py +0 -0
  207. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/toolkit/__init__.py +0 -0
  208. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/toolkit/split.py +0 -0
  209. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/torch/__init__.py +0 -0
  210. {datachain-0.10.0 → datachain-0.11.0}/src/datachain/utils.py +0 -0
  211. {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  212. {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/entry_points.txt +0 -0
  213. {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/top_level.txt +0 -0
  214. {datachain-0.10.0 → datachain-0.11.0}/tests/__init__.py +0 -0
  215. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/__init__.py +0 -0
  216. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/conftest.py +0 -0
  217. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  218. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  219. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/.gitignore +0 -0
  220. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  221. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/test_datachain.py +0 -0
  222. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/test_ls.py +0 -0
  223. {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/test_version.py +0 -0
  224. {datachain-0.10.0 → datachain-0.11.0}/tests/conftest.py +0 -0
  225. {datachain-0.10.0 → datachain-0.11.0}/tests/data.py +0 -0
  226. {datachain-0.10.0 → datachain-0.11.0}/tests/examples/__init__.py +0 -0
  227. {datachain-0.10.0 → datachain-0.11.0}/tests/examples/test_examples.py +0 -0
  228. {datachain-0.10.0 → datachain-0.11.0}/tests/examples/test_wds_e2e.py +0 -0
  229. {datachain-0.10.0 → datachain-0.11.0}/tests/examples/wds_data.py +0 -0
  230. {datachain-0.10.0 → datachain-0.11.0}/tests/func/__init__.py +0 -0
  231. {datachain-0.10.0 → datachain-0.11.0}/tests/func/fake-service-account-credentials.json +0 -0
  232. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_catalog.py +0 -0
  233. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_client.py +0 -0
  234. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_cloud_transfer.py +0 -0
  235. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_data_storage.py +0 -0
  236. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_datachain_merge.py +0 -0
  237. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_dataset_query.py +0 -0
  238. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_datasets.py +0 -0
  239. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_feature_pickling.py +0 -0
  240. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_file.py +0 -0
  241. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_hf.py +0 -0
  242. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_listing.py +0 -0
  243. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_ls.py +0 -0
  244. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_meta_formats.py +0 -0
  245. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_metrics.py +0 -0
  246. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_pull.py +0 -0
  247. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_pytorch.py +0 -0
  248. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_query.py +0 -0
  249. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_session.py +0 -0
  250. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_toolkit.py +0 -0
  251. {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_warehouse.py +0 -0
  252. {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class.py +0 -0
  253. {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class_exception.py +0 -0
  254. {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class_parallel.py +0 -0
  255. {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  256. {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/name_len_slow.py +0 -0
  257. {datachain-0.10.0 → datachain-0.11.0}/tests/test_atomicity.py +0 -0
  258. {datachain-0.10.0 → datachain-0.11.0}/tests/test_cli_e2e.py +0 -0
  259. {datachain-0.10.0 → datachain-0.11.0}/tests/test_cli_studio.py +0 -0
  260. {datachain-0.10.0 → datachain-0.11.0}/tests/test_query_e2e.py +0 -0
  261. {datachain-0.10.0 → datachain-0.11.0}/tests/test_telemetry.py +0 -0
  262. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/__init__.py +0 -0
  263. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/__init__.py +0 -0
  264. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/conftest.py +0 -0
  265. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  266. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_arrow.py +0 -0
  267. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_clip.py +0 -0
  268. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_datachain.py +0 -0
  269. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  270. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  271. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_diff.py +0 -0
  272. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_feature.py +0 -0
  273. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_feature_utils.py +0 -0
  274. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_file.py +0 -0
  275. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_hf.py +0 -0
  276. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_image.py +0 -0
  277. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_listing_info.py +0 -0
  278. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_models.py +0 -0
  279. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  280. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_schema.py +0 -0
  281. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_signal_schema.py +0 -0
  282. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  283. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_text.py +0 -0
  284. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_udf_signature.py +0 -0
  285. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_utils.py +0 -0
  286. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_video.py +0 -0
  287. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_webdataset.py +0 -0
  288. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/__init__.py +0 -0
  289. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  290. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  291. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  292. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_array.py +0 -0
  293. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_conditional.py +0 -0
  294. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_path.py +0 -0
  295. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_random.py +0 -0
  296. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_selectable.py +0 -0
  297. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_string.py +0 -0
  298. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_asyn.py +0 -0
  299. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_cache.py +0 -0
  300. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_catalog.py +0 -0
  301. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_catalog_loader.py +0 -0
  302. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_cli_parsing.py +0 -0
  303. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_client.py +0 -0
  304. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_client_gcs.py +0 -0
  305. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_client_s3.py +0 -0
  306. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_config.py +0 -0
  307. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_data_storage.py +0 -0
  308. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_database_engine.py +0 -0
  309. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_dataset.py +0 -0
  310. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_dispatch.py +0 -0
  311. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_fileslice.py +0 -0
  312. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_func.py +0 -0
  313. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_listing.py +0 -0
  314. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_metastore.py +0 -0
  315. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_module_exports.py +0 -0
  316. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_pytorch.py +0 -0
  317. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_query.py +0 -0
  318. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_query_metrics.py +0 -0
  319. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_query_params.py +0 -0
  320. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_serializer.py +0 -0
  321. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_session.py +0 -0
  322. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_utils.py +0 -0
  323. {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_warehouse.py +0 -0
  324. {datachain-0.10.0 → datachain-0.11.0}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.9.6'
27
+ rev: 'v0.9.7'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.10.0
3
+ Version: 0.11.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -49,6 +49,7 @@ Requires-Dist: platformdirs
49
49
  Requires-Dist: dvc-studio-client<1,>=0.21
50
50
  Requires-Dist: tabulate
51
51
  Requires-Dist: websockets
52
+ Requires-Dist: tomli; python_version < "3.11"
52
53
  Provides-Extra: docs
53
54
  Requires-Dist: mkdocs>=1.5.2; extra == "docs"
54
55
  Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
@@ -102,7 +103,7 @@ Requires-Dist: datachain[tests]; extra == "examples"
102
103
  Requires-Dist: defusedxml; extra == "examples"
103
104
  Requires-Dist: accelerate; extra == "examples"
104
105
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
105
- Requires-Dist: ultralytics==8.3.74; extra == "examples"
106
+ Requires-Dist: ultralytics==8.3.78; extra == "examples"
106
107
  Requires-Dist: open_clip_torch; extra == "examples"
107
108
 
108
109
  ================
@@ -51,7 +51,8 @@ dependencies = [
51
51
  "platformdirs",
52
52
  "dvc-studio-client>=0.21,<1",
53
53
  "tabulate",
54
- "websockets"
54
+ "websockets",
55
+ "tomli;python_version<'3.11'"
55
56
  ]
56
57
 
57
58
  [project.optional-dependencies]
@@ -118,7 +119,7 @@ examples = [
118
119
  "defusedxml",
119
120
  "accelerate",
120
121
  "huggingface_hub[hf_transfer]",
121
- "ultralytics==8.3.74",
122
+ "ultralytics==8.3.78",
122
123
  "open_clip_torch"
123
124
  ]
124
125
 
@@ -272,8 +272,12 @@ class File(DataModel):
272
272
  def save(self, destination: str):
273
273
  """Writes it's content to destination"""
274
274
  destination = stringify_path(destination)
275
- client: Client = self._catalog.get_client(str(destination))
276
- client.upload(self.read(), str(destination))
275
+ client: Client = self._catalog.get_client(destination)
276
+
277
+ if client.PREFIX == "file://" and not destination.startswith(client.PREFIX):
278
+ destination = Path(destination).absolute().as_uri()
279
+
280
+ client.upload(self.read(), destination)
277
281
 
278
282
  def _symlink_to(self, destination: str):
279
283
  if self.location:
@@ -0,0 +1,147 @@
1
+ import re
2
+ from dataclasses import dataclass
3
+ from typing import Any, Optional
4
+
5
+ try:
6
+ import tomllib
7
+ except ModuleNotFoundError:
8
+ # tomllib is in standard library from python 3.11 so for earlier versions
9
+ # we need tomli
10
+ import tomli as tomllib # type: ignore[no-redef]
11
+
12
+
13
class ScriptConfigParsingError(Exception):
    """Raised when a script's inline metadata cannot be parsed."""

    def __init__(self, message):
        # Forward the message untouched so ``str(err)`` and ``err.args``
        # expose exactly what the caller supplied.
        Exception.__init__(self, message)
16
+
17
+
18
+ @dataclass
19
+ class ScriptConfig:
20
+ """
21
+ Class that is parsing inline script metadata to get some basic information for
22
+ running datachain script like python version, dependencies, attachments etc.
23
+ Inline script metadata must follow the format described in https://packaging.python.org/en/latest/specifications/inline-script-metadata/#inline-script-metadata.
24
+ Example of script with inline metadata:
25
+ # /// script
26
+ # requires-python = ">=3.12"
27
+ #
28
+ # dependencies = [
29
+ # "pandas < 2.1.0",
30
+ # "numpy == 1.26.4"
31
+ # ]
32
+ #
33
+ # [tools.datachain.workers]
34
+ # num_workers = 3
35
+ #
36
+ # [tools.datachain.attachments]
37
+ # image1 = "s3://ldb-public/image1.jpg"
38
+ # file1 = "s3://ldb-public/file.pdf"
39
+ #
40
+ # [tools.datachain.params]
41
+ # min_length_sec = 1
42
+ # cache = false
43
+ #
44
+ # [tools.datachain.inputs]
45
+ # threshold = 0.5
46
+ # start_ds_name = "ds://start"
47
+ #
48
+ # [tools.datachain.outputs]
49
+ # result_dataset = "ds://res"
50
+ # result_dir = "/temp"
51
+ #
52
+ # ///
53
+
54
+ import sys
55
+ import pandas as pd
56
+
57
+ print(f"Python version: {sys.version_info}")
58
+ print(f"Pandas version: {pd.__version__}")
59
+
60
+ """
61
+
62
+ python_version: Optional[str]
63
+ dependencies: list[str]
64
+ attachments: dict[str, str]
65
+ params: dict[str, Any]
66
+ inputs: dict[str, Any]
67
+ outputs: dict[str, Any]
68
+ num_workers: Optional[int] = None
69
+
70
+ def __init__(
71
+ self,
72
+ python_version: Optional[str] = None,
73
+ dependencies: Optional[list[str]] = None,
74
+ attachments: Optional[dict[str, str]] = None,
75
+ params: Optional[dict[str, Any]] = None,
76
+ inputs: Optional[dict[str, Any]] = None,
77
+ outputs: Optional[dict[str, Any]] = None,
78
+ num_workers: Optional[int] = None,
79
+ ):
80
+ self.python_version = python_version
81
+ self.dependencies = dependencies or []
82
+ self.attachments = attachments or {}
83
+ self.params = params or {}
84
+ self.inputs = inputs or {}
85
+ self.outputs = outputs or {}
86
+ self.num_workers = num_workers
87
+
88
+ def get_param(self, name: str, default: Any) -> Any:
89
+ return self.params.get(name, default)
90
+
91
+ def get_input(self, name: str, default: Any) -> Any:
92
+ return self.inputs.get(name, default)
93
+
94
+ def get_output(self, name: str, default: Any) -> Any:
95
+ return self.outputs.get(name, default)
96
+
97
+ def get_attachment(self, name: str, default: Any) -> Any:
98
+ return self.attachments.get(name, default)
99
+
100
+ @staticmethod
101
+ def read(script: str) -> Optional[dict]:
102
+ """Converts inline script metadata to dict with all found data"""
103
+ regex = (
104
+ r"(?m)^# \/\/\/ (?P<type>[a-zA-Z0-9-]+)[ \t]*$[\r\n|\r|\n]"
105
+ "(?P<content>(?:^#(?:| .*)$[\r\n|\r|\n])+)^# \\/\\/\\/[ \t]*$"
106
+ )
107
+ name = "script"
108
+ matches = list(
109
+ filter(lambda m: m.group("type") == name, re.finditer(regex, script))
110
+ )
111
+ if len(matches) > 1:
112
+ raise ValueError(f"Multiple {name} blocks found")
113
+ if len(matches) == 1:
114
+ content = "".join(
115
+ line[2:] if line.startswith("# ") else line[1:]
116
+ for line in matches[0].group("content").splitlines(keepends=True)
117
+ )
118
+ return tomllib.loads(content)
119
+ return None
120
+
121
+ @staticmethod
122
+ def parse(script: str) -> Optional["ScriptConfig"]:
123
+ """
124
+ Method that is parsing inline script metadata from datachain script and
125
+ instantiating ScriptConfig class with found data. If no inline metadata is
126
+ found, it returns None
127
+ """
128
+ try:
129
+ meta = ScriptConfig.read(script)
130
+ if not meta:
131
+ return None
132
+ custom = meta.get("tools", {}).get("datachain", {})
133
+ return ScriptConfig(
134
+ python_version=meta.get("requires-python"),
135
+ dependencies=meta.get("dependencies"),
136
+ num_workers=custom.get("workers", {}).get("num_workers"),
137
+ attachments=custom.get("attachments"),
138
+ params={k: str(v) for k, v in custom.get("params").items()}
139
+ if custom.get("params")
140
+ else None,
141
+ inputs=custom.get("inputs"),
142
+ outputs=custom.get("outputs"),
143
+ )
144
+ except Exception as e:
145
+ raise ScriptConfigParsingError(
146
+ f"Error when parsing script meta: {e}"
147
+ ) from e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.10.0
3
+ Version: 0.11.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -49,6 +49,7 @@ Requires-Dist: platformdirs
49
49
  Requires-Dist: dvc-studio-client<1,>=0.21
50
50
  Requires-Dist: tabulate
51
51
  Requires-Dist: websockets
52
+ Requires-Dist: tomli; python_version < "3.11"
52
53
  Provides-Extra: docs
53
54
  Requires-Dist: mkdocs>=1.5.2; extra == "docs"
54
55
  Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
@@ -102,7 +103,7 @@ Requires-Dist: datachain[tests]; extra == "examples"
102
103
  Requires-Dist: defusedxml; extra == "examples"
103
104
  Requires-Dist: accelerate; extra == "examples"
104
105
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
105
- Requires-Dist: ultralytics==8.3.74; extra == "examples"
106
+ Requires-Dist: ultralytics==8.3.78; extra == "examples"
106
107
  Requires-Dist: open_clip_torch; extra == "examples"
107
108
 
108
109
  ================
@@ -77,6 +77,7 @@ src/datachain/nodes_fetcher.py
77
77
  src/datachain/nodes_thread_pool.py
78
78
  src/datachain/progress.py
79
79
  src/datachain/py.typed
80
+ src/datachain/script_meta.py
80
81
  src/datachain/studio.py
81
82
  src/datachain/telemetry.py
82
83
  src/datachain/utils.py
@@ -279,6 +280,7 @@ tests/unit/test_pytorch.py
279
280
  tests/unit/test_query.py
280
281
  tests/unit/test_query_metrics.py
281
282
  tests/unit/test_query_params.py
283
+ tests/unit/test_script_meta.py
282
284
  tests/unit/test_serializer.py
283
285
  tests/unit/test_session.py
284
286
  tests/unit/test_utils.py
@@ -32,6 +32,9 @@ dvc-studio-client<1,>=0.21
32
32
  tabulate
33
33
  websockets
34
34
 
35
+ [:python_version < "3.11"]
36
+ tomli
37
+
35
38
  [dev]
36
39
  datachain[docs,tests]
37
40
  mypy==1.15.0
@@ -55,7 +58,7 @@ datachain[tests]
55
58
  defusedxml
56
59
  accelerate
57
60
  huggingface_hub[hf_transfer]
58
- ultralytics==8.3.74
61
+ ultralytics==8.3.78
59
62
  open_clip_torch
60
63
 
61
64
  [hf]
@@ -358,6 +358,27 @@ def test_export_images_files(test_session, tmp_dir, tmp_path, use_cache):
358
358
  assert images_equal(img["data"], exported_img)
359
359
 
360
360
 
361
def test_to_storage_relative_path(test_session, tmp_path):
    """
    Save two images under ``tmp_path``, export them with ``to_storage`` into
    the relative directory ``output`` and compare each export to its original.
    """
    image_specs = (("img1.jpg", (64, 64)), ("img2.jpg", (128, 128)))
    images = [
        {"name": file_name, "data": Image.new(mode="RGB", size=dimensions)}
        for file_name, dimensions in image_specs
    ]

    for image in images:
        image["data"].save(tmp_path / image["name"])

    image_files = [
        ImageFile(path=image["name"], source=f"file://{tmp_path}")
        for image in images
    ]
    chain = DataChain.from_values(file=image_files, session=test_session)
    chain.to_storage("output", placement="filename")

    for image in images:
        exported = Image.open(Path("output") / image["name"])
        assert images_equal(image["data"], exported)
380
+
381
+
361
382
  def test_to_storage_files_filename_placement_not_unique_files(tmp_dir, test_session):
362
383
  data = b"some\x00data\x00is\x48\x65\x6c\x57\x6f\x72\x6c\x64\xff\xffheRe"
363
384
  bucket_name = "mybucket"
@@ -0,0 +1,119 @@
1
+ import pytest
2
+
3
+ from datachain.script_meta import ScriptConfig, ScriptConfigParsingError
4
+
5
+
6
def test_parsing_all_fields():
    """A metadata block that sets every field parses into an equal ScriptConfig."""
    script = """
# /// script
# requires-python = ">=3.12"
#
# dependencies = [
# "pandas < 2.1.0",
# "numpy == 1.26.4"
# ]
#
# [tools.datachain.workers]
# num_workers = 3
#
# [tools.datachain.attachments]
# image1 = "s3://ldb-public/image1.jpg"
# file1 = "s3://ldb-public/file.pdf"
#
# [tools.datachain.params]
# min_length_sec = 1
# cache = false
#
# [tools.datachain.inputs]
# threshold = 0.5
# start_ds_name = "ds://start"
#
# [tools.datachain.outputs]
# result_dataset = "ds://res"
# result_dir = "/temp"
#
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""
    parsed = ScriptConfig.parse(script)

    # Param values are expected as strings ("1", "False"), inputs keep their
    # TOML types.
    expected = ScriptConfig(
        python_version=">=3.12",
        dependencies=["pandas < 2.1.0", "numpy == 1.26.4"],
        attachments={
            "image1": "s3://ldb-public/image1.jpg",
            "file1": "s3://ldb-public/file.pdf",
        },
        params={"min_length_sec": "1", "cache": "False"},
        inputs={"threshold": 0.5, "start_ds_name": "ds://start"},
        outputs={"result_dataset": "ds://res", "result_dir": "/temp"},
        num_workers=3,
    )
    assert parsed == expected
    assert parsed.get_param("non_existing", "default") == "default"
56
+
57
+
58
def test_parsing_no_metadata():
    """A script without any inline metadata block parses to None."""
    script = """
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""

    parsed = ScriptConfig.parse(script)
    assert parsed is None
68
+
69
+
70
def test_parsing_empty():
    """A metadata block with no content lines parses to None."""
    script = """
# /// script
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""

    parsed = ScriptConfig.parse(script)
    assert parsed is None
82
+
83
+
84
def test_parsing_only_python_version():
    """A block with only ``requires-python`` equals a config with empty other fields."""
    script = """
# /// script
# requires-python = ">=3.12"
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""
    expected = ScriptConfig(
        python_version=">=3.12",
        dependencies=[],
        attachments={},
        params={},
        num_workers=None,
    )
    parsed = ScriptConfig.parse(script)
    assert parsed == expected
102
+
103
+
104
def test_error_when_parsing():
    """Invalid TOML in the metadata block raises ScriptConfigParsingError."""
    script = """
# /// script
# dependencies = [}
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""
    expected_message = (
        "Error when parsing script meta: Invalid value (at line 1, column 17)"
    )
    with pytest.raises(ScriptConfigParsingError) as caught:
        ScriptConfig.parse(script)
    assert str(caught.value) == expected_message
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes