datachain 0.30.1__tar.gz → 0.30.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (410)
  1. {datachain-0.30.1 → datachain-0.30.2}/.github/workflows/tests.yml +18 -0
  2. {datachain-0.30.1 → datachain-0.30.2}/.gitignore +4 -0
  3. {datachain-0.30.1 → datachain-0.30.2}/PKG-INFO +5 -2
  4. {datachain-0.30.1 → datachain-0.30.2}/pyproject.toml +9 -2
  5. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/database.py +37 -16
  6. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/datachain.py +5 -0
  7. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/__init__.py +2 -0
  8. datachain-0.30.2/src/datachain/sql/postgresql_dialect.py +9 -0
  9. datachain-0.30.2/src/datachain/sql/postgresql_types.py +21 -0
  10. datachain-0.30.2/src/datachain/sql/sqlite/__init__.py +11 -0
  11. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/sqlite/base.py +6 -1
  12. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/types.py +32 -8
  13. {datachain-0.30.1 → datachain-0.30.2}/src/datachain.egg-info/PKG-INFO +5 -2
  14. {datachain-0.30.1 → datachain-0.30.2}/src/datachain.egg-info/SOURCES.txt +2 -0
  15. {datachain-0.30.1 → datachain-0.30.2}/src/datachain.egg-info/requires.txt +5 -1
  16. {datachain-0.30.1 → datachain-0.30.2}/tests/examples/test_examples.py +1 -0
  17. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_to_database.py +474 -298
  18. datachain-0.30.1/src/datachain/sql/sqlite/__init__.py +0 -7
  19. {datachain-0.30.1 → datachain-0.30.2}/.cruft.json +0 -0
  20. {datachain-0.30.1 → datachain-0.30.2}/.gitattributes +0 -0
  21. {datachain-0.30.1 → datachain-0.30.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  22. {datachain-0.30.1 → datachain-0.30.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  23. {datachain-0.30.1 → datachain-0.30.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  24. {datachain-0.30.1 → datachain-0.30.2}/.github/codecov.yaml +0 -0
  25. {datachain-0.30.1 → datachain-0.30.2}/.github/dependabot.yml +0 -0
  26. {datachain-0.30.1 → datachain-0.30.2}/.github/workflows/benchmarks.yml +0 -0
  27. {datachain-0.30.1 → datachain-0.30.2}/.github/workflows/release.yml +0 -0
  28. {datachain-0.30.1 → datachain-0.30.2}/.github/workflows/tests-studio.yml +0 -0
  29. {datachain-0.30.1 → datachain-0.30.2}/.github/workflows/update-template.yaml +0 -0
  30. {datachain-0.30.1 → datachain-0.30.2}/.pre-commit-config.yaml +0 -0
  31. {datachain-0.30.1 → datachain-0.30.2}/CODE_OF_CONDUCT.rst +0 -0
  32. {datachain-0.30.1 → datachain-0.30.2}/LICENSE +0 -0
  33. {datachain-0.30.1 → datachain-0.30.2}/README.rst +0 -0
  34. {datachain-0.30.1 → datachain-0.30.2}/docs/assets/captioned_cartoons.png +0 -0
  35. {datachain-0.30.1 → datachain-0.30.2}/docs/assets/datachain-white.svg +0 -0
  36. {datachain-0.30.1 → datachain-0.30.2}/docs/assets/datachain.svg +0 -0
  37. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/auth/login.md +0 -0
  38. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/auth/logout.md +0 -0
  39. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/auth/team.md +0 -0
  40. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/auth/token.md +0 -0
  41. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/index.md +0 -0
  42. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/job/cancel.md +0 -0
  43. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/job/clusters.md +0 -0
  44. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/job/logs.md +0 -0
  45. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/job/ls.md +0 -0
  46. {datachain-0.30.1 → datachain-0.30.2}/docs/commands/job/run.md +0 -0
  47. {datachain-0.30.1 → datachain-0.30.2}/docs/contributing.md +0 -0
  48. {datachain-0.30.1 → datachain-0.30.2}/docs/css/github-permalink-style.css +0 -0
  49. {datachain-0.30.1 → datachain-0.30.2}/docs/examples.md +0 -0
  50. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/db_migrations.md +0 -0
  51. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/delta.md +0 -0
  52. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/env.md +0 -0
  53. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/index.md +0 -0
  54. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/namespaces.md +0 -0
  55. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/processing.md +0 -0
  56. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/remotes.md +0 -0
  57. {datachain-0.30.1 → datachain-0.30.2}/docs/guide/retry.md +0 -0
  58. {datachain-0.30.1 → datachain-0.30.2}/docs/index.md +0 -0
  59. {datachain-0.30.1 → datachain-0.30.2}/docs/overrides/main.html +0 -0
  60. {datachain-0.30.1 → datachain-0.30.2}/docs/quick-start.md +0 -0
  61. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/arrowrow.md +0 -0
  62. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/bbox.md +0 -0
  63. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/file.md +0 -0
  64. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/imagefile.md +0 -0
  65. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/index.md +0 -0
  66. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/pose.md +0 -0
  67. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/segment.md +0 -0
  68. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/tarvfile.md +0 -0
  69. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/textfile.md +0 -0
  70. {datachain-0.30.1 → datachain-0.30.2}/docs/references/data-types/videofile.md +0 -0
  71. {datachain-0.30.1 → datachain-0.30.2}/docs/references/datachain.md +0 -0
  72. {datachain-0.30.1 → datachain-0.30.2}/docs/references/func.md +0 -0
  73. {datachain-0.30.1 → datachain-0.30.2}/docs/references/index.md +0 -0
  74. {datachain-0.30.1 → datachain-0.30.2}/docs/references/toolkit.md +0 -0
  75. {datachain-0.30.1 → datachain-0.30.2}/docs/references/torch.md +0 -0
  76. {datachain-0.30.1 → datachain-0.30.2}/docs/references/udf.md +0 -0
  77. {datachain-0.30.1 → datachain-0.30.2}/docs/tutorials.md +0 -0
  78. {datachain-0.30.1 → datachain-0.30.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  79. {datachain-0.30.1 → datachain-0.30.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  80. {datachain-0.30.1 → datachain-0.30.2}/examples/computer_vision/openimage-detect.py +0 -0
  81. {datachain-0.30.1 → datachain-0.30.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  82. {datachain-0.30.1 → datachain-0.30.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  83. {datachain-0.30.1 → datachain-0.30.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  84. {datachain-0.30.1 → datachain-0.30.2}/examples/get_started/common_sql_functions.py +0 -0
  85. {datachain-0.30.1 → datachain-0.30.2}/examples/get_started/json-csv-reader.py +0 -0
  86. {datachain-0.30.1 → datachain-0.30.2}/examples/get_started/torch-loader.py +0 -0
  87. {datachain-0.30.1 → datachain-0.30.2}/examples/get_started/udfs/parallel.py +0 -0
  88. {datachain-0.30.1 → datachain-0.30.2}/examples/get_started/udfs/simple.py +0 -0
  89. {datachain-0.30.1 → datachain-0.30.2}/examples/get_started/udfs/stateful.py +0 -0
  90. {datachain-0.30.1 → datachain-0.30.2}/examples/incremental_processing/delta.py +0 -0
  91. {datachain-0.30.1 → datachain-0.30.2}/examples/incremental_processing/retry.py +0 -0
  92. {datachain-0.30.1 → datachain-0.30.2}/examples/incremental_processing/utils.py +0 -0
  93. {datachain-0.30.1 → datachain-0.30.2}/examples/llm_and_nlp/claude-query.py +0 -0
  94. {datachain-0.30.1 → datachain-0.30.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  95. {datachain-0.30.1 → datachain-0.30.2}/examples/multimodal/audio-to-text.py +0 -0
  96. {datachain-0.30.1 → datachain-0.30.2}/examples/multimodal/clip_inference.py +0 -0
  97. {datachain-0.30.1 → datachain-0.30.2}/examples/multimodal/hf_pipeline.py +0 -0
  98. {datachain-0.30.1 → datachain-0.30.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  99. {datachain-0.30.1 → datachain-0.30.2}/examples/multimodal/wds.py +0 -0
  100. {datachain-0.30.1 → datachain-0.30.2}/examples/multimodal/wds_filtered.py +0 -0
  101. {datachain-0.30.1 → datachain-0.30.2}/mkdocs.yml +0 -0
  102. {datachain-0.30.1 → datachain-0.30.2}/noxfile.py +0 -0
  103. {datachain-0.30.1 → datachain-0.30.2}/setup.cfg +0 -0
  104. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/__init__.py +0 -0
  105. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/__main__.py +0 -0
  106. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/asyn.py +0 -0
  107. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cache.py +0 -0
  108. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/catalog/__init__.py +0 -0
  109. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/catalog/catalog.py +0 -0
  110. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/catalog/datasource.py +0 -0
  111. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/catalog/loader.py +0 -0
  112. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/__init__.py +0 -0
  113. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/__init__.py +0 -0
  114. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/datasets.py +0 -0
  115. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/du.py +0 -0
  116. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/index.py +0 -0
  117. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/ls.py +0 -0
  118. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/misc.py +0 -0
  119. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/query.py +0 -0
  120. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/commands/show.py +0 -0
  121. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/parser/__init__.py +0 -0
  122. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/parser/job.py +0 -0
  123. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/parser/studio.py +0 -0
  124. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/parser/utils.py +0 -0
  125. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/cli/utils.py +0 -0
  126. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/__init__.py +0 -0
  127. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/azure.py +0 -0
  128. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/fileslice.py +0 -0
  129. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/fsspec.py +0 -0
  130. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/gcs.py +0 -0
  131. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/hf.py +0 -0
  132. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/local.py +0 -0
  133. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/client/s3.py +0 -0
  134. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/config.py +0 -0
  135. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/__init__.py +0 -0
  136. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/db_engine.py +0 -0
  137. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/job.py +0 -0
  138. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/metastore.py +0 -0
  139. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/schema.py +0 -0
  140. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/serializer.py +0 -0
  141. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/sqlite.py +0 -0
  142. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/data_storage/warehouse.py +0 -0
  143. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/dataset.py +0 -0
  144. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/delta.py +0 -0
  145. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/diff/__init__.py +0 -0
  146. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/error.py +0 -0
  147. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/fs/__init__.py +0 -0
  148. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/fs/reference.py +0 -0
  149. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/fs/utils.py +0 -0
  150. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/__init__.py +0 -0
  151. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/aggregate.py +0 -0
  152. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/array.py +0 -0
  153. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/base.py +0 -0
  154. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/conditional.py +0 -0
  155. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/func.py +0 -0
  156. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/numeric.py +0 -0
  157. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/path.py +0 -0
  158. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/random.py +0 -0
  159. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/string.py +0 -0
  160. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/func/window.py +0 -0
  161. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/job.py +0 -0
  162. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/__init__.py +0 -0
  163. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/arrow.py +0 -0
  164. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/audio.py +0 -0
  165. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/clip.py +0 -0
  166. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/convert/__init__.py +0 -0
  167. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/convert/flatten.py +0 -0
  168. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  169. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  170. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/convert/unflatten.py +0 -0
  171. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  172. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/data_model.py +0 -0
  173. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dataset_info.py +0 -0
  174. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/__init__.py +0 -0
  175. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/csv.py +0 -0
  176. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/datasets.py +0 -0
  177. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/hf.py +0 -0
  178. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/json.py +0 -0
  179. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/listings.py +0 -0
  180. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/pandas.py +0 -0
  181. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/parquet.py +0 -0
  182. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/records.py +0 -0
  183. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/storage.py +0 -0
  184. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/utils.py +0 -0
  185. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/dc/values.py +0 -0
  186. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/file.py +0 -0
  187. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/hf.py +0 -0
  188. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/image.py +0 -0
  189. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/listing.py +0 -0
  190. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/listing_info.py +0 -0
  191. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/meta_formats.py +0 -0
  192. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/model_store.py +0 -0
  193. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/namespaces.py +0 -0
  194. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/projects.py +0 -0
  195. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/pytorch.py +0 -0
  196. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/settings.py +0 -0
  197. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/signal_schema.py +0 -0
  198. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/tar.py +0 -0
  199. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/text.py +0 -0
  200. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/udf.py +0 -0
  201. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/udf_signature.py +0 -0
  202. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/utils.py +0 -0
  203. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/video.py +0 -0
  204. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/webdataset.py +0 -0
  205. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/lib/webdataset_laion.py +0 -0
  206. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/listing.py +0 -0
  207. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/__init__.py +0 -0
  208. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/bbox.py +0 -0
  209. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/pose.py +0 -0
  210. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/segment.py +0 -0
  211. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  212. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  213. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/ultralytics/pose.py +0 -0
  214. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/ultralytics/segment.py +0 -0
  215. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/model/utils.py +0 -0
  216. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/namespace.py +0 -0
  217. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/node.py +0 -0
  218. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/nodes_fetcher.py +0 -0
  219. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/nodes_thread_pool.py +0 -0
  220. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/progress.py +0 -0
  221. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/project.py +0 -0
  222. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/py.typed +0 -0
  223. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/__init__.py +0 -0
  224. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/batch.py +0 -0
  225. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/dataset.py +0 -0
  226. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/dispatch.py +0 -0
  227. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/metrics.py +0 -0
  228. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/params.py +0 -0
  229. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/queue.py +0 -0
  230. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/schema.py +0 -0
  231. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/session.py +0 -0
  232. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/udf.py +0 -0
  233. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/query/utils.py +0 -0
  234. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/remote/__init__.py +0 -0
  235. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/remote/studio.py +0 -0
  236. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/script_meta.py +0 -0
  237. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/semver.py +0 -0
  238. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/default/__init__.py +0 -0
  239. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/default/base.py +0 -0
  240. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/__init__.py +0 -0
  241. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/aggregate.py +0 -0
  242. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/array.py +0 -0
  243. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/conditional.py +0 -0
  244. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/numeric.py +0 -0
  245. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/path.py +0 -0
  246. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/random.py +0 -0
  247. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/functions/string.py +0 -0
  248. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/selectable.py +0 -0
  249. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/sqlite/types.py +0 -0
  250. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/sqlite/vector.py +0 -0
  251. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/sql/utils.py +0 -0
  252. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/studio.py +0 -0
  253. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/telemetry.py +0 -0
  254. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/toolkit/__init__.py +0 -0
  255. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/toolkit/split.py +0 -0
  256. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/torch/__init__.py +0 -0
  257. {datachain-0.30.1 → datachain-0.30.2}/src/datachain/utils.py +0 -0
  258. {datachain-0.30.1 → datachain-0.30.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  259. {datachain-0.30.1 → datachain-0.30.2}/src/datachain.egg-info/entry_points.txt +0 -0
  260. {datachain-0.30.1 → datachain-0.30.2}/src/datachain.egg-info/top_level.txt +0 -0
  261. {datachain-0.30.1 → datachain-0.30.2}/tests/__init__.py +0 -0
  262. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/__init__.py +0 -0
  263. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/conftest.py +0 -0
  264. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  265. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  266. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/datasets/.gitignore +0 -0
  267. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  268. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/test_datachain.py +0 -0
  269. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/test_ls.py +0 -0
  270. {datachain-0.30.1 → datachain-0.30.2}/tests/benchmarks/test_version.py +0 -0
  271. {datachain-0.30.1 → datachain-0.30.2}/tests/conftest.py +0 -0
  272. {datachain-0.30.1 → datachain-0.30.2}/tests/data.py +0 -0
  273. {datachain-0.30.1 → datachain-0.30.2}/tests/examples/__init__.py +0 -0
  274. {datachain-0.30.1 → datachain-0.30.2}/tests/examples/test_wds_e2e.py +0 -0
  275. {datachain-0.30.1 → datachain-0.30.2}/tests/examples/wds_data.py +0 -0
  276. {datachain-0.30.1 → datachain-0.30.2}/tests/func/__init__.py +0 -0
  277. {datachain-0.30.1 → datachain-0.30.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  278. {datachain-0.30.1 → datachain-0.30.2}/tests/func/data/lena.jpg +0 -0
  279. {datachain-0.30.1 → datachain-0.30.2}/tests/func/fake-service-account-credentials.json +0 -0
  280. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/__init__.py +0 -0
  281. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/test_aggregate.py +0 -0
  282. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/test_array.py +0 -0
  283. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/test_conditional.py +0 -0
  284. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/test_numeric.py +0 -0
  285. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/test_path.py +0 -0
  286. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/test_random.py +0 -0
  287. {datachain-0.30.1 → datachain-0.30.2}/tests/func/functions/test_string.py +0 -0
  288. {datachain-0.30.1 → datachain-0.30.2}/tests/func/model/__init__.py +0 -0
  289. {datachain-0.30.1 → datachain-0.30.2}/tests/func/model/data/running-mask0.png +0 -0
  290. {datachain-0.30.1 → datachain-0.30.2}/tests/func/model/data/running-mask1.png +0 -0
  291. {datachain-0.30.1 → datachain-0.30.2}/tests/func/model/data/running.jpg +0 -0
  292. {datachain-0.30.1 → datachain-0.30.2}/tests/func/model/data/ships.jpg +0 -0
  293. {datachain-0.30.1 → datachain-0.30.2}/tests/func/model/test_yolo.py +0 -0
  294. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_audio.py +0 -0
  295. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_batching.py +0 -0
  296. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_catalog.py +0 -0
  297. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_client.py +0 -0
  298. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_cloud_transfer.py +0 -0
  299. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_data_storage.py +0 -0
  300. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_datachain.py +0 -0
  301. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_datachain_merge.py +0 -0
  302. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_dataset_query.py +0 -0
  303. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_datasets.py +0 -0
  304. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_delta.py +0 -0
  305. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_feature_pickling.py +0 -0
  306. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_file.py +0 -0
  307. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_hf.py +0 -0
  308. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_hidden_field.py +0 -0
  309. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_image.py +0 -0
  310. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_listing.py +0 -0
  311. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_ls.py +0 -0
  312. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_meta_formats.py +0 -0
  313. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_metastore.py +0 -0
  314. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_metrics.py +0 -0
  315. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_pull.py +0 -0
  316. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_pytorch.py +0 -0
  317. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_query.py +0 -0
  318. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_read_database.py +0 -0
  319. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_read_dataset_remote.py +0 -0
  320. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  321. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_retry.py +0 -0
  322. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_session.py +0 -0
  323. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_studio_datetime_parsing.py +0 -0
  324. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_toolkit.py +0 -0
  325. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_video.py +0 -0
  326. {datachain-0.30.1 → datachain-0.30.2}/tests/func/test_warehouse.py +0 -0
  327. {datachain-0.30.1 → datachain-0.30.2}/tests/scripts/feature_class.py +0 -0
  328. {datachain-0.30.1 → datachain-0.30.2}/tests/scripts/feature_class_exception.py +0 -0
  329. {datachain-0.30.1 → datachain-0.30.2}/tests/scripts/feature_class_parallel.py +0 -0
  330. {datachain-0.30.1 → datachain-0.30.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  331. {datachain-0.30.1 → datachain-0.30.2}/tests/scripts/name_len_slow.py +0 -0
  332. {datachain-0.30.1 → datachain-0.30.2}/tests/test_atomicity.py +0 -0
  333. {datachain-0.30.1 → datachain-0.30.2}/tests/test_cli_e2e.py +0 -0
  334. {datachain-0.30.1 → datachain-0.30.2}/tests/test_cli_studio.py +0 -0
  335. {datachain-0.30.1 → datachain-0.30.2}/tests/test_import_time.py +0 -0
  336. {datachain-0.30.1 → datachain-0.30.2}/tests/test_query_e2e.py +0 -0
  337. {datachain-0.30.1 → datachain-0.30.2}/tests/test_telemetry.py +0 -0
  338. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/__init__.py +0 -0
  339. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/__init__.py +0 -0
  340. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/conftest.py +0 -0
  341. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_arrow.py +0 -0
  342. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_audio.py +0 -0
  343. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_clip.py +0 -0
  344. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_datachain.py +0 -0
  345. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  346. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  347. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_diff.py +0 -0
  348. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_feature.py +0 -0
  349. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_feature_utils.py +0 -0
  350. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_file.py +0 -0
  351. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_hf.py +0 -0
  352. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_image.py +0 -0
  353. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_listing_info.py +0 -0
  354. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_namespace.py +0 -0
  355. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_partition_by.py +0 -0
  356. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_project.py +0 -0
  357. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  358. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_schema.py +0 -0
  359. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_settings.py +0 -0
  360. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_signal_schema.py +0 -0
  361. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  362. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_text.py +0 -0
  363. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_udf.py +0 -0
  364. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_udf_signature.py +0 -0
  365. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_utils.py +0 -0
  366. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/lib/test_webdataset.py +0 -0
  367. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/model/__init__.py +0 -0
  368. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/model/test_bbox.py +0 -0
  369. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/model/test_pose.py +0 -0
  370. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/model/test_segment.py +0 -0
  371. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/model/test_utils.py +0 -0
  372. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/__init__.py +0 -0
  373. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  374. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  375. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  376. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/test_array.py +0 -0
  377. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/test_conditional.py +0 -0
  378. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/test_path.py +0 -0
  379. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/test_random.py +0 -0
  380. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/test_selectable.py +0 -0
  381. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/sql/test_string.py +0 -0
  382. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_asyn.py +0 -0
  383. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_cache.py +0 -0
  384. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_catalog.py +0 -0
  385. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_catalog_loader.py +0 -0
  386. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_cli_parsing.py +0 -0
  387. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_client.py +0 -0
  388. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_client_gcs.py +0 -0
  389. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_client_s3.py +0 -0
  390. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_config.py +0 -0
  391. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_data_storage.py +0 -0
  392. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_database_engine.py +0 -0
  393. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_dataset.py +0 -0
  394. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_dispatch.py +0 -0
  395. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_fileslice.py +0 -0
  396. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_func.py +0 -0
  397. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_listing.py +0 -0
  398. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_metastore.py +0 -0
  399. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_module_exports.py +0 -0
  400. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_pytorch.py +0 -0
  401. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_query.py +0 -0
  402. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_query_metrics.py +0 -0
  403. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_query_params.py +0 -0
  404. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_script_meta.py +0 -0
  405. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_semver.py +0 -0
  406. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_serializer.py +0 -0
  407. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_session.py +0 -0
  408. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_utils.py +0 -0
  409. {datachain-0.30.1 → datachain-0.30.2}/tests/unit/test_warehouse.py +0 -0
  410. {datachain-0.30.1 → datachain-0.30.2}/tests/utils.py +0 -0
@@ -78,6 +78,24 @@ jobs:
78
78
  fetch-depth: 0
79
79
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
80
80
 
81
+ - name: Setup PostgreSQL
82
+ if: runner.os != 'Windows'
83
+ uses: ikalnytskyi/action-setup-postgres@10ab8a56cc77b4823c2bfa57b1d4dd5605ef0481 # v7
84
+ with:
85
+ username: test
86
+ password: test
87
+ database: test_datachain
88
+ port: 5432
89
+ postgres-version: "17"
90
+ id: postgres
91
+
92
+ - name: Set PostgreSQL URI
93
+ if: runner.os != 'Windows'
94
+ run: |
95
+ FULL_URI="${{ steps.postgres.outputs.connection-uri }}"
96
+ echo "TEST_POSTGRES_URI=${FULL_URI%/*}" >> "$GITHUB_ENV"
97
+ shell: bash
98
+
81
99
  - name: Set up Python ${{ matrix.pyv }}
82
100
  uses: actions/setup-python@v5
83
101
  with:
@@ -145,3 +145,7 @@ cython_debug/
145
145
  *.pt
146
146
 
147
147
  .DS_Store/
148
+
149
+ # for local dev, e.g. LLM generated files, .env.test to override
150
+ # test variables, local scripts to try, etc
151
+ local/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.1
3
+ Version: 0.30.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -81,8 +81,10 @@ Provides-Extra: video
81
81
  Requires-Dist: ffmpeg-python; extra == "video"
82
82
  Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
83
83
  Requires-Dist: opencv-python; extra == "video"
84
+ Provides-Extra: postgres
85
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
84
86
  Provides-Extra: tests
85
- Requires-Dist: datachain[audio,hf,remote,torch,vector,video]; extra == "tests"
87
+ Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
86
88
  Requires-Dist: pytest<9,>=8; extra == "tests"
87
89
  Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
88
90
  Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
@@ -90,6 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
90
92
  Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
91
93
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
92
94
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
95
+ Requires-Dist: pytest-env>=1.1.0; extra == "tests"
93
96
  Requires-Dist: virtualenv; extra == "tests"
94
97
  Requires-Dist: dulwich; extra == "tests"
95
98
  Requires-Dist: hypothesis; extra == "tests"
@@ -96,8 +96,11 @@ video = [
96
96
  "imageio[ffmpeg,pyav]>=2.37.0",
97
97
  "opencv-python"
98
98
  ]
99
+ postgres = [
100
+ "psycopg2-binary>=2.9.0"
101
+ ]
99
102
  tests = [
100
- "datachain[torch,audio,remote,vector,hf,video]",
103
+ "datachain[torch,audio,remote,vector,hf,video,postgres]",
101
104
  "pytest>=8,<9",
102
105
  "pytest-sugar>=0.9.6",
103
106
  "pytest-cov>=4.1.0",
@@ -105,6 +108,7 @@ tests = [
105
108
  "pytest-servers[all]>=0.5.9",
106
109
  "pytest-benchmark[histogram]",
107
110
  "pytest-xdist>=3.3.1",
111
+ "pytest-env>=1.1.0",
108
112
  "virtualenv",
109
113
  "dulwich",
110
114
  "hypothesis",
@@ -150,13 +154,16 @@ namespaces = false
150
154
 
151
155
  [tool.pytest.ini_options]
152
156
  addopts = "-rfEs -m 'not examples' --benchmark-skip"
157
+ env_override_existing_values = true
158
+ env_files = "local/.env.test"
153
159
  markers = [
154
160
  "e2e: End-to-end tests",
155
161
  "examples: All examples",
156
162
  "computer_vision: Computer vision examples",
157
163
  "get_started: Get started examples",
158
164
  "llm_and_nlp: LLM and NLP examples",
159
- "multimodal: Multimodal examples"
165
+ "multimodal: Multimodal examples",
166
+ "incremental_processing: Delta and retry examples"
160
167
  ]
161
168
  filterwarnings = [
162
169
  "error::pandas.errors.PerformanceWarning",
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union
7
7
  import sqlalchemy
8
8
 
9
9
  from datachain.query.schema import ColumnMeta
10
+ from datachain.utils import batched
10
11
 
11
12
  DEFAULT_DATABASE_BATCH_SIZE = 10_000
12
13
 
@@ -74,6 +75,7 @@ def to_database(
74
75
  *,
75
76
  batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
76
77
  on_conflict: Optional[str] = None,
78
+ conflict_columns: Optional[list[str]] = None,
77
79
  column_mapping: Optional[dict[str, Optional[str]]] = None,
78
80
  ) -> None:
79
81
  """
@@ -82,8 +84,6 @@ def to_database(
82
84
  This is the core implementation that handles the actual database operations.
83
85
  For user-facing documentation, see DataChain.to_database() method.
84
86
  """
85
- from datachain.utils import batched
86
-
87
87
  if on_conflict and on_conflict not in ("ignore", "update"):
88
88
  raise ValueError(
89
89
  f"on_conflict must be 'ignore' or 'update', got: {on_conflict}"
@@ -105,19 +105,26 @@ def to_database(
105
105
  metadata = sqlalchemy.MetaData()
106
106
  table = sqlalchemy.Table(table_name, metadata, *columns)
107
107
 
108
- # Check if table already exists to determine if we should clean up on error.
109
- inspector = sqlalchemy.inspect(conn)
110
- assert inspector # to satisfy mypy
111
- table_existed_before = table_name in inspector.get_table_names()
112
-
108
+ table_existed_before = False
113
109
  try:
114
- table.create(conn, checkfirst=True)
115
- rows_iter = chain._leaf_values()
116
- for batch in batched(rows_iter, batch_rows):
117
- _process_batch(
118
- conn, table, batch, on_conflict, column_indices_and_names
119
- )
120
- conn.commit()
110
+ with conn.begin():
111
+ # Check if table exists to determine if we should clean up on error.
112
+ inspector = sqlalchemy.inspect(conn)
113
+ assert inspector # to satisfy mypy
114
+ table_existed_before = table_name in inspector.get_table_names()
115
+
116
+ table.create(conn, checkfirst=True)
117
+
118
+ rows_iter = chain._leaf_values()
119
+ for batch in batched(rows_iter, batch_rows):
120
+ _process_batch(
121
+ conn,
122
+ table,
123
+ batch,
124
+ on_conflict,
125
+ conflict_columns,
126
+ column_indices_and_names,
127
+ )
121
128
  except Exception:
122
129
  if not table_existed_before:
123
130
  try:
@@ -183,7 +190,9 @@ def _prepare_columns(all_columns, column_mapping):
183
190
  return column_indices_and_names, columns
184
191
 
185
192
 
186
- def _process_batch(conn, table, batch, on_conflict, column_indices_and_names):
193
+ def _process_batch(
194
+ conn, table, batch, on_conflict, conflict_columns, column_indices_and_names
195
+ ):
187
196
  """Process a batch of rows with conflict resolution."""
188
197
 
189
198
  def prepare_row(row_values):
@@ -217,7 +226,19 @@ def _process_batch(conn, table, batch, on_conflict, column_indices_and_names):
217
226
  update_values = {
218
227
  col.name: insert_stmt.excluded[col.name] for col in table.columns
219
228
  }
220
- insert_stmt = insert_stmt.on_conflict_do_update(set_=update_values)
229
+ if conn.engine.name == "postgresql":
230
+ if not conflict_columns:
231
+ raise ValueError(
232
+ "conflict_columns parameter is required when "
233
+ "on_conflict='update' with PostgreSQL. Specify the column "
234
+ "names that form a unique constraint."
235
+ )
236
+
237
+ insert_stmt = insert_stmt.on_conflict_do_update(
238
+ index_elements=conflict_columns, set_=update_values
239
+ )
240
+ else:
241
+ insert_stmt = insert_stmt.on_conflict_do_update(set_=update_values)
221
242
  elif on_conflict:
222
243
  import warnings
223
244
 
@@ -2296,6 +2296,7 @@ class DataChain:
2296
2296
  *,
2297
2297
  batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
2298
2298
  on_conflict: Optional[str] = None,
2299
+ conflict_columns: Optional[list[str]] = None,
2299
2300
  column_mapping: Optional[dict[str, Optional[str]]] = None,
2300
2301
  ) -> None:
2301
2302
  """Save chain to a database table using a given database connection.
@@ -2319,6 +2320,9 @@ class DataChain:
2319
2320
  (default)
2320
2321
  - "ignore": Skip duplicate rows silently
2321
2322
  - "update": Update existing rows with new values
2323
+ conflict_columns: List of column names that form a unique constraint
2324
+ for conflict resolution. Required when on_conflict='update' and
2325
+ using PostgreSQL.
2322
2326
  column_mapping: Optional mapping to rename or skip columns:
2323
2327
  - Dict mapping DataChain column names to database column names
2324
2328
  - Set values to None to skip columns entirely, or use `defaultdict` to
@@ -2377,6 +2381,7 @@ class DataChain:
2377
2381
  connection,
2378
2382
  batch_rows=batch_rows,
2379
2383
  on_conflict=on_conflict,
2384
+ conflict_columns=conflict_columns,
2380
2385
  column_mapping=column_mapping,
2381
2386
  )
2382
2387
 
@@ -1,6 +1,8 @@
1
1
  from sqlalchemy.sql.elements import literal
2
2
  from sqlalchemy.sql.expression import column
3
3
 
4
+ # Import PostgreSQL dialect registration (registers PostgreSQL type converter)
5
+ from . import postgresql_dialect # noqa: F401
4
6
  from .default import setup as default_setup
5
7
  from .selectable import select, values
6
8
 
@@ -0,0 +1,9 @@
1
+ """
2
+ PostgreSQL dialect registration for DataChain.
3
+ """
4
+
5
+ from datachain.sql.postgresql_types import PostgreSQLTypeConverter
6
+ from datachain.sql.types import register_backend_types
7
+
8
+ # Register PostgreSQL type converter
9
+ register_backend_types("postgresql", PostgreSQLTypeConverter())
@@ -0,0 +1,21 @@
1
+ """
2
+ PostgreSQL-specific type converter for DataChain.
3
+
4
+ Handles PostgreSQL-specific type mappings that differ from the default dialect.
5
+ """
6
+
7
+ from sqlalchemy.dialects import postgresql
8
+
9
+ from datachain.sql.types import TypeConverter
10
+
11
+
12
+ class PostgreSQLTypeConverter(TypeConverter):
13
+ """PostgreSQL-specific type converter."""
14
+
15
+ def datetime(self):
16
+ """PostgreSQL uses TIMESTAMP WITH TIME ZONE to preserve timezone information."""
17
+ return postgresql.TIMESTAMP(timezone=True)
18
+
19
+ def json(self):
20
+ """PostgreSQL uses JSONB for better performance and query capabilities."""
21
+ return postgresql.JSONB()
@@ -0,0 +1,11 @@
1
+ from .base import (
2
+ create_user_defined_sql_functions,
3
+ setup,
4
+ sqlite_dialect,
5
+ )
6
+
7
+ __all__ = [
8
+ "create_user_defined_sql_functions",
9
+ "setup",
10
+ "sqlite_dialect",
11
+ ]
@@ -304,7 +304,11 @@ def register_user_defined_sql_functions() -> None:
304
304
 
305
305
 
306
306
  def adapt_datetime(val: datetime) -> str:
307
- if not (val.tzinfo is timezone.utc or val.tzname() == "UTC"):
307
+ is_utc_check = val.tzinfo is timezone.utc
308
+ tzname_check = val.tzname() == "UTC"
309
+ combined_check = is_utc_check or tzname_check
310
+
311
+ if not combined_check:
308
312
  try:
309
313
  val = val.astimezone(timezone.utc)
310
314
  except (OverflowError, ValueError, OSError):
@@ -314,6 +318,7 @@ def adapt_datetime(val: datetime) -> str:
314
318
  val = datetime.min.replace(tzinfo=timezone.utc)
315
319
  else:
316
320
  raise
321
+
317
322
  return val.replace(tzinfo=None).isoformat(" ")
318
323
 
319
324
 
@@ -58,9 +58,14 @@ def converter(dialect) -> "TypeConverter":
58
58
  try:
59
59
  return registry[name]
60
60
  except KeyError:
61
- raise ValueError(
62
- f"No type converter registered for dialect: {dialect.name!r}"
63
- ) from None
61
+ # Fall back to default converter if specific dialect not found
62
+ try:
63
+ return registry["default"]
64
+ except KeyError:
65
+ raise ValueError(
66
+ f"No type converter registered for dialect: {dialect.name!r} "
67
+ f"and no default converter available"
68
+ ) from None
64
69
 
65
70
 
66
71
  def read_converter(dialect) -> "TypeReadConverter":
@@ -68,9 +73,14 @@ def read_converter(dialect) -> "TypeReadConverter":
68
73
  try:
69
74
  return read_converter_registry[name]
70
75
  except KeyError:
71
- raise ValueError(
72
- f"No read type converter registered for dialect: {dialect.name!r}"
73
- ) from None
76
+ # Fall back to default converter if specific dialect not found
77
+ try:
78
+ return read_converter_registry["default"]
79
+ except KeyError:
80
+ raise ValueError(
81
+ f"No read type converter registered for dialect: {dialect.name!r} "
82
+ f"and no default converter available"
83
+ ) from None
74
84
 
75
85
 
76
86
  def type_defaults(dialect) -> "TypeDefaults":
@@ -78,7 +88,14 @@ def type_defaults(dialect) -> "TypeDefaults":
78
88
  try:
79
89
  return type_defaults_registry[name]
80
90
  except KeyError:
81
- raise ValueError(f"No type defaults registered for dialect: {name!r}") from None
91
+ # Fall back to default converter if specific dialect not found
92
+ try:
93
+ return type_defaults_registry["default"]
94
+ except KeyError:
95
+ raise ValueError(
96
+ f"No type defaults registered for dialect: {dialect.name!r} "
97
+ f"and no default converter available"
98
+ ) from None
82
99
 
83
100
 
84
101
  def db_defaults(dialect) -> "DBDefaults":
@@ -86,7 +103,14 @@ def db_defaults(dialect) -> "DBDefaults":
86
103
  try:
87
104
  return db_defaults_registry[name]
88
105
  except KeyError:
89
- raise ValueError(f"No DB defaults registered for dialect: {name!r}") from None
106
+ # Fall back to default converter if specific dialect not found
107
+ try:
108
+ return db_defaults_registry["default"]
109
+ except KeyError:
110
+ raise ValueError(
111
+ f"No DB defaults registered for dialect: {dialect.name!r} "
112
+ f"and no default converter available"
113
+ ) from None
90
114
 
91
115
 
92
116
  class SQLType(TypeDecorator):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.1
3
+ Version: 0.30.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -81,8 +81,10 @@ Provides-Extra: video
81
81
  Requires-Dist: ffmpeg-python; extra == "video"
82
82
  Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
83
83
  Requires-Dist: opencv-python; extra == "video"
84
+ Provides-Extra: postgres
85
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
84
86
  Provides-Extra: tests
85
- Requires-Dist: datachain[audio,hf,remote,torch,vector,video]; extra == "tests"
87
+ Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
86
88
  Requires-Dist: pytest<9,>=8; extra == "tests"
87
89
  Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
88
90
  Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
@@ -90,6 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
90
92
  Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
91
93
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
92
94
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
95
+ Requires-Dist: pytest-env>=1.1.0; extra == "tests"
93
96
  Requires-Dist: virtualenv; extra == "tests"
94
97
  Requires-Dist: dulwich; extra == "tests"
95
98
  Requires-Dist: hypothesis; extra == "tests"
@@ -231,6 +231,8 @@ src/datachain/query/utils.py
231
231
  src/datachain/remote/__init__.py
232
232
  src/datachain/remote/studio.py
233
233
  src/datachain/sql/__init__.py
234
+ src/datachain/sql/postgresql_dialect.py
235
+ src/datachain/sql/postgresql_types.py
234
236
  src/datachain/sql/selectable.py
235
237
  src/datachain/sql/types.py
236
238
  src/datachain/sql/utils.py
@@ -76,12 +76,15 @@ fsspec>=2024.12.0
76
76
  [hf:sys_platform == "linux" or sys_platform == "darwin"]
77
77
  datasets[audio]>=4.0.0
78
78
 
79
+ [postgres]
80
+ psycopg2-binary>=2.9.0
81
+
79
82
  [remote]
80
83
  lz4
81
84
  requests>=2.22.0
82
85
 
83
86
  [tests]
84
- datachain[audio,hf,remote,torch,vector,video]
87
+ datachain[audio,hf,postgres,remote,torch,vector,video]
85
88
  pytest<9,>=8
86
89
  pytest-sugar>=0.9.6
87
90
  pytest-cov>=4.1.0
@@ -89,6 +92,7 @@ pytest-mock>=3.12.0
89
92
  pytest-servers[all]>=0.5.9
90
93
  pytest-benchmark[histogram]
91
94
  pytest-xdist>=3.3.1
95
+ pytest-env>=1.1.0
92
96
  virtualenv
93
97
  dulwich
94
98
  hypothesis
@@ -93,6 +93,7 @@ def test_multimodal(example):
93
93
 
94
94
 
95
95
  @pytest.mark.examples
96
+ @pytest.mark.incremental_processing
96
97
  @pytest.mark.parametrize("example", incremental_processing_examples)
97
98
  def test_incremental_processing_examples(example):
98
99
  smoke_test(example)