datachain 0.7.8__py3-none-any.whl → 0.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/cli.py +9 -3
- datachain/client/fsspec.py +4 -2
- datachain/client/local.py +9 -4
- datachain/data_storage/metastore.py +3 -2
- datachain/func/__init__.py +4 -1
- datachain/func/numeric.py +46 -0
- datachain/func/string.py +46 -0
- datachain/lib/convert/flatten.py +7 -5
- datachain/lib/convert/unflatten.py +2 -2
- datachain/lib/convert/values_to_tuples.py +1 -1
- datachain/lib/dc.py +1 -0
- datachain/lib/pytorch.py +54 -37
- datachain/lib/utils.py +1 -1
- datachain/query/dataset.py +1 -1
- datachain/remote/studio.py +44 -25
- datachain/sql/functions/numeric.py +12 -0
- datachain/sql/functions/string.py +12 -0
- datachain/sql/sqlite/base.py +40 -0
- datachain/studio.py +2 -2
- datachain-0.7.10.dist-info/METADATA +207 -0
- {datachain-0.7.8.dist-info → datachain-0.7.10.dist-info}/RECORD +25 -25
- datachain-0.7.8.dist-info/METADATA +0 -488
- {datachain-0.7.8.dist-info → datachain-0.7.10.dist-info}/LICENSE +0 -0
- {datachain-0.7.8.dist-info → datachain-0.7.10.dist-info}/WHEEL +0 -0
- {datachain-0.7.8.dist-info → datachain-0.7.10.dist-info}/entry_points.txt +0 -0
- {datachain-0.7.8.dist-info → datachain-0.7.10.dist-info}/top_level.txt +0 -0
|
@@ -48,7 +48,19 @@ class replace(GenericFunction): # noqa: N801
|
|
|
48
48
|
inherit_cache = True
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
class byte_hamming_distance(GenericFunction): # noqa: N801
|
|
52
|
+
"""
|
|
53
|
+
Returns the Hamming distance between two strings.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
type = Int64()
|
|
57
|
+
package = "string"
|
|
58
|
+
name = "hamming_distance"
|
|
59
|
+
inherit_cache = True
|
|
60
|
+
|
|
61
|
+
|
|
51
62
|
compiler_not_implemented(length)
|
|
52
63
|
compiler_not_implemented(split)
|
|
53
64
|
compiler_not_implemented(regexp_replace)
|
|
54
65
|
compiler_not_implemented(replace)
|
|
66
|
+
compiler_not_implemented(byte_hamming_distance)
|
datachain/sql/sqlite/base.py
CHANGED
|
@@ -90,6 +90,7 @@ def setup():
|
|
|
90
90
|
compiles(string.split, "sqlite")(compile_string_split)
|
|
91
91
|
compiles(string.regexp_replace, "sqlite")(compile_string_regexp_replace)
|
|
92
92
|
compiles(string.replace, "sqlite")(compile_string_replace)
|
|
93
|
+
compiles(string.byte_hamming_distance, "sqlite")(compile_byte_hamming_distance)
|
|
93
94
|
compiles(conditional.greatest, "sqlite")(compile_greatest)
|
|
94
95
|
compiles(conditional.least, "sqlite")(compile_least)
|
|
95
96
|
compiles(Values, "sqlite")(compile_values)
|
|
@@ -104,6 +105,7 @@ def setup():
|
|
|
104
105
|
compiles(numeric.bit_rshift, "sqlite")(compile_bitwise_rshift)
|
|
105
106
|
compiles(numeric.bit_lshift, "sqlite")(compile_bitwise_lshift)
|
|
106
107
|
compiles(numeric.int_hash_64, "sqlite")(compile_int_hash_64)
|
|
108
|
+
compiles(numeric.bit_hamming_distance, "sqlite")(compile_bit_hamming_distance)
|
|
107
109
|
|
|
108
110
|
if load_usearch_extension(sqlite3.connect(":memory:")):
|
|
109
111
|
compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
|
|
@@ -191,6 +193,26 @@ def sqlite_int_hash_64(x: int) -> int:
|
|
|
191
193
|
return x if x < 1 << 63 else (x & MAX_INT64) - (1 << 64)
|
|
192
194
|
|
|
193
195
|
|
|
196
|
+
def sqlite_bit_hamming_distance(a: int, b: int) -> int:
|
|
197
|
+
"""Calculate the Hamming distance between two integers."""
|
|
198
|
+
diff = (a & MAX_INT64) ^ (b & MAX_INT64)
|
|
199
|
+
if hasattr(diff, "bit_count"):
|
|
200
|
+
return diff.bit_count()
|
|
201
|
+
return bin(diff).count("1")
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def sqlite_byte_hamming_distance(a: str, b: str) -> int:
|
|
205
|
+
"""Calculate the Hamming distance between two strings."""
|
|
206
|
+
diff = 0
|
|
207
|
+
if len(a) < len(b):
|
|
208
|
+
diff = len(b) - len(a)
|
|
209
|
+
b = b[: len(a)]
|
|
210
|
+
elif len(b) < len(a):
|
|
211
|
+
diff = len(a) - len(b)
|
|
212
|
+
a = a[: len(b)]
|
|
213
|
+
return diff + sum(c1 != c2 for c1, c2 in zip(a, b))
|
|
214
|
+
|
|
215
|
+
|
|
194
216
|
def register_user_defined_sql_functions() -> None:
|
|
195
217
|
# Register optional functions if we have the necessary dependencies
|
|
196
218
|
# and otherwise register functions that will raise an exception with
|
|
@@ -225,6 +247,9 @@ def register_user_defined_sql_functions() -> None:
|
|
|
225
247
|
"bitwise_lshift", 2, lambda a, b: a << b, deterministic=True
|
|
226
248
|
)
|
|
227
249
|
conn.create_function("int_hash_64", 1, sqlite_int_hash_64, deterministic=True)
|
|
250
|
+
conn.create_function(
|
|
251
|
+
"bit_hamming_distance", 2, sqlite_bit_hamming_distance, deterministic=True
|
|
252
|
+
)
|
|
228
253
|
|
|
229
254
|
_registered_function_creators["numeric_functions"] = create_numeric_functions
|
|
230
255
|
|
|
@@ -237,6 +262,9 @@ def register_user_defined_sql_functions() -> None:
|
|
|
237
262
|
conn.create_function(
|
|
238
263
|
"regexp_replace", 3, sqlite_regexp_replace, deterministic=True
|
|
239
264
|
)
|
|
265
|
+
conn.create_function(
|
|
266
|
+
"byte_hamming_distance", 2, sqlite_byte_hamming_distance, deterministic=True
|
|
267
|
+
)
|
|
240
268
|
|
|
241
269
|
_registered_function_creators["string_functions"] = create_string_functions
|
|
242
270
|
|
|
@@ -383,6 +411,18 @@ def compile_int_hash_64(element, compiler, **kwargs):
|
|
|
383
411
|
return compiler.process(func.int_hash_64(*element.clauses.clauses), **kwargs)
|
|
384
412
|
|
|
385
413
|
|
|
414
|
+
def compile_bit_hamming_distance(element, compiler, **kwargs):
|
|
415
|
+
return compiler.process(
|
|
416
|
+
func.bit_hamming_distance(*element.clauses.clauses), **kwargs
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def compile_byte_hamming_distance(element, compiler, **kwargs):
|
|
421
|
+
return compiler.process(
|
|
422
|
+
func.byte_hamming_distance(*element.clauses.clauses), **kwargs
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
|
|
386
426
|
def py_json_array_length(arr):
|
|
387
427
|
return len(orjson.loads(arr))
|
|
388
428
|
|
datachain/studio.py
CHANGED
|
@@ -155,7 +155,7 @@ def edit_studio_dataset(
|
|
|
155
155
|
if not response.ok:
|
|
156
156
|
raise_remote_error(response.message)
|
|
157
157
|
|
|
158
|
-
print(f"Dataset {name} updated")
|
|
158
|
+
print(f"Dataset '{name}' updated in Studio")
|
|
159
159
|
|
|
160
160
|
|
|
161
161
|
def remove_studio_dataset(
|
|
@@ -169,7 +169,7 @@ def remove_studio_dataset(
|
|
|
169
169
|
if not response.ok:
|
|
170
170
|
raise_remote_error(response.message)
|
|
171
171
|
|
|
172
|
-
print(f"Dataset {name} removed")
|
|
172
|
+
print(f"Dataset '{name}' removed from Studio")
|
|
173
173
|
|
|
174
174
|
|
|
175
175
|
def save_config(hostname, token):
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: datachain
|
|
3
|
+
Version: 0.7.10
|
|
4
|
+
Summary: Wrangle unstructured AI data at scale
|
|
5
|
+
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Documentation, https://datachain.dvc.ai
|
|
8
|
+
Project-URL: Issues, https://github.com/iterative/datachain/issues
|
|
9
|
+
Project-URL: Source, https://github.com/iterative/datachain
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Description-Content-Type: text/x-rst
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: pyyaml
|
|
20
|
+
Requires-Dist: tomlkit
|
|
21
|
+
Requires-Dist: tqdm
|
|
22
|
+
Requires-Dist: numpy<3,>=1
|
|
23
|
+
Requires-Dist: pandas>=2.0.0
|
|
24
|
+
Requires-Dist: pyarrow
|
|
25
|
+
Requires-Dist: typing-extensions
|
|
26
|
+
Requires-Dist: python-dateutil>=2
|
|
27
|
+
Requires-Dist: attrs>=21.3.0
|
|
28
|
+
Requires-Dist: s3fs>=2024.2.0
|
|
29
|
+
Requires-Dist: gcsfs>=2024.2.0
|
|
30
|
+
Requires-Dist: adlfs>=2024.2.0
|
|
31
|
+
Requires-Dist: dvc-data<4,>=3.10
|
|
32
|
+
Requires-Dist: dvc-objects<6,>=4
|
|
33
|
+
Requires-Dist: shtab<2,>=1.3.4
|
|
34
|
+
Requires-Dist: sqlalchemy>=2
|
|
35
|
+
Requires-Dist: multiprocess==0.70.16
|
|
36
|
+
Requires-Dist: cloudpickle
|
|
37
|
+
Requires-Dist: orjson>=3.10.5
|
|
38
|
+
Requires-Dist: pydantic<3,>=2
|
|
39
|
+
Requires-Dist: jmespath>=1.0
|
|
40
|
+
Requires-Dist: datamodel-code-generator>=0.25
|
|
41
|
+
Requires-Dist: Pillow<12,>=10.0.0
|
|
42
|
+
Requires-Dist: msgpack<2,>=1.0.4
|
|
43
|
+
Requires-Dist: psutil
|
|
44
|
+
Requires-Dist: huggingface_hub
|
|
45
|
+
Requires-Dist: iterative-telemetry>=0.0.9
|
|
46
|
+
Requires-Dist: platformdirs
|
|
47
|
+
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
48
|
+
Requires-Dist: tabulate
|
|
49
|
+
Provides-Extra: docs
|
|
50
|
+
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocs-material>=9.3.1; extra == "docs"
|
|
53
|
+
Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
|
|
55
|
+
Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
|
|
56
|
+
Provides-Extra: torch
|
|
57
|
+
Requires-Dist: torch>=2.1.0; extra == "torch"
|
|
58
|
+
Requires-Dist: torchvision; extra == "torch"
|
|
59
|
+
Requires-Dist: transformers>=4.36.0; extra == "torch"
|
|
60
|
+
Provides-Extra: remote
|
|
61
|
+
Requires-Dist: lz4; extra == "remote"
|
|
62
|
+
Requires-Dist: requests>=2.22.0; extra == "remote"
|
|
63
|
+
Provides-Extra: vector
|
|
64
|
+
Requires-Dist: usearch; extra == "vector"
|
|
65
|
+
Provides-Extra: hf
|
|
66
|
+
Requires-Dist: numba>=0.60.0; extra == "hf"
|
|
67
|
+
Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
|
|
68
|
+
Provides-Extra: tests
|
|
69
|
+
Requires-Dist: datachain[hf,remote,torch,vector]; extra == "tests"
|
|
70
|
+
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
71
|
+
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
72
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
73
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
74
|
+
Requires-Dist: pytest-servers[all]>=0.5.8; extra == "tests"
|
|
75
|
+
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
76
|
+
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
77
|
+
Requires-Dist: virtualenv; extra == "tests"
|
|
78
|
+
Requires-Dist: dulwich; extra == "tests"
|
|
79
|
+
Requires-Dist: hypothesis; extra == "tests"
|
|
80
|
+
Requires-Dist: open_clip_torch; extra == "tests"
|
|
81
|
+
Requires-Dist: aiotools>=1.7.0; extra == "tests"
|
|
82
|
+
Requires-Dist: requests-mock; extra == "tests"
|
|
83
|
+
Requires-Dist: scipy; extra == "tests"
|
|
84
|
+
Provides-Extra: dev
|
|
85
|
+
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
86
|
+
Requires-Dist: mypy==1.13.0; extra == "dev"
|
|
87
|
+
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
88
|
+
Requires-Dist: types-pytz; extra == "dev"
|
|
89
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
90
|
+
Requires-Dist: types-requests; extra == "dev"
|
|
91
|
+
Requires-Dist: types-tabulate; extra == "dev"
|
|
92
|
+
Provides-Extra: examples
|
|
93
|
+
Requires-Dist: datachain[tests]; extra == "examples"
|
|
94
|
+
Requires-Dist: numpy<2,>=1; extra == "examples"
|
|
95
|
+
Requires-Dist: defusedxml; extra == "examples"
|
|
96
|
+
Requires-Dist: accelerate; extra == "examples"
|
|
97
|
+
Requires-Dist: unstructured[embed-huggingface,pdf]<0.16.0; extra == "examples"
|
|
98
|
+
Requires-Dist: pdfplumber==0.11.4; extra == "examples"
|
|
99
|
+
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
100
|
+
Requires-Dist: onnx==1.16.1; extra == "examples"
|
|
101
|
+
Requires-Dist: ultralytics==8.3.37; extra == "examples"
|
|
102
|
+
|
|
103
|
+
================
|
|
104
|
+
|logo| DataChain
|
|
105
|
+
================
|
|
106
|
+
|
|
107
|
+
|PyPI| |Python Version| |Codecov| |Tests|
|
|
108
|
+
|
|
109
|
+
.. |logo| image:: docs/assets/datachain.svg
|
|
110
|
+
:height: 24
|
|
111
|
+
.. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
|
|
112
|
+
:target: https://pypi.org/project/datachain/
|
|
113
|
+
:alt: PyPI
|
|
114
|
+
.. |Python Version| image:: https://img.shields.io/pypi/pyversions/datachain
|
|
115
|
+
:target: https://pypi.org/project/datachain
|
|
116
|
+
:alt: Python Version
|
|
117
|
+
.. |Codecov| image:: https://codecov.io/gh/iterative/datachain/graph/badge.svg?token=byliXGGyGB
|
|
118
|
+
:target: https://codecov.io/gh/iterative/datachain
|
|
119
|
+
:alt: Codecov
|
|
120
|
+
.. |Tests| image:: https://github.com/iterative/datachain/actions/workflows/tests.yml/badge.svg
|
|
121
|
+
:target: https://github.com/iterative/datachain/actions/workflows/tests.yml
|
|
122
|
+
:alt: Tests
|
|
123
|
+
|
|
124
|
+
DataChain is a Python-based AI-data warehouse for transforming and analyzing unstructured
|
|
125
|
+
data like images, audio, videos, text and PDFs. It integrates with external storage
|
|
126
|
+
(e.g. S3) to process data efficiently without data duplication and manages metadata
|
|
127
|
+
in an internal database for easy and efficient querying.
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
Use Cases
|
|
131
|
+
=========
|
|
132
|
+
|
|
133
|
+
1. **ETL.** Pythonic framework for describing and running unstructured data transformations
|
|
134
|
+
and enrichments, applying models to data, including LLMs.
|
|
135
|
+
2. **Analytics.** DataChain dataset is a table that combines all the information about data
|
|
136
|
+
objects in one place + it provides a dataframe-like API and a vectorized engine to do analytics
|
|
137
|
+
on these tables at scale.
|
|
138
|
+
3. **Versioning.** DataChain doesn't store, require moving or copying data (unlike DVC).
|
|
139
|
+
The perfect use case is a bucket with thousands or millions of images, videos, audio, PDFs.
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
Key Features
|
|
143
|
+
============
|
|
144
|
+
|
|
145
|
+
📂 **Multimodal Dataset Versioning.**
|
|
146
|
+
- Version unstructured data without moving or creating data copies, by supporting
|
|
147
|
+
references to S3, GCP, Azure, and local file systems.
|
|
148
|
+
- Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
|
|
149
|
+
- Unite files and metadata together into persistent, versioned, columnar datasets.
|
|
150
|
+
|
|
151
|
+
🐍 **Python-friendly.**
|
|
152
|
+
- Operate on Python objects and object fields: float scores, strings, matrixes,
|
|
153
|
+
LLM response objects.
|
|
154
|
+
- Run Python code on high-scale, terabyte-size datasets, with built-in
|
|
155
|
+
parallelization and memory-efficient computing — no SQL or Spark required.
|
|
156
|
+
|
|
157
|
+
🧠 **Data Enrichment and Processing.**
|
|
158
|
+
- Generate metadata using local AI models and LLM APIs.
|
|
159
|
+
- Filter, join, and group datasets by metadata. Search by vector embeddings.
|
|
160
|
+
- High-performance vectorized operations on Python objects: sum, count, avg, etc.
|
|
161
|
+
- Pass datasets to PyTorch and TensorFlow, or export them back into storage.
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
Getting Started
|
|
165
|
+
===============
|
|
166
|
+
|
|
167
|
+
Visit `Quick Start <https://docs.datachain.ai/quick-start>`_ to get started with `DataChain` and learn more.
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
Contributing
|
|
171
|
+
============
|
|
172
|
+
|
|
173
|
+
Contributions are very welcome. To learn more, see the `Contributor Guide`_.
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
Community and Support
|
|
177
|
+
=====================
|
|
178
|
+
|
|
179
|
+
* `Docs <https://docs.datachain.ai/>`_
|
|
180
|
+
* `File an issue`_ if you encounter any problems
|
|
181
|
+
* `Discord Chat <https://dvc.org/chat>`_
|
|
182
|
+
* `Email <mailto:support@dvc.org>`_
|
|
183
|
+
* `Twitter <https://twitter.com/DVCorg>`_
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
DataChain Studio Platform
|
|
187
|
+
=========================
|
|
188
|
+
|
|
189
|
+
`DataChain Studio`_ is a proprietary solution for teams that offers:
|
|
190
|
+
|
|
191
|
+
- **Centralized dataset registry** to manage data, code and
|
|
192
|
+
dependencies in one place.
|
|
193
|
+
- **Data Lineage** for data sources as well as derivative datasets.
|
|
194
|
+
- **UI for Multimodal Data** like images, videos, and PDFs.
|
|
195
|
+
- **Scalable Compute** to handle large datasets (100M+ files) and in-house
|
|
196
|
+
AI model inference.
|
|
197
|
+
- **Access control** including SSO and team based collaboration.
|
|
198
|
+
|
|
199
|
+
.. _PyPI: https://pypi.org/
|
|
200
|
+
.. _file an issue: https://github.com/iterative/datachain/issues
|
|
201
|
+
.. github-only
|
|
202
|
+
.. _Contributor Guide: https://docs.datachain.ai/contributing
|
|
203
|
+
.. _Pydantic: https://github.com/pydantic/pydantic
|
|
204
|
+
.. _publicly available: https://radar.kit.edu/radar/en/dataset/FdJmclKpjHzLfExE.ExpBot%2B-%2BA%2Bdataset%2Bof%2B79%2Bdialogs%2Bwith%2Ban%2Bexperimental%2Bcustomer%2Bservice%2Bchatbot
|
|
205
|
+
.. _SQLite: https://www.sqlite.org/
|
|
206
|
+
.. _Getting Started: https://docs.datachain.ai/
|
|
207
|
+
.. _DataChain Studio: https://studio.datachain.ai/
|
|
@@ -2,7 +2,7 @@ datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
|
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
3
|
datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
|
|
4
4
|
datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
|
|
5
|
-
datachain/cli.py,sha256=
|
|
5
|
+
datachain/cli.py,sha256=wQiYQ_qSVCGvS06pkknT9_FIBdFRzBdeRusW9uXE3vQ,42505
|
|
6
6
|
datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
|
|
7
7
|
datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
|
|
8
8
|
datachain/dataset.py,sha256=P-pDBgvPqJGDhq_I7fwCfb6hY8E8mIAO8Q0NT7SNlNE,19128
|
|
@@ -14,7 +14,7 @@ datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,11
|
|
|
14
14
|
datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
|
|
15
15
|
datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
|
|
16
16
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
datachain/studio.py,sha256=
|
|
17
|
+
datachain/studio.py,sha256=Hr0Ha0kou0so4i8i-gWiXC1AYlJ2arI1D55cc7mi3tg,7253
|
|
18
18
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
19
19
|
datachain/utils.py,sha256=-mSFowjIidJ4_sMXInvNHLn4rK_QnHuIlLuH1_lMGmI,13897
|
|
20
20
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
@@ -24,36 +24,36 @@ datachain/catalog/loader.py,sha256=HA_mBC7q_My8j2WnSvIjUGuJpl6SIdg5vvy_lagxJlA,5
|
|
|
24
24
|
datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
|
|
25
25
|
datachain/client/azure.py,sha256=ffxs26zm6KLAL1aUWJm-vtzuZP3LSNha7UDGXynMBKo,2234
|
|
26
26
|
datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
|
|
27
|
-
datachain/client/fsspec.py,sha256=
|
|
27
|
+
datachain/client/fsspec.py,sha256=kf1blSGNcEXJ0tra3y5i35jc1aAy-67wMHXkqjlRMXg,12736
|
|
28
28
|
datachain/client/gcs.py,sha256=cnTIr5GS6dbYOEYfqehhyQu3dr6XNjPHSg5U3FkivUk,4124
|
|
29
29
|
datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
|
|
30
|
-
datachain/client/local.py,sha256=
|
|
30
|
+
datachain/client/local.py,sha256=f2HBqWH8SQM5CyiJ0ljfePVROg2FszWaAn6E2c8RiLE,4596
|
|
31
31
|
datachain/client/s3.py,sha256=CVHBUZ1Ic2Q3370nl-Bbe69phuWjFlrVv9dTJKBpRT0,6019
|
|
32
32
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
33
33
|
datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
|
|
34
34
|
datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
|
|
35
|
-
datachain/data_storage/metastore.py,sha256=
|
|
35
|
+
datachain/data_storage/metastore.py,sha256=hfTITcesE9XlUTxcCcdDyWGGep-QSjJL9DUxko5QCeI,37524
|
|
36
36
|
datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu-AY,9895
|
|
37
37
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
38
38
|
datachain/data_storage/sqlite.py,sha256=D_ZQ0PHmZzHO2dinv4naVJocUDIZUwV4WAz692C1cyk,22521
|
|
39
39
|
datachain/data_storage/warehouse.py,sha256=tjIkU-5JywBR0apCyqTcwSyaRtGxhu2L7IVjrz-55uc,30802
|
|
40
|
-
datachain/func/__init__.py,sha256=
|
|
40
|
+
datachain/func/__init__.py,sha256=TG6JHFKtLi06Nd5iLszXIflEq-VKZcKMdgo_KiQ8SGQ,1055
|
|
41
41
|
datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
|
|
42
42
|
datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
|
|
43
43
|
datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
|
|
44
44
|
datachain/func/conditional.py,sha256=mQroxsoExpBW84Zm5dAYP4OpBblWmzfnF2qJq9rba54,2223
|
|
45
45
|
datachain/func/func.py,sha256=mJ_rOXMpoqnK4-d5eF9boSMx5hWzgKoMLPGpZQqLAfw,15222
|
|
46
|
-
datachain/func/numeric.py,sha256=
|
|
46
|
+
datachain/func/numeric.py,sha256=gMe1Ks0dqQKHkjcpvj7I5S-neECzQ_gltPQLNoaWOyo,5632
|
|
47
47
|
datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
|
|
48
48
|
datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
|
|
49
|
-
datachain/func/string.py,sha256=
|
|
49
|
+
datachain/func/string.py,sha256=8az3BTeezlaZt6NW-54GWX7WSosAOVMbTr6bXIYyJq4,5958
|
|
50
50
|
datachain/func/window.py,sha256=0MB1yjpVbwOrl_WNLZ8V3jkJz3o0XlYinpAcZQJuxiA,1688
|
|
51
51
|
datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
datachain/lib/arrow.py,sha256=b5efxAUaNNYVwtXVJqj07D3zf5KC-BPlLCxKEZbEG6w,9429
|
|
53
53
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
54
54
|
datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
|
|
55
55
|
datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
|
|
56
|
-
datachain/lib/dc.py,sha256=
|
|
56
|
+
datachain/lib/dc.py,sha256=xqLR4IH_mbuet0FsxBHDsRUg-zR6tO8UZdLQQTLG8EE,89533
|
|
57
57
|
datachain/lib/file.py,sha256=-XMkL6ED1sE7TMhWoMRTEuOXswZJw8X6AEmJDONFP74,15019
|
|
58
58
|
datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
|
|
59
59
|
datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
|
|
@@ -61,23 +61,23 @@ datachain/lib/listing.py,sha256=cVkCp7TRVpcZKSx-Bbk9t51bQI9Mw0o86W6ZPhAsuzM,3667
|
|
|
61
61
|
datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
|
|
62
62
|
datachain/lib/meta_formats.py,sha256=anK2bDVbaeCCh0yvKUBaW2MVos3zRgdaSV8uSduzPcU,6680
|
|
63
63
|
datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
|
|
64
|
-
datachain/lib/pytorch.py,sha256=
|
|
64
|
+
datachain/lib/pytorch.py,sha256=QMJO_OGEMvBi2x71vGcG25agLzNwyLmF4Qx5iILlwaM,6350
|
|
65
65
|
datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
|
|
66
66
|
datachain/lib/signal_schema.py,sha256=_uh19nCKhiD9ua8oIN1Q8R9iYv1BZAuqTJCLYVmyW8k,24557
|
|
67
67
|
datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
|
|
68
68
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
69
69
|
datachain/lib/udf.py,sha256=-j0krjNAELTqRI0dB1N65AmawtcIY5vN---AuUcW8Us,13637
|
|
70
70
|
datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
|
|
71
|
-
datachain/lib/utils.py,sha256=
|
|
71
|
+
datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
|
|
72
72
|
datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
73
|
datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
|
|
74
74
|
datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
|
|
75
75
|
datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
|
-
datachain/lib/convert/flatten.py,sha256=
|
|
76
|
+
datachain/lib/convert/flatten.py,sha256=IZFiUYbgXSxXhPSG5Cqf5IjnJ4ZDZKXMr4o_yCR1NY4,1505
|
|
77
77
|
datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
|
|
78
78
|
datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
|
|
79
|
-
datachain/lib/convert/unflatten.py,sha256=
|
|
80
|
-
datachain/lib/convert/values_to_tuples.py,sha256=
|
|
79
|
+
datachain/lib/convert/unflatten.py,sha256=5RLIEB7utQFcXlyUIRGqu6VtmAN4N4whlslpO7xMQyI,2026
|
|
80
|
+
datachain/lib/convert/values_to_tuples.py,sha256=EFfIGBiVVltJQG8blzsQ1dGXneh4D3wdLfSUeoK10OI,3931
|
|
81
81
|
datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
|
|
82
82
|
datachain/model/bbox.py,sha256=1Li1G3RdiQwLOAc2Mak2nQU0bcvdH-lXmXtA984CUWM,3154
|
|
83
83
|
datachain/model/pose.py,sha256=q9NgB8h66aKnYnLi7Pyf9bU-F_90W4cbvtSO3-_hkdk,3078
|
|
@@ -88,7 +88,7 @@ datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15p
|
|
|
88
88
|
datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
|
|
89
89
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
90
90
|
datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
|
|
91
|
-
datachain/query/dataset.py,sha256=
|
|
91
|
+
datachain/query/dataset.py,sha256=eXr9fJz2grX2evmkmsiH0Xeqajd8gFnujmt_USMxy0c,54563
|
|
92
92
|
datachain/query/dispatch.py,sha256=fZ0TgGFRcsrYh1iXQoZVjkUl4Xetom9PSHoeDes3IRs,11606
|
|
93
93
|
datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
|
|
94
94
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
@@ -96,7 +96,7 @@ datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
|
|
|
96
96
|
datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
|
|
97
97
|
datachain/query/session.py,sha256=vvLIJ5b8eElovHLAWq_CZJXmN5t7C7iAZA7x9wPPOms,5905
|
|
98
98
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
|
-
datachain/remote/studio.py,sha256=
|
|
99
|
+
datachain/remote/studio.py,sha256=WiK6fpRAw0a6Dth4XXI0YInEHH4gDU7AUHHDNd3wJzg,11616
|
|
100
100
|
datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
|
|
101
101
|
datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
|
|
102
102
|
datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
|
|
@@ -107,20 +107,20 @@ datachain/sql/functions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
|
107
107
|
datachain/sql/functions/aggregate.py,sha256=3AQdA8YHPFdtCEfwZKQXTT8SlQWdG9gD5PBtGN3Odqs,944
|
|
108
108
|
datachain/sql/functions/array.py,sha256=Zq59CaMHf_hFapU4kxvy2mwteH344k5Wksxja4MfBks,1204
|
|
109
109
|
datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
|
|
110
|
-
datachain/sql/functions/numeric.py,sha256=
|
|
110
|
+
datachain/sql/functions/numeric.py,sha256=BK2KCiPSgM2IveCq-9M_PG3CtPBlztaS9TTn1LGzyLs,1250
|
|
111
111
|
datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
|
|
112
112
|
datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
|
|
113
|
-
datachain/sql/functions/string.py,sha256=
|
|
113
|
+
datachain/sql/functions/string.py,sha256=E-T9OIzUR-GKaLgjZsEtg5CJrY_sLf1lt1awTvY7w2w,1426
|
|
114
114
|
datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
|
|
115
|
-
datachain/sql/sqlite/base.py,sha256=
|
|
115
|
+
datachain/sql/sqlite/base.py,sha256=E2PK3hoGlHey1eEjcReXRrI-c_ASr3AmAXaNYKDY_o8,18634
|
|
116
116
|
datachain/sql/sqlite/types.py,sha256=lPXS1XbkmUtlkkiRxy_A_UzsgpPv2VSkXYOD4zIHM4w,1734
|
|
117
117
|
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
118
118
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
119
119
|
datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
|
|
120
120
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
121
|
-
datachain-0.7.
|
|
122
|
-
datachain-0.7.
|
|
123
|
-
datachain-0.7.
|
|
124
|
-
datachain-0.7.
|
|
125
|
-
datachain-0.7.
|
|
126
|
-
datachain-0.7.
|
|
121
|
+
datachain-0.7.10.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
122
|
+
datachain-0.7.10.dist-info/METADATA,sha256=qtw_rToRdmR9-CO6MFCAGv6NWJJ87C95iQaDEnDE4H8,8371
|
|
123
|
+
datachain-0.7.10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
124
|
+
datachain-0.7.10.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
125
|
+
datachain-0.7.10.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
126
|
+
datachain-0.7.10.dist-info/RECORD,,
|