datachain 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry, and is provided for informational purposes only.

Note: this release of datachain has been flagged as potentially problematic.

datachain/sql/types.py CHANGED
@@ -17,6 +17,7 @@ from datetime import datetime
 from types import MappingProxyType
 from typing import Any, Union
 
+import sqlalchemy as sa
 from sqlalchemy import TypeDecorator, types
 
 _registry: dict[str, "TypeConverter"] = {}
@@ -28,6 +29,9 @@ read_converter_registry = MappingProxyType(_read_converter_registry)
 _type_defaults_registry: dict[str, "TypeDefaults"] = {}
 type_defaults_registry = MappingProxyType(_type_defaults_registry)
 
+_db_defaults_registry: dict[str, "DBDefaults"] = {}
+db_defaults_registry = MappingProxyType(_db_defaults_registry)
+
 NullType = types.NullType
 
 
@@ -43,6 +47,10 @@ def register_type_defaults(dialect_name: str, td: "TypeDefaults"):
     _type_defaults_registry[dialect_name] = td
 
 
+def register_db_defaults(dialect_name: str, dbd: "DBDefaults"):
+    _db_defaults_registry[dialect_name] = dbd
+
+
 def converter(dialect) -> "TypeConverter":
     name = dialect.name
     try:
@@ -71,6 +79,14 @@ def type_defaults(dialect) -> "TypeDefaults":
         raise ValueError(f"No type defaults registered for dialect: {name!r}") from None
 
 
+def db_defaults(dialect) -> "DBDefaults":
+    name = dialect.name
+    try:
+        return db_defaults_registry[name]
+    except KeyError:
+        raise ValueError(f"No DB defaults registered for dialect: {name!r}") from None
+
+
 class SQLType(TypeDecorator):
     impl: type[types.TypeEngine[Any]] = types.TypeEngine
     cache_ok = True
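As an aside, the new registry mirrors the existing TypeConverter/TypeDefaults machinery: a dialect registers a DBDefaults instance under its name, and db_defaults(dialect) looks it up via dialect.name. The sketch below is illustrative only, not part of the diff; the "duckdb" name and the stub dialect are hypothetical, and datachain presumably ships its own registrations (datachain/sql/default/base.py and datachain/sql/sqlite/base.py both changed in this release).

    # Illustrative sketch only -- assumes the datachain.sql.types API shown above.
    import sqlalchemy as sa

    from datachain.sql.types import DBDefaults, db_defaults, register_db_defaults


    class MyDBDefaults(DBDefaults):
        # Override a single default; everything else falls back to the base class.
        def boolean(self):
            return sa.text("0")


    # The dialect name here is hypothetical; real registrations are keyed by the
    # SQLAlchemy dialect name (e.g. "sqlite").
    register_db_defaults("duckdb", MyDBDefaults())


    class FakeDialect:
        # db_defaults() only reads dialect.name, so a stub is enough for a demo.
        name = "duckdb"


    dbd = db_defaults(FakeDialect())
    print(dbd.boolean())  # our override: TextClause for "0"
    print(dbd.string())   # inherited base default: TextClause for "''"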
@@ -97,6 +113,10 @@ class String(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).string()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).string()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).string(value)
 
@@ -115,6 +135,10 @@ class Boolean(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).boolean()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).boolean()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).boolean(value)
 
@@ -133,6 +157,10 @@ class Int(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).int()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).int()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).int(value)
 
@@ -145,6 +173,10 @@ class Int32(Int):
     def default_value(dialect):
         return type_defaults(dialect).int32()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).int32()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).int32(value)
 
@@ -157,6 +189,10 @@ class Int64(Int):
     def default_value(dialect):
         return type_defaults(dialect).int64()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).int64()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).int64(value)
 
@@ -169,12 +205,16 @@ class UInt64(Int):
     def default_value(dialect):
         return type_defaults(dialect).uint64()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).uint64()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).uint64(value)
 
 
 class Float(SQLType):
-    impl = types.INTEGER
+    impl = types.FLOAT
 
     @property
     def python_type(self):
@@ -187,6 +227,10 @@ class Float(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).float()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).float()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).float(value)
 
@@ -199,6 +243,10 @@ class Float32(Float):
     def default_value(dialect):
         return type_defaults(dialect).float32()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).float32()
+
     def on_read_convert(self, value, dialect):
        return read_converter(dialect).float32(value)
 
@@ -211,6 +259,10 @@ class Float64(Float):
     def default_value(dialect):
         return type_defaults(dialect).float64()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).float64()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).float64(value)
 
@@ -247,6 +299,10 @@ class Array(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).array()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).array()
+
     def on_read_convert(self, value, dialect):
         r = read_converter(dialect).array(value, self.item_type, dialect)
         if isinstance(self.item_type, JSON):
@@ -268,6 +324,10 @@ class JSON(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).json()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).json()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).json(value)
 
@@ -286,6 +346,10 @@ class DateTime(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).datetime()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).datetime()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).datetime(value)
 
@@ -304,6 +368,10 @@ class Binary(SQLType):
     def default_value(dialect):
         return type_defaults(dialect).binary()
 
+    @staticmethod
+    def db_default_value(dialect):
+        return db_defaults(dialect).binary()
+
     def on_read_convert(self, value, dialect):
         return read_converter(dialect).binary(value)
 
@@ -328,13 +396,17 @@ class TypeReadConverter:
         return value
 
     def float(self, value):
+        if value is None:
+            return float("nan")
+        if isinstance(value, str) and value.lower() == "nan":
+            return float("nan")
         return value
 
     def float32(self, value):
-        return value
+        return self.float(value)
 
     def float64(self, value):
-        return value
+        return self.float(value)
 
     def array(self, value, item_type, dialect):
         if value is None or item_type is None:
@@ -347,10 +419,9 @@ class TypeReadConverter:
     def datetime(self, value):
         return value
 
-    def uuid(self, value):
-        return value
-
     def binary(self, value):
+        if isinstance(value, str):
+            return value.encode()
         return value
 
 
@@ -415,13 +486,13 @@ class TypeDefaults:
         return None
 
     def float(self):
-        return None
+        return float("nan")
 
     def float32(self):
-        return None
+        return self.float()
 
     def float64(self):
-        return None
+        return self.float()
 
     def array(self):
         return None
@@ -432,11 +503,49 @@ class TypeDefaults:
     def datetime(self):
         return None
 
-    def uuid(self):
+    def binary(self):
         return None
 
+
+class DBDefaults:
+    def string(self):
+        return sa.text("''")
+
+    def boolean(self):
+        return sa.text("False")
+
+    def int(self):
+        return sa.text("0")
+
+    def int32(self):
+        return self.int()
+
+    def int64(self):
+        return self.int()
+
+    def uint64(self):
+        return self.int()
+
+    def float(self):
+        return sa.text("NaN")
+
+    def float32(self):
+        return self.float()
+
+    def float64(self):
+        return self.float()
+
+    def array(self):
+        return sa.text("'[]'")
+
+    def json(self):
+        return sa.text("'{}'")
+
+    def datetime(self):
+        return sa.text("'1970-01-01 00:00:00'")
+
     def binary(self):
-        return None
+        return sa.text("''")
 
 
 TYPES = [
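Besides the DB defaults, the float semantics of this module changed: Float.impl is now types.FLOAT (previously types.INTEGER), and both TypeDefaults.float() and TypeReadConverter.float() now yield NaN for missing values. A small illustrative check of that behavior, assuming the plain no-argument constructors shown in the diff above:

    # Illustrative sketch only -- exercises the NaN behavior introduced in 0.3.3.
    import math

    from datachain.sql.types import TypeDefaults, TypeReadConverter

    defaults = TypeDefaults()
    converter = TypeReadConverter()

    assert math.isnan(defaults.float())        # default float value is now NaN, not None
    assert math.isnan(defaults.float32())      # float32/float64 delegate to float()
    assert math.isnan(converter.float(None))   # NULL read from the DB becomes NaN
    assert math.isnan(converter.float("NaN"))  # the string "NaN" is normalized too
    assert converter.float(1.5) == 1.5         # ordinary values pass through unchanged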
datachain-0.3.1.dist-info/METADATA → datachain-0.3.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.1
+Version: 0.3.3
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -43,7 +43,7 @@ Requires-Dist: Pillow <11,>=10.0.0
 Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
 Provides-Extra: dev
 Requires-Dist: datachain[docs,tests] ; extra == 'dev'
-Requires-Dist: mypy ==1.10.1 ; extra == 'dev'
+Requires-Dist: mypy ==1.11.1 ; extra == 'dev'
 Requires-Dist: types-python-dateutil ; extra == 'dev'
 Requires-Dist: types-pytz ; extra == 'dev'
 Requires-Dist: types-PyYAML ; extra == 'dev'
@@ -55,6 +55,15 @@ Requires-Dist: mkdocs-material >=9.3.1 ; extra == 'docs'
 Requires-Dist: mkdocs-section-index >=0.3.6 ; extra == 'docs'
 Requires-Dist: mkdocstrings-python >=1.6.3 ; extra == 'docs'
 Requires-Dist: mkdocs-literate-nav >=0.6.1 ; extra == 'docs'
+Provides-Extra: examples
+Requires-Dist: datachain[tests] ; extra == 'examples'
+Requires-Dist: numpy <2,>=1 ; extra == 'examples'
+Requires-Dist: defusedxml ; extra == 'examples'
+Requires-Dist: accelerate ; extra == 'examples'
+Requires-Dist: unstructured[pdf] ; extra == 'examples'
+Requires-Dist: pdfplumber ==0.11.3 ; extra == 'examples'
+Requires-Dist: huggingface-hub[hf_transfer] ; extra == 'examples'
+Requires-Dist: nltk ==3.8.1 ; extra == 'examples'
 Provides-Extra: remote
 Requires-Dist: lz4 ; extra == 'remote'
 Requires-Dist: msgpack <2,>=1.0.4 ; extra == 'remote'
@@ -100,102 +109,78 @@ Requires-Dist: usearch ; extra == 'vector'
 AI 🔗 DataChain
 ----------------
 
-DataChain is a data-frame library designed for AI-specific scenarios. It helps ML and
-AI engineers build a metadata layer on top of unstructured files and analyze data using
-this layer.
+DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
+It is made to organize your unstructured data into datasets and wrangle it at scale on
+your local machine.
 
-📂 **Raw Files Processing**
-Process raw files (images, video, text, PDFs) directly from storage (S3, GCP, Azure,
-Local), version and update datasets.
+Key Features
+============
 
-🌟 **Metadata layer.**
-Build a metadata layer on top of files using structured sources like CSV, Parquet,
-and JSON files.
+📂 **Storage as a Source of Truth.**
+  - Process unstructured data without redundant copies: S3, GCP, Azure, and local
+    file systems.
+  - Multimodal data: images, video, text, PDFs, JSONs, CSVs, parquet.
+  - Join files and metadata together into persistent, versioned, columnar datasets.
 
-**Metadata enrichment.**
-Enhance the metadata layer with outputs from local ML model inferences and LLM calls.
+🐍 **Python-friendly data pipelines.**
+  - Operate on Python objects and object fields.
+  - Built-in parallelization and out-of-memory compute without a need in SQL or
+    Spark jobs.
 
-🛠️ **Data Transformation.**
-Transform metadata using traditional methods like filtering, grouping, joining, and
-others.
+🧠 **Data Enrichment and Processing.**
  - Generate metadata columns using local AI models and LLM APIs.
+  - Filter, join, and group by AI metadata. Vector similarity search.
+  - Pass datasets to Pytorch and Tensorflow, or export back into storage.
 
-🐍 **User-friendly interface.**
-Operate efficiently with familiar Python objects and object fields, eliminating the
-need for SQL.
+🚀 **Efficiency.**
+  - Parallelization, out-of-memory workloads and data caching.
+  - Vectorized operations on Python object fields: sum, count, avg, etc.
+  - Vector search on embeddings.
 
 
+Quick Start
+-----------
+
 .. code:: console
 
     $ pip install datachain
 
 
-Data Structures
-===============
-
-DataChain introduces expressive data structures tailored for AI-specific workload:
-
-- **Dataset:** Preserves the file-references and meta-information. Takes care of Python
-  object serialization, dataset versioning and difference. Operations on dataset:
-
-  - **Transformations:** traditional data-frame or SQL operations such as filtering,
-    grouping, joining.
-  - **Enrichments:** mapping, aggregating and generating using customer’s Python
-    code. This is needed to work with ML inference and LLM calls.
-
-- **Chain** is a sequence of operations on datasets. Chain executes operations in lazy
-  mode - only when needed.
-
-DataChain name comes from these major data structures: dataset and chaining.
-
+Selecting files using JSON metadata
+======================================
 
-What’s new in DataChain?
-========================
+A storage consists of images of cats and dogs (`dog.1048.jpg`, `cat.1009.jpg`),
+annotated with ground truth and model inferences in the 'json-pairs' format,
+where each image has a matching JSON file like `cat.1009.json`:
 
-The project combines multiple ideas from different areas in order to simplify AI
-use-cases and at the same time to fit it into traditional data infrastructure.
+.. code:: json
 
-- **Python-Native for AI.** Utilizes Python instead of SQL for data manipulation as the
-  native language for AI. It’s powered by `Pydantic`_ data models.
-- **Separation of CPU-GPU workloads.** Distinguishes CPU-heavy transformations (filter,
-  group_by, join) from GPU heavy enrichments (ML-inference or LLM calls). That’s mostly
-  needed for distributed computations.
-- **Resuming data processing** (in development). Introduces idempotent operations,
-  allowing data processing to resume from the last successful process file/record/batch
-  if it fails due to issues like failed LLM calls, ML inference or file download.
+    {
+        "class": "cat", "id": "1009", "num_annotators": 8,
+        "inference": {"class": "dog", "confidence": 0.68}
+    }
 
-Additional relatively new ideas:
+Example of downloading only high-confidence cat images using JSON metadata:
 
-- **Functional style data processing.** Using a functional/chaining approach to data
-  processing rather than declarative SQL, inspired by R-dplyr and some Python libraries.
-- **Data Versioning.** Treats raw files in cloud storage as the source of truth for data
-  and implements data versioning, extending ideas from DVC (developed by the same team).
 
+.. code:: py
 
-What DataChain is NOT?
-======================
-
-- **Not a database** (Postgres, MySQL). Instead, it uses databases under the hood:
-  `SQLite`_ in open-source and ClickHouse and other data warehouses for the commercial
-  version.
-- **Not a data processing tool / data warehouse** (Spark, Snowflake, Big Query) since
-  it delegates heavy data transformations to underlying data warehouses and focuses on
-  AI specific data enrichments and orchestrating all the pieces together.
-
+    from datachain import Column, DataChain
 
-Quick Start
------------
+    meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta")
+    images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg")
 
-Data curation with a local model
-=================================
+    images_id = images.map(id=lambda file: file.path.split('.')[-2])
+    annotated = images_id.merge(meta, on="id", right_on="meta.id")
 
-We will evaluate chatbot dialogs stored as text files in Google Cloud Storage
-- 50 files total in this example.
-These dialogs involve users chatting with a bot while looking for better wireless plans.
-Our goal is to identify the successful dialogs.
+    likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
+                                   & (Column("meta.inference.class_") == "cat"))
+    likely_cats.export_files("high-confidence-cats/", signal="file")
 
-The data used in the examples is `publicly available`_. The sample code is designed to run on a local machine.
 
-First, we'll show batch inference with a simple sentiment model using the `transformers` library:
+Data curation with a local AI model
+===================================
+Batch inference with a simple sentiment model using the `transformers` library:
 .. code:: shell
 
 
@@ -246,30 +231,30 @@ LLM judging chatbots
 =============================
 
 LLMs can work as efficient universal classifiers. In the example below,
-we employ a free API from Mistral to judge the chatbot performance. Please get a free
+we employ a free API from Mistral to judge the `publicly available`_ chatbot dialogs. Please get a free
 Mistral API key at https://console.mistral.ai
 
+
 .. code:: shell
 
-    $ pip install mistralai
+    $ pip install mistralai (Requires version >=1.0.0)
    $ export MISTRAL_API_KEY=_your_key_
 
 DataChain can parallelize API calls; the free Mistral tier supports up to 4 requests at the same time.
 
 .. code:: py
 
-    from mistralai.client import MistralClient
-    from mistralai.models.chat_completion import ChatMessage
+    from mistralai import Mistral
     from datachain import File, DataChain, Column
 
     PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
 
     def eval_dialogue(file: File) -> bool:
-        client = MistralClient()
-        response = client.chat(
+        client = Mistral()
+        response = client.chat.complete(
             model="open-mixtral-8x22b",
-            messages=[ChatMessage(role="system", content=PROMPT),
-                      ChatMessage(role="user", content=file.read())])
+            messages=[{"role": "system", "content": PROMPT},
+                      {"role": "user", "content": file.read()}])
         result = response.choices[0].message.content
         return result.lower().startswith("success")
 
@@ -309,8 +294,8 @@ Instead of extracting this information from the Mistral response data structure
 
 .. code:: py
 
-    from mistralai.client import MistralClient
-    from mistralai.models.chat_completion import ChatMessage, ChatCompletionResponse
+    from mistralai import Mistral
+    from mistralai.models import ChatCompletionResponse
     from datachain import File, DataChain, Column
 
     PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
@@ -319,8 +304,8 @@ Instead of extracting this information from the Mistral response data structure
         client = MistralClient()
         return client.chat(
             model="open-mixtral-8x22b",
-            messages=[ChatMessage(role="system", content=PROMPT),
-                      ChatMessage(role="user", content=file.read())])
+            messages=[{"role": "system", "content": PROMPT},
+                      {"role": "user", "content": file.read()}])
 
     chain = (
        DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file")
@@ -438,7 +423,10 @@ Tutorials
 ---------
 
 * `Getting Started`_
-* `Multimodal <examples/multimodal/clip_fine_tuning.ipynb>`_ (try in `Colab <https://colab.research.google.com/github/iterative/datachain/blob/main/examples/multimodal/clip_fine_tuning.ipynb>`__)
+* `Multimodal <https://github.com/iterative/datachain-examples/blob/main/multimodal/clip_fine_tuning.ipynb>`_ (try in `Colab <https://colab.research.google.com/github/iterative/datachain-examples/blob/main/multimodal/clip_fine_tuning.ipynb>`__)
+* `LLM evaluations <https://github.com/iterative/datachain-examples/blob/main/llm/llm_chatbot_evaluation.ipynb>`_ (try in `Colab <https://colab.research.google.com/github/iterative/datachain-examples/blob/main/llm/llm_chatbot_evaluation.ipynb>`__)
+* `Reading JSON metadata <https://github.com/iterative/datachain-examples/blob/main/formats/json-metadata-tutorial.ipynb>`_ (try in `Colab <https://colab.research.google.com/github/iterative/datachain-examples/blob/main/formats/json-metadata-tutorial.ipynb>`__)
+
 
 Contributions
 -------------
datachain-0.3.1.dist-info/RECORD → datachain-0.3.3.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
 datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
-datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
+datachain/asyn.py,sha256=biF8M8fQujtj5xs0VLi8S16eBtzG6kceWlO_NILbCsg,8197
 datachain/cache.py,sha256=wznC2pge6RhlPTaJfBVGjmBc6bxWCPThu4aTFMltvFU,4076
 datachain/cli.py,sha256=DbmI1sXs7-KCQz6RdLE_JAp3XO3yrTSRJ71LdUzx-XE,33099
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
@@ -9,7 +9,7 @@ datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
 datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
 datachain/job.py,sha256=bk25bIqClhgRPzlXAhxpTtDeewibQe5l3S8Cf7db0gM,1229
 datachain/listing.py,sha256=keLkvPfumDA3gijeIiinH5yGWe71qCxgF5HqqP5AeH4,8299
-datachain/node.py,sha256=frxZWoEvqUvk9pyXmVaeiNCs3W-xjC_sENmUD11V06Q,6006
+datachain/node.py,sha256=ihrP5l9HKpXLR0fR1wyb7QIdb7NR26dX6bB09qGX5B4,6005
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
@@ -17,35 +17,36 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/utils.py,sha256=ROVCLwb37VmFRzgTlSGUDw4eJNgYGiQ4yMX581HfUX8,12988
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=9-7SnMjh5ruH9sdKDo8P5EklX9oC2EHH6bnku6ZqLko,80275
+datachain/catalog/catalog.py,sha256=_BRaD261RnCJgXr_DJcDf58XmbjLiuLMSsX97E8k3z8,80771
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
-datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
+datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
-datachain/client/azure.py,sha256=3RfDTAI_TszDy9WazHQd3bI3sS2wDFrNXfNqCDewZgE,2214
+datachain/client/azure.py,sha256=LXSahE0Z6r4dXqpBkKnq3J5fg7N7ymC1lSn-1SoILGc,2687
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
 datachain/client/fsspec.py,sha256=G4QTm3KPhlaV74T3gLXJ86345_ak8CH38ezn2ET-oLc,13230
-datachain/client/gcs.py,sha256=Mt77W_l8_fK61gLm4mmxNmENuOM0ETwxdiFp4S8d-_w,4105
-datachain/client/local.py,sha256=SyGnqcrbtSvDK6IJsQa6NxxHwbWaWIP1GLZsQBXg_IA,4939
-datachain/client/s3.py,sha256=GfRZZzNPQPRsYjoef8bbsLbanJPUlCbyGTTK8ojzp8A,6136
+datachain/client/gcs.py,sha256=P_E3mhzhXR9mJ_wc3AYZuczzwOJ0-D3J5qhJXeSU-xk,4518
+datachain/client/local.py,sha256=H8TNY8pi2kA8y9_f_1XLUjJF66f229qC_b2y4xGkzdU,5300
+datachain/client/s3.py,sha256=aQxfMH8G8bUjmHF1-6P90MSkXsU5DgOPEVlKWLu459I,6568
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=nxcY6nwyEmQWMAo33sNGO-FgUFQs2amBGGnZz2ftEz0,55362
-datachain/data_storage/schema.py,sha256=Idi-29fckvZozzvkyz3nTR2FOIajPlSuPdIEO7SMvXM,7863
+datachain/data_storage/schema.py,sha256=GwJIHkjhrnBxJAV1WvCMM8jiJN5h79LXDyzMmUDtRw0,8523
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
-datachain/data_storage/sqlite.py,sha256=0r6L_a2hdGRoR_gl06v1qWhEFOS_Q31aldHyk07Yx-M,26857
-datachain/data_storage/warehouse.py,sha256=MXYkUG69UK2wbIFsZFvT7rKzXlnSitDMp3Vzj_IIsnA,33089
+datachain/data_storage/sqlite.py,sha256=GEE07ZXTAtzdf53J1UDLscS0xZjukRGlmZzG6q0fZI0,28589
+datachain/data_storage/warehouse.py,sha256=tyJJDxFae6XWgLmOoG0B_MJ_Z_UEMoW_wJb96zzwTtA,33471
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=R8wDUDEa-5hYjI3HW9cqvOYYJpeeah5lbhFIL3gkmcE,4915
+datachain/lib/arrow.py,sha256=D8N7zCppRdc5sTYT1hNIbROc-sKA_8FN5J_m-KjD3Us,4929
 datachain/lib/clip.py,sha256=16u4b_y2Y15nUS2UN_8ximMo6r_-_4IQpmct2ol-e-g,5730
-datachain/lib/data_model.py,sha256=qfTtQNncS5pt9SvXdMEa5kClniaT6XBGBfO7onEz2TI,1632
+datachain/lib/data_model.py,sha256=ZvtMRMcPpBxI-rOhkXb-ry1PkGYcEFFK1w1wH12vs4g,1718
 datachain/lib/dataset_info.py,sha256=lONGr71ozo1DS4CQEhnpKORaU4qFb6Ketv8Xm8CVm2U,2188
-datachain/lib/dc.py,sha256=e24ecfIcypVkmVBqvr-p06zpwrw7GD20gy1gBJQPT-I,58012
+datachain/lib/dc.py,sha256=0pwNb91GW8MnHLfFd2YvEtEH0n77c3nxp5ozwIyW86o,58827
 datachain/lib/file.py,sha256=ZHpdilDPYCob8uqtwUPtBvBNxVvQRq4AC_0IGg5m-G4,12003
 datachain/lib/image.py,sha256=TgYhRhzd4nkytfFMeykQkPyzqb5Le_-tU81unVMPn4Q,2328
-datachain/lib/meta_formats.py,sha256=jlSYWRUeDMjun_YCsQ2JxyaDJpEpokzHDPmKUAoCXnU,7034
+datachain/lib/listing.py,sha256=nXLmGae_oQke4hnurzzWiHTEjHjWiqqHdB41Wb-hMTk,3521
+datachain/lib/meta_formats.py,sha256=Hels85LJmNCz1aYVJvhymNdAt3qdJ2-qoxsIiUezrow,7198
 datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
 datachain/lib/pytorch.py,sha256=9PsypKseyKfIimTmTQOgb-pbNXgeeAHLdlWx0qRPULY,5660
 datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
@@ -62,26 +63,26 @@ datachain/lib/convert/flatten.py,sha256=YMoC00BqEy3zSpvCp6Q0DfxihuPmgjUJj1g2cesW
 datachain/lib/convert/python_to_sql.py,sha256=4gplGlr_Kg-Z40OpJUzJiarDWj7pwbUOk-dPOYYCJ9Q,2629
 datachain/lib/convert/sql_to_python.py,sha256=lGnKzSF_tz9Y_5SSKkrIU95QEjpcDzvOxIRkEKTQag0,443
 datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xdq56Tw,2012
-datachain/lib/convert/values_to_tuples.py,sha256=aVoHWMOUGLAiS6_BBwKJqVIne91VffOW6-dWyNE7oHg,3715
+datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMNDGl4x5t6yQMl8,3931
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
 datachain/query/builtins.py,sha256=EmKPYsoQ46zwdyOn54MuCzvYFmfsBn5F8zyF7UBUfrc,2550
-datachain/query/dataset.py,sha256=sRKY2it_znlzTNOt_OCRe008rHu0TXMnFwvGsnthSO0,60209
+datachain/query/dataset.py,sha256=7lxlybS7I5IPsgOqMz-W4vS6kWBDHkHQRqBHlIRYRPw,60473
 datachain/query/dispatch.py,sha256=GBh3EZHDp5AaXxrjOpfrpfsuy7Umnqxu-MAXcK9X3gc,12945
 datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
-datachain/query/schema.py,sha256=O3mTM5DRjvRAJCI7O9mR8wOdFJbgI1jIjvtfl5YvjI4,7755
-datachain/query/session.py,sha256=qTzkXgwMJdJhal3rVt3hdv3x1EXT1IHuXcwkC-Ex0As,4111
+datachain/query/schema.py,sha256=BvHipN79CnSTbVFcfIEwzo1npe7HmThnk0iY-CSLEkM,7899
+datachain/query/session.py,sha256=PkOLANS0s8KPz4wO17tAab-CMzIt7FK8RPzJiibExds,4290
 datachain/query/udf.py,sha256=j3NhmKK5rYG5TclcM2Sr0LhS1tmYLMjzMugx9G9iFLM,8100
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/remote/studio.py,sha256=f5s6qSZ9uB4URGUoU_8_W1KZRRQQVSm6cgEBkBUEfuE,7226
 datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
 datachain/sql/selectable.py,sha256=fBM-wS1TUA42kVEAAiwqGtibIevyZAEritwt8PZGyLQ,1589
-datachain/sql/types.py,sha256=SShudhdIpdfTKDxWDDqOajYRkTCkIgQbilA94g4i-4E,10389
+datachain/sql/types.py,sha256=1MFvECB_5A6QwQKKY3VPhvitgKDlc2aB7iBjY4hv1_s,13034
 datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
 datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
-datachain/sql/default/base.py,sha256=h44005q3qtMc9cjWmRufWwcBr5CfK_dnvG4IrcSQs_8,536
+datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
 datachain/sql/functions/__init__.py,sha256=Ioyy7nSetrTLVnHGcGcmZU99HxUFcx-5PFbrh2dPNH0,396
 datachain/sql/functions/array.py,sha256=EB7nJSncUc1PuxlHyzU2gVhF8DuXaxpGlxb5e8X2KFY,1297
 datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
@@ -89,13 +90,13 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
 datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
 datachain/sql/functions/string.py,sha256=hIrF1fTvlPamDtm8UMnWDcnGfbbjCsHxZXS30U2Rzxo,651
 datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
-datachain/sql/sqlite/base.py,sha256=LBYmXqXsVF30fbcnR55evCZHbPDCzMdGk_ogPLps63s,12236
+datachain/sql/sqlite/base.py,sha256=5nLvOv0xcOlEpfZeY3SWbI401MSGM2i29P3SRkd7TAc,12898
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.3.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.3.1.dist-info/METADATA,sha256=qR3OMpGUkx0cKelnl51d9uksn5H-Wn4LvTJbUnTMDuQ,17268
-datachain-0.3.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-datachain-0.3.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.3.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.3.1.dist-info/RECORD,,
+datachain-0.3.3.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.3.dist-info/METADATA,sha256=BDBQIVMBj7tqy0TntMooUyMlPEVgVHA4xvMESRHiF0I,16789
+datachain-0.3.3.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+datachain-0.3.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.3.dist-info/RECORD,,