datachain 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/cache.py +0 -1
- datachain/catalog/catalog.py +50 -153
- datachain/cli.py +4 -6
- datachain/client/fsspec.py +0 -1
- datachain/client/s3.py +0 -4
- datachain/data_storage/schema.py +4 -8
- datachain/data_storage/warehouse.py +6 -17
- datachain/error.py +0 -4
- datachain/lib/clip.py +1 -1
- datachain/lib/dc.py +17 -5
- datachain/lib/file.py +9 -11
- datachain/lib/image.py +1 -1
- datachain/lib/meta_formats.py +4 -8
- datachain/lib/model_store.py +6 -1
- datachain/lib/text.py +1 -1
- datachain/lib/webdataset.py +13 -0
- datachain/lib/webdataset_laion.py +13 -0
- datachain/listing.py +2 -2
- datachain/node.py +4 -26
- datachain/query/builtins.py +0 -14
- datachain/query/schema.py +1 -16
- datachain/utils.py +0 -3
- {datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/METADATA +6 -7
- {datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/RECORD +28 -28
- {datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/LICENSE +0 -0
- {datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/WHEEL +0 -0
- {datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/entry_points.txt +0 -0
- {datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/top_level.txt +0 -0
datachain/cache.py
CHANGED
datachain/catalog/catalog.py
CHANGED
@@ -12,7 +12,6 @@ import sys
 import time
 import traceback
 from collections.abc import Iterable, Iterator, Mapping, Sequence
-from contextlib import contextmanager, nullcontext
 from copy import copy
 from dataclasses import dataclass
 from functools import cached_property, reduce
@@ -23,7 +22,6 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    NamedTuple,
     NoReturn,
     Optional,
     Union,
@@ -58,14 +56,13 @@ from datachain.error import (
     PendingIndexingError,
     QueryScriptCancelError,
     QueryScriptCompileError,
-    QueryScriptDatasetNotFound,
     QueryScriptRunError,
 )
 from datachain.listing import Listing
 from datachain.node import DirType, Node, NodeWithPath
 from datachain.nodes_thread_pool import NodesThreadPool
 from datachain.remote.studio import StudioClient
-from datachain.sql.types import JSON, Boolean, DateTime,
+from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
 from datachain.storage import Storage, StorageStatus, StorageURI
 from datachain.utils import (
     DataChainDir,
@@ -115,44 +112,19 @@ def noop(_: str):
     pass


-
-
-    stream:
-
-    lines: list[str] = []
-    append = lines.append
+def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
+    buffer = b""
+    while byt := stream.read(1):  # Read one byte at a time
+        buffer += byt

-
-        buffer = b""
-        while byt := stream.read(1):  # Read one byte at a time
-            buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
-
-            if byt in (b"\n", b"\r"):  # Check for newline or carriage return
-                line = buffer.decode("utf-8")
-                print(line, end="")
-                callback(line)
-                append(line)
-                buffer = b""  # Clear buffer for next line
-
-        if buffer:  # Handle any remaining data in the buffer
+        if byt in (b"\n", b"\r"):  # Check for newline or carriage return
             line = buffer.decode("utf-8")
-            print(line, end="")
             callback(line)
-
-
-    thread = Thread(target=loop, daemon=True)
-    thread.start()
-
-    try:
-        yield lines
-    finally:
-        thread.join()
-
+            buffer = b""  # Clear buffer for next line

-
-
-
-    output: str
+    if buffer:  # Handle any remaining data in the buffer
+        line = buffer.decode("utf-8")
+        callback(line)


 class DatasetRowsFetcher(NodesThreadPool):
@@ -541,8 +513,6 @@ def find_column_to_str(  # noqa: PLR0911
     )
     if column == "name":
         return posixpath.basename(row[field_lookup["path"]]) or ""
-    if column == "owner":
-        return row[field_lookup["owner_name"]] or ""
     if column == "path":
         is_dir = row[field_lookup["dir_type"]] == DirType.DIR
         path = row[field_lookup["path"]]
@@ -651,11 +621,6 @@ class Catalog:
         code_ast.body[-1:] = new_expressions
         return code_ast

-    def compile_query_script(self, script: str) -> str:
-        code_ast = ast.parse(script)
-        code_ast = self.attach_query_wrapper(code_ast)
-        return ast.unparse(code_ast)
-
     def parse_url(self, uri: str, **config: Any) -> tuple[Client, str]:
         config = config or self.client_config
         return Client.parse_url(uri, self.cache, **config)
@@ -699,16 +664,12 @@ class Catalog:
         source_metastore = self.metastore.clone(client.uri)

         columns = [
-            Column("vtype", String),
-            Column("dir_type", Int),
             Column("path", String),
             Column("etag", String),
             Column("version", String),
             Column("is_latest", Boolean),
             Column("last_modified", DateTime(timezone=True)),
             Column("size", Int64),
-            Column("owner_name", String),
-            Column("owner_id", String),
             Column("location", JSON),
             Column("source", String),
         ]
@@ -1549,7 +1510,6 @@ class Catalog:
             row["etag"],
             row["version"],
             row["is_latest"],
-            row["vtype"],
             row["location"],
             row["last_modified"],
         )
@@ -1805,14 +1765,15 @@ class Catalog:
     def query(
         self,
         query_script: str,
-
-        python_executable:
+        env: Optional[Mapping[str, str]] = None,
+        python_executable: str = sys.executable,
         save: bool = False,
         capture_output: bool = True,
         output_hook: Callable[[str], None] = noop,
         params: Optional[dict[str, str]] = None,
         job_id: Optional[str] = None,
-
+        _execute_last_expression: bool = False,
+    ) -> None:
         """
         Method to run custom user Python script to run a query and, as result,
         creates new dataset from the results of a query.
@@ -1835,92 +1796,21 @@ class Catalog:
             C.size > 1000
         )
         """
-        if
-
-
-
-
-
-
-
-
-
-
-
-            envs,
-            capture_output,
-            output_hook,
-            params,
-            save,
-            job_id,
-        )
-        output = "".join(lines)
-
-        if proc.returncode:
-            if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
-                raise QueryScriptCancelError(
-                    "Query script was canceled by user",
-                    return_code=proc.returncode,
-                    output=output,
-                )
-            raise QueryScriptRunError(
-                f"Query script exited with error code {proc.returncode}",
-                return_code=proc.returncode,
-                output=output,
-            )
-
-        def _get_dataset_versions_by_job_id():
-            for dr, dv, job in self.list_datasets_versions():
-                if job and str(job.id) == job_id:
-                    yield dr, dv
-
-        try:
-            dr, dv = max(
-                _get_dataset_versions_by_job_id(), key=lambda x: x[1].created_at
-            )
-        except ValueError as e:
-            if not save:
-                return QueryResult(dataset=None, version=None, output=output)
-
-            raise QueryScriptDatasetNotFound(
-                "No dataset found after running Query script",
-                output=output,
-            ) from e
-
-        dr = self.update_dataset(
-            dr,
-            script_output=output,
-            query_script=query_script,
-        )
-        self.update_dataset_version_with_warehouse_info(
-            dr,
-            dv.version,
-            script_output=output,
-            query_script=query_script,
-            job_id=job_id,
-            is_job_result=True,
-        )
-        return QueryResult(dataset=dr, version=dv.version, output=output)
+        if _execute_last_expression:
+            try:
+                code_ast = ast.parse(query_script)
+                code_ast = self.attach_query_wrapper(code_ast)
+                query_script_compiled = ast.unparse(code_ast)
+            except Exception as exc:
+                raise QueryScriptCompileError(
+                    f"Query script failed to compile, reason: {exc}"
+                ) from exc
+        else:
+            query_script_compiled = query_script
+            assert not save

-
-
-        python_executable: str,
-        query_script: str,
-        envs: Optional[Mapping[str, str]],
-        capture_output: bool,
-        output_hook: Callable[[str], None],
-        params: Optional[dict[str, str]],
-        save: bool,
-        job_id: Optional[str],
-    ) -> tuple[list[str], subprocess.Popen]:
-        try:
-            query_script_compiled = self.compile_query_script(query_script)
-        except Exception as exc:
-            raise QueryScriptCompileError(
-                f"Query script failed to compile, reason: {exc}"
-            ) from exc
-        envs = dict(envs or os.environ)
-        envs.update(
+        env = dict(env or os.environ)
+        env.update(
             {
                 "DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
                 "PYTHONPATH": os.getcwd(),  # For local imports
@@ -1929,19 +1819,28 @@ class Catalog:
                 "DATACHAIN_JOB_ID": job_id or "",
             },
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
+        popen_kwargs = {}
+        if capture_output:
+            popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
+
+        cmd = [python_executable, "-c", query_script_compiled]
+        with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc:  # type: ignore[call-overload] # noqa: S603
+            if capture_output:
+                args = (proc.stdout, output_hook)
+                thread = Thread(target=_process_stream, args=args, daemon=True)
+                thread.start()
+                thread.join()  # wait for the reader thread
+
+        if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
+            raise QueryScriptCancelError(
+                "Query script was canceled by user",
+                return_code=proc.returncode,
+            )
+        if proc.returncode:
+            raise QueryScriptRunError(
+                f"Query script exited with error code {proc.returncode}",
+                return_code=proc.returncode,
+            )

     def cp(
         self,
@@ -2081,8 +1980,6 @@ class Catalog:
             field_set.add("path")
         elif column == "name":
             field_set.add("path")
-        elif column == "owner":
-            field_set.add("owner_name")
         elif column == "path":
             field_set.add("dir_type")
             field_set.add("path")
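The net effect of the catalog.py changes above is that query() now compiles the script inline, launches it with subprocess.Popen, and streams the child's output through the new module-level _process_stream() helper on a background thread. Below is a minimal, self-contained sketch of that pattern; the demo command and callback are illustrative and not part of datachain.

import subprocess
import sys
from threading import Thread
from typing import IO, Callable


def process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
    buffer = b""
    while byt := stream.read(1):  # read one byte at a time
        buffer += byt
        if byt in (b"\n", b"\r"):  # a newline or carriage return ends a line
            callback(buffer.decode("utf-8"))
            buffer = b""
    if buffer:  # flush any trailing partial line
        callback(buffer.decode("utf-8"))


lines: list[str] = []
cmd = [sys.executable, "-c", "print('hello'); print('world')"]
with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as proc:
    reader = Thread(target=process_stream, args=(proc.stdout, lines.append), daemon=True)
    reader.start()
    reader.join()  # wait for the reader before inspecting the return code

print(lines)            # ['hello\n', 'world\n']
print(proc.returncode)  # 0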
datachain/cli.py
CHANGED
@@ -24,7 +24,7 @@ logger = logging.getLogger("datachain")

 TTL_HUMAN = "4h"
 TTL_INT = 4 * 60 * 60
-FIND_COLUMNS = ["du", "name", "
+FIND_COLUMNS = ["du", "name", "path", "size", "type"]


 def human_time_type(value_str: str, can_be_none: bool = False) -> Optional[int]:
@@ -579,9 +579,8 @@ def _node_data_to_ls_values(row, long_format=False):
     value = name + ending
     if long_format:
         last_modified = row[2]
-        owner_name = row[3]
         timestamp = last_modified if not is_dir else None
-        return long_line_str(value, timestamp
+        return long_line_str(value, timestamp)
     return value


@@ -599,7 +598,7 @@ def _ls_urls_flat(
     if client_cls.is_root_url(source):
         buckets = client_cls.ls_buckets(**catalog.client_config)
         if long:
-            values = (long_line_str(b.name, b.created
+            values = (long_line_str(b.name, b.created) for b in buckets)
         else:
             values = (b.name for b in buckets)
         yield source, values
@@ -607,7 +606,7 @@ def _ls_urls_flat(
         found = False
         fields = ["name", "dir_type"]
         if long:
-            fields.
+            fields.append("last_modified")
         for data_source, results in catalog.ls([source], fields=fields, **kwargs):
             values = (_node_data_to_ls_values(r, long) for r in results)
             found = True
@@ -683,7 +682,6 @@ def ls_remote(
             entry = long_line_str(
                 row["name"] + ("/" if row["dir_type"] else ""),
                 row["last_modified"],
-                row["owner_name"],
             )
             print(format_ls_entry(entry))
         else:
datachain/client/fsspec.py
CHANGED
datachain/client/s3.py
CHANGED
@@ -119,8 +119,6 @@ class ClientS3(Client):
             is_latest=v.get("IsLatest", True),
             last_modified=v.get("LastModified", ""),
             size=v["Size"],
-            owner_name=v.get("Owner", {}).get("DisplayName", ""),
-            owner_id=v.get("Owner", {}).get("ID", ""),
         )

     async def _fetch_dir(
@@ -165,8 +163,6 @@ class ClientS3(Client):
             is_latest=v.get("IsLatest", True),
             last_modified=v.get("LastModified", ""),
             size=v["size"],
-            owner_name=v.get("Owner", {}).get("DisplayName", ""),
-            owner_id=v.get("Owner", {}).get("ID", ""),
         )

     def info_to_file(self, v: dict[str, Any], path: str) -> File:
datachain/data_storage/schema.py
CHANGED
@@ -10,9 +10,8 @@ from typing import (

 import sqlalchemy as sa
 from sqlalchemy.sql import func as f
-from sqlalchemy.sql.expression import null, true
+from sqlalchemy.sql.expression import false, null, true

-from datachain.node import DirType
 from datachain.sql.functions import path
 from datachain.sql.types import Int, SQLType, UInt64

@@ -81,8 +80,7 @@ class DirExpansion:
     def base_select(q):
         return sa.select(
             q.c.sys__id,
-
-            (q.c.dir_type == DirType.DIR).label("is_dir"),
+            false().label("is_dir"),
             q.c.source,
             q.c.path,
             q.c.version,
@@ -94,7 +92,6 @@ class DirExpansion:
         return (
             sa.select(
                 f.min(q.c.sys__id).label("sys__id"),
-                q.c.vtype,
                 q.c.is_dir,
                 q.c.source,
                 q.c.path,
@@ -102,8 +99,8 @@ class DirExpansion:
                 f.max(q.c.location).label("location"),
             )
             .select_from(q)
-            .group_by(q.c.source, q.c.path, q.c.
-            .order_by(q.c.source, q.c.path, q.c.
+            .group_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
+            .order_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
         )

     @classmethod
@@ -113,7 +110,6 @@ class DirExpansion:
         q = q.union_all(
             sa.select(
                 sa.literal(-1).label("sys__id"),
-                sa.literal("").label("vtype"),
                 true().label("is_dir"),
                 q.c.source,
                 parent.label("path"),
datachain/data_storage/warehouse.py
CHANGED
@@ -28,7 +28,6 @@ from datachain.utils import sql_escape_like

 if TYPE_CHECKING:
     from sqlalchemy.sql._typing import _ColumnsClauseArgument
-    from sqlalchemy.sql.elements import ColumnElement
     from sqlalchemy.sql.selectable import Select
     from sqlalchemy.types import TypeEngine

@@ -341,9 +340,7 @@ class AbstractWarehouse(ABC, Serializable):

         column_objects = [dr.c[c] for c in column_names]
         # include all object types - file, tar archive, tar file (subobject)
-        select_query = dr.select(*column_objects).where(
-            dr.c.dir_type.in_(DirTypeGroup.FILE) & (dr.c.is_latest == true())
-        )
+        select_query = dr.select(*column_objects).where(dr.c.is_latest == true())
         if path is None:
             return select_query
         if recursive:
@@ -420,7 +417,6 @@ class AbstractWarehouse(ABC, Serializable):
         """

         def _prepare_entry(entry: Entry):
-            assert entry.dir_type is not None
             return attrs.asdict(entry) | {"source": uri}

         return [_prepare_entry(e) for e in entries]
@@ -440,7 +436,7 @@ class AbstractWarehouse(ABC, Serializable):
         """Inserts dataset rows directly into dataset table"""

     @abstractmethod
-    def instr(self, source, target) ->
+    def instr(self, source, target) -> sa.ColumnElement:
         """
         Return SQLAlchemy Boolean determining if a target substring is present in
         source string column
@@ -500,7 +496,7 @@ class AbstractWarehouse(ABC, Serializable):
         c = query.selected_columns
         q = query.where(c.dir_type.in_(file_group))
         if not include_subobjects:
-            q = q.where(c.
+            q = q.where((c.location == "") | (c.location.is_(None)))
         return q

     def get_nodes(self, query) -> Iterator[Node]:
@@ -624,8 +620,7 @@ class AbstractWarehouse(ABC, Serializable):

         return sa.select(
             de.c.sys__id,
-
-            case((de.c.is_dir == true(), DirType.DIR), else_=dr.c.dir_type).label(
+            case((de.c.is_dir == true(), DirType.DIR), else_=DirType.FILE).label(
                 "dir_type"
             ),
             de.c.path,
@@ -634,8 +629,6 @@ class AbstractWarehouse(ABC, Serializable):
             with_default(dr.c.is_latest),
             dr.c.last_modified,
             with_default(dr.c.size),
-            with_default(dr.c.owner_name),
-            with_default(dr.c.owner_id),
             with_default(dr.c.sys__rand),
             dr.c.location,
             de.c.source,
@@ -650,7 +643,6 @@ class AbstractWarehouse(ABC, Serializable):
         query = dr.select().where(
             self.path_expr(dr) == path,
             dr.c.is_latest == true(),
-            dr.c.dir_type != DirType.DIR,
         )
         row = next(self.db.execute(query), None)
         if row is not None:
@@ -660,7 +652,6 @@ class AbstractWarehouse(ABC, Serializable):
             dr.select()
             .where(
                 dr.c.is_latest == true(),
-                dr.c.dir_type != DirType.DIR,
                 dr.c.path.startswith(path),
             )
             .exists()
@@ -761,13 +752,11 @@ class AbstractWarehouse(ABC, Serializable):

         sub_glob = posixpath.join(path, "*")
         dr = dataset_rows
-        selections = [
+        selections: list[sa.ColumnElement] = [
             func.sum(dr.c.size),
         ]
         if count_files:
-            selections.append(
-                func.sum(dr.c.dir_type.in_(DirTypeGroup.FILE)),
-            )
+            selections.append(func.count())
         results = next(
             self.db.execute(
                 dr.select(*selections).where(
datachain/error.py
CHANGED
datachain/lib/clip.py
CHANGED
@@ -18,7 +18,7 @@ def _get_encoder(model: Any, type: Literal["image", "text"]) -> Callable:
         hasattr(model, method_name) and inspect.ismethod(getattr(model, method_name))
     ):
         method = getattr(model, method_name)
-        return lambda x: method(torch.
+        return lambda x: method(torch.as_tensor(x).clone().detach())

     # Check for model from clip or open_clip library
     method_name = f"encode_{type}"
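The clip.py change (and the matching ones in image.py and text.py below) replaces a plain torch.tensor(...) call with torch.as_tensor(...).clone().detach(). A short sketch of why, assuming only that the input may already be a tensor:

import torch

tokens = torch.tensor([1, 2, 3])

# torch.tensor(tokens) emits a copy-construct UserWarning for tensor inputs;
# as_tensor avoids an extra copy for tensors and still accepts lists/arrays,
# while clone().detach() returns an independent tensor cut off from autograd.
safe = torch.as_tensor(tokens).clone().detach()
also_fine = torch.as_tensor([1, 2, 3]).clone().detach()
print(safe, also_fine)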
datachain/lib/dc.py
CHANGED
@@ -234,7 +234,6 @@ class DataChain(DatasetQuery):
     DEFAULT_FILE_RECORD: ClassVar[dict] = {
         "source": "",
         "path": "",
-        "vtype": "",
         "size": 0,
     }

@@ -415,7 +414,7 @@ class DataChain(DatasetQuery):
             .save(list_dataset_name, listing=True)
         )

-        dc = cls.from_dataset(list_dataset_name, session=session)
+        dc = cls.from_dataset(list_dataset_name, session=session, settings=settings)
         dc.signals_schema = dc.signals_schema.mutate({f"{object_name}": file_type})

         return ls(dc, list_path, recursive=recursive, object_name=object_name)
@@ -426,6 +425,7 @@ class DataChain(DatasetQuery):
         name: str,
         version: Optional[int] = None,
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
     ) -> "DataChain":
         """Get data from a saved Dataset. It returns the chain itself.

@@ -438,7 +438,7 @@ class DataChain(DatasetQuery):
             chain = DataChain.from_dataset("my_cats")
             ```
         """
-        return DataChain(name=name, version=version, session=session)
+        return DataChain(name=name, version=version, session=session, settings=settings)

     @classmethod
     def from_json(
@@ -1622,6 +1622,8 @@ class DataChain(DatasetQuery):
         model_name: str = "",
         source: bool = True,
         nrows=None,
+        session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         **kwargs,
     ) -> "DataChain":
         """Generate chain from csv files.
@@ -1638,6 +1640,8 @@ class DataChain(DatasetQuery):
             model_name : Generated model name.
             source : Whether to include info about the source file.
             nrows : Optional row limit.
+            session : Session to use for the chain.
+            settings : Settings to use for the chain.

         Example:
             Reading a csv file:
@@ -1654,7 +1658,9 @@ class DataChain(DatasetQuery):
         from pyarrow.csv import ConvertOptions, ParseOptions, ReadOptions
         from pyarrow.dataset import CsvFileFormat

-        chain = DataChain.from_storage(
+        chain = DataChain.from_storage(
+            path, session=session, settings=settings, **kwargs
+        )

         column_names = None
         if not header:
@@ -1701,6 +1707,8 @@ class DataChain(DatasetQuery):
         object_name: str = "",
         model_name: str = "",
         source: bool = True,
+        session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         **kwargs,
     ) -> "DataChain":
         """Generate chain from parquet files.
@@ -1713,6 +1721,8 @@ class DataChain(DatasetQuery):
             object_name : Created object column name.
             model_name : Generated model name.
             source : Whether to include info about the source file.
+            session : Session to use for the chain.
+            settings : Settings to use for the chain.

         Example:
             Reading a single file:
@@ -1725,7 +1735,9 @@ class DataChain(DatasetQuery):
             dc = DataChain.from_parquet("s3://mybucket/dir")
             ```
         """
-        chain = DataChain.from_storage(
+        chain = DataChain.from_storage(
+            path, session=session, settings=settings, **kwargs
+        )
         return chain.parse_tabular(
             output=output,
             object_name=object_name,
datachain/lib/file.py
CHANGED
@@ -118,7 +118,6 @@ class File(DataModel):
     is_latest: bool = Field(default=True)
     last_modified: datetime = Field(default=TIME_ZERO)
     location: Optional[Union[dict, list[dict]]] = Field(default=None)
-    vtype: str = Field(default="")

     _datachain_column_types: ClassVar[dict[str, Any]] = {
         "source": String,
@@ -129,7 +128,6 @@ class File(DataModel):
         "is_latest": Boolean,
         "last_modified": DateTime,
         "location": JSON,
-        "vtype": String,
     }

     _unique_id_keys: ClassVar[list[str]] = [
@@ -139,7 +137,6 @@ class File(DataModel):
         "etag",
         "version",
         "is_latest",
-        "vtype",
         "location",
         "last_modified",
     ]
@@ -195,14 +192,15 @@ class File(DataModel):
             with VFileRegistry.resolve(self, self.location) as f:  # type: ignore[arg-type]
                 yield f

-
-
-
-
-
-
-
-
+        else:
+            uid = self.get_uid()
+            client = self._catalog.get_client(self.source)
+            if self._caching_enabled:
+                client.download(uid, callback=self._download_cb)
+            with client.open_object(
+                uid, use_cache=self._caching_enabled, cb=self._download_cb
+            ) as f:
+                yield io.TextIOWrapper(f) if mode == "r" else f

     def read(self, length: int = -1):
         """Returns file contents."""
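The rewritten open() fallback in file.py streams the object through the client and only layers io.TextIOWrapper on top when text mode was requested. A hedged sketch of that mode handling, with an in-memory BytesIO standing in for the real client.open_object() call:

import io

def open_stream(mode: str = "rb"):
    raw = io.BytesIO(b"hello world\n")  # stand-in for client.open_object(...)
    return io.TextIOWrapper(raw) if mode == "r" else raw

print(open_stream("r").read())   # str: 'hello world\n'
print(open_stream("rb").read())  # bytes: b'hello world\n'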
datachain/lib/image.py
CHANGED
@@ -34,7 +34,7 @@ def convert_image(
         from transformers.image_processing_utils import BaseImageProcessor

         if isinstance(transform, BaseImageProcessor):
-            img = torch.
+            img = torch.as_tensor(img.pixel_values[0]).clone().detach()  # type: ignore[assignment,attr-defined]
     except ImportError:
         pass
     if device:
datachain/lib/meta_formats.py
CHANGED
@@ -1,13 +1,10 @@
-# pip install datamodel-code-generator
-# pip install jmespath
-#
 import csv
 import json
 import tempfile
 import uuid
 from collections.abc import Iterator
 from pathlib import Path
-from typing import
+from typing import Callable

 import datamodel_code_generator
 import jmespath as jsp
@@ -85,7 +82,6 @@ def read_schema(source_file, data_type="csv", expr=None, model_name=None):
         use_standard_collections=True,
     )
     epilogue = f"""
-{model_name}.model_rebuild()
 DataModel.register({model_name})
 spec = {model_name}
 """
@@ -122,9 +118,9 @@ def read_meta(  # noqa: C901
         print(f"{model_output}")
     # Below 'spec' should be a dynamically converted DataModel from Pydantic
     if not spec:
-
-        exec(model_output,
-        spec =
+        gl = globals()
+        exec(model_output, gl)  # type: ignore[arg-type] # noqa: S102
+        spec = gl["spec"]

     if not (spec) and not (schema_from):
         raise ValueError(
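The meta_formats.py change executes the generated model source against an explicit namespace and reads the resulting spec object back out by name, rather than relying on assignment side effects. A minimal sketch of that exec-into-a-namespace pattern; the generated source here is illustrative only:

generated_source = """
class GeneratedModel:
    fields = ["name", "size"]

spec = GeneratedModel
"""

namespace: dict = {}  # the patch reuses globals(); a fresh dict works the same way
exec(generated_source, namespace)  # noqa: S102
spec = namespace["spec"]
print(spec.fields)  # ['name', 'size']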
datachain/lib/model_store.py
CHANGED
@@ -1,3 +1,4 @@
+import inspect
 import logging
 from typing import ClassVar, Optional

@@ -69,7 +70,11 @@ class ModelStore:

     @staticmethod
     def is_pydantic(val):
-        return
+        return (
+            not hasattr(val, "__origin__")
+            and inspect.isclass(val)
+            and issubclass(val, BaseModel)
+        )

     @staticmethod
     def to_pydantic(val) -> Optional[type[BaseModel]]:
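The stricter is_pydantic() shown above rejects parametrized generics (which carry __origin__ and are not classes) and non-class values before the issubclass check. A small sketch of what now passes and what does not:

import inspect

from pydantic import BaseModel


def is_pydantic(val) -> bool:  # same predicate as in the patch
    return (
        not hasattr(val, "__origin__")
        and inspect.isclass(val)
        and issubclass(val, BaseModel)
    )


class MyModel(BaseModel):
    name: str = ""


print(is_pydantic(MyModel))        # True
print(is_pydantic(list[MyModel]))  # False: parametrized generic, has __origin__
print(is_pydantic("not a type"))   # False: not a class at all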
datachain/lib/text.py
CHANGED
@@ -33,7 +33,7 @@ def convert_text(
     res = tokenizer(text)

     tokens = res.input_ids if isinstance(tokenizer, PreTrainedTokenizerBase) else res
-    tokens = torch.
+    tokens = torch.as_tensor(tokens).clone().detach()
     if device:
         tokens = tokens.to(device)

datachain/lib/webdataset.py
CHANGED
@@ -1,6 +1,7 @@
 import hashlib
 import json
 import tarfile
+import warnings
 from collections.abc import Iterator, Sequence
 from pathlib import Path
 from typing import (
@@ -19,6 +20,18 @@ from datachain.lib.data_model import DataModel
 from datachain.lib.file import File, TarVFile
 from datachain.lib.utils import DataChainError

+# The `json` method of the Pydantic `BaseModel` class has been deprecated
+# and will be removed in Pydantic v3. For more details, see:
+# https://github.com/pydantic/pydantic/issues/10033
+# Until then, we can ignore the warning.
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=(
+        'Field name "json" in "WDSAllFile" shadows an attribute in parent "WDSBasic"'
+    ),
+)
+

 class WDSError(DataChainError):
     def __init__(self, tar_stream, message: str):
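Both webdataset modules register a narrowly scoped warning filter: warnings.filterwarnings matches message as a regular expression against the start of the warning text, so only the specific Pydantic "shadows an attribute" message is silenced while other UserWarnings still surface. A short sketch of that behaviour:

import warnings

warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message='Field name "json" in "WDSAllFile" shadows an attribute in parent "WDSBasic"',
)

warnings.warn(
    'Field name "json" in "WDSAllFile" shadows an attribute in parent "WDSBasic"',
    UserWarning,
)  # silenced by the filter above
warnings.warn("unrelated warning", UserWarning)  # still reported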
datachain/lib/webdataset_laion.py
CHANGED
@@ -1,3 +1,4 @@
+import warnings
 from collections.abc import Iterator
 from typing import Optional

@@ -7,6 +8,18 @@ from pydantic import BaseModel, Field
 from datachain.lib.file import File
 from datachain.lib.webdataset import WDSBasic, WDSReadableSubclass

+# The `json` method of the Pydantic `BaseModel` class has been deprecated
+# and will be removed in Pydantic v3. For more details, see:
+# https://github.com/pydantic/pydantic/issues/10033
+# Until then, we can ignore the warning.
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=(
+        'Field name "json" in "WDSLaion" shadows an attribute in parent "WDSBasic"'
+    ),
+)
+

 class Laion(WDSReadableSubclass):
     uid: str = Field(default="")
datachain/listing.py
CHANGED
@@ -104,7 +104,7 @@ class Listing:
         return self.warehouse.get_node_by_path(self.dataset_rows, path)

     def ls_path(self, node, fields):
-        if node.
+        if node.location or node.dir_type == DirType.TAR_ARCHIVE:
             return self.warehouse.select_node_fields_by_parent_path_tar(
                 self.dataset_rows, node.path, fields
             )
@@ -235,7 +235,7 @@ class Listing:
         return self.warehouse.size(self.dataset_rows, node, count_files)

     def subtree_files(self, node: Node, sort=None):
-        if node.dir_type == DirType.TAR_ARCHIVE or node.
+        if node.dir_type == DirType.TAR_ARCHIVE or node.location:
             include_subobjects = True
         else:
             include_subobjects = False
datachain/node.py
CHANGED
@@ -49,18 +49,15 @@ class DirTypeGroup:
 class Node:
     sys__id: int = 0
     sys__rand: int = 0
-    vtype: str = ""
-    dir_type: Optional[int] = None
     path: str = ""
     etag: str = ""
     version: Optional[str] = None
     is_latest: bool = True
     last_modified: Optional[datetime] = None
     size: int = 0
-    owner_name: str = ""
-    owner_id: str = ""
     location: Optional[str] = None
     source: StorageURI = StorageURI("")
+    dir_type: int = DirType.FILE

     @property
     def is_dir(self) -> bool:
@@ -113,7 +110,6 @@ class Node:
             version=self.version or "",
             etag=self.etag,
             is_latest=self.is_latest,
-            vtype=self.vtype,
             location=self.location,
             last_modified=self.last_modified or TIME_ZERO,
         )
@@ -145,38 +141,20 @@ class Node:

 @attrs.define
 class Entry:
-    vtype: str = ""
-    dir_type: Optional[int] = None
     path: str = ""
     etag: str = ""
     version: str = ""
     is_latest: bool = True
     last_modified: Optional[datetime] = None
     size: int = 0
-    owner_name: str = ""
-    owner_id: str = ""
     location: Optional[str] = None

-    @property
-    def is_dir(self) -> bool:
-        return self.dir_type == DirType.DIR
-
-    @classmethod
-    def from_dir(cls, path: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.DIR, path=path, **kwargs)
-
     @classmethod
     def from_file(cls, path: str, **kwargs) -> "Entry":
-        return cls(
-
-    @classmethod
-    def root(cls):
-        return cls(dir_type=DirType.DIR)
+        return cls(path=path, **kwargs)

     @property
     def full_path(self) -> str:
-        if self.is_dir and self.path:
-            return self.path + "/"
         return self.path

     @property
@@ -229,9 +207,9 @@ class NodeWithPath:
 TIME_FMT = "%Y-%m-%d %H:%M"


-def long_line_str(name: str, timestamp: Optional[datetime]
+def long_line_str(name: str, timestamp: Optional[datetime]) -> str:
     if timestamp is None:
         time = "-"
     else:
         time = timestamp.strftime(TIME_FMT)
-    return f"{
+    return f"{time: <19} {name}"
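The new long_line_str() in node.py formats the timestamp with the f-string spec {time: <19}, i.e. left-aligned and padded to 19 characters before the name. A tiny sketch of the resulting layout:

from datetime import datetime

TIME_FMT = "%Y-%m-%d %H:%M"
time = datetime(2024, 8, 20, 12, 30).strftime(TIME_FMT)
print(f"{time: <19} cats/cat1.jpg")
# '2024-08-20 12:30    cats/cat1.jpg'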
datachain/query/builtins.py
CHANGED
@@ -22,10 +22,6 @@ def load_tar(raw):
     C.source,
     C.path,
     C.size,
-    C.vtype,
-    C.dir_type,
-    C.owner_name,
-    C.owner_id,
     C.is_latest,
     C.last_modified,
     C.version,
@@ -38,10 +34,6 @@ def index_tar(
     source,
     parent_path,
     size,
-    vtype,
-    dir_type,
-    owner_name,
-    owner_id,
     is_latest,
     last_modified,
     version,
@@ -53,10 +45,6 @@ def index_tar(
         source=source,
         path=parent_path,
         size=size,
-        vtype=vtype,
-        dir_type=dir_type,
-        owner_name=owner_name,
-        owner_id=owner_id,
         is_latest=bool(is_latest),
         last_modified=last_modified,
         version=version,
@@ -70,7 +58,6 @@ def index_tar(
             source=source,
             path=full_path,
             size=info.size,
-            vtype="tar",
             location={
                 "vtype": "tar",
                 "offset": info.offset_data,
@@ -81,7 +68,6 @@ def index_tar(
                     "version": version,
                     "size": size,
                     "etag": etag,
-                    "vtype": "",
                     "location": None,
                 },
             },
datachain/query/schema.py
CHANGED
@@ -9,7 +9,7 @@ import attrs
 import sqlalchemy as sa
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback

-from datachain.sql.types import JSON, Boolean, DateTime,
+from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String

 if TYPE_CHECKING:
     from datachain.catalog import Catalog
@@ -222,10 +222,6 @@ class DatasetRow:
         "path": String,
         "size": Int64,
         "location": JSON,
-        "vtype": String,
-        "dir_type": Int,
-        "owner_name": String,
-        "owner_id": String,
         "is_latest": Boolean,
         "last_modified": DateTime,
         "version": String,
@@ -238,10 +234,6 @@ class DatasetRow:
         source: str = "",
         size: int = 0,
         location: Optional[dict[str, Any]] = None,
-        vtype: str = "",
-        dir_type: int = 0,
-        owner_name: str = "",
-        owner_id: str = "",
         is_latest: bool = True,
         last_modified: Optional[datetime] = None,
         version: str = "",
@@ -251,10 +243,7 @@ class DatasetRow:
         str,
         int,
         Optional[str],
-        str,
         int,
-        str,
-        str,
         bool,
         datetime,
         str,
@@ -271,10 +260,6 @@ class DatasetRow:
         path,
         size,
         location,
-        vtype,
-        dir_type,
-        owner_name,
-        owner_id,
         is_latest,
         last_modified,
         version,
datachain/utils.py
CHANGED
{datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.
+Version: 0.3.13
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -80,7 +80,6 @@ Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
 Requires-Dist: pytest-mock >=3.12.0 ; extra == 'tests'
 Requires-Dist: pytest-servers[all] >=0.5.5 ; extra == 'tests'
 Requires-Dist: pytest-benchmark[histogram] ; extra == 'tests'
-Requires-Dist: pytest-asyncio >=0.23.2 ; extra == 'tests'
 Requires-Dist: pytest-xdist >=3.3.1 ; extra == 'tests'
 Requires-Dist: virtualenv ; extra == 'tests'
 Requires-Dist: dulwich ; extra == 'tests'
@@ -96,12 +95,14 @@ Requires-Dist: transformers >=4.36.0 ; extra == 'torch'
 Provides-Extra: vector
 Requires-Dist: usearch ; extra == 'vector'

-
-
-
+================
+|logo| DataChain
+================

 |PyPI| |Python Version| |Codecov| |Tests|

+.. |logo| image:: docs/assets/datachain.svg
+   :height: 24
 .. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
    :target: https://pypi.org/project/datachain/
    :alt: PyPI
@@ -115,8 +116,6 @@ Requires-Dist: usearch ; extra == 'vector'
    :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
    :alt: Tests

-----------------
-
 DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
 It is made to organize your unstructured data into datasets and wrangle it at scale on
 your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
{datachain-0.3.11.dist-info → datachain-0.3.13.dist-info}/RECORD
CHANGED
@@ -1,65 +1,65 @@
 datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=biF8M8fQujtj5xs0VLi8S16eBtzG6kceWlO_NILbCsg,8197
-datachain/cache.py,sha256=
-datachain/cli.py,sha256=
+datachain/cache.py,sha256=WP-ktH_bRn3w2g1JOOQ7rCPsZyR4OM6K1Kb7yZsSSns,4056
+datachain/cli.py,sha256=alMjnoBUBLvBSMBR51N09rA_aUEdHJwyxSRogF7VbbA,30891
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=EcYjhHg1dxxPbDwSuIxc-mDRDo3v_pYf79fMy4re1oA,14740
-datachain/error.py,sha256=
+datachain/error.py,sha256=OnZ8OaBtDdTZPy8XQiy29SAjqdQArQeorYbP5ju7ldc,1199
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
-datachain/listing.py,sha256=
-datachain/node.py,sha256=
+datachain/listing.py,sha256=LgL0lV10AzD1v52ajSaJKFnyiq4hNXwQiqaGySWGQsw,8290
+datachain/node.py,sha256=gacKxUPLgJ1ul6LJWz7nylYjUWPbyUY5cqaBFDOnO9E,5756
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
-datachain/utils.py,sha256=
+datachain/utils.py,sha256=Z9-lPNvrrAh_VWpzVBJ7L5-Oy_Oo1V0ZW7G0MVDyPK4,13065
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=hhLciKHD0dVwniFzUsYORQ72WpnM40QYT0ydoyx1Kvw,69308
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
 datachain/client/azure.py,sha256=LXSahE0Z6r4dXqpBkKnq3J5fg7N7ymC1lSn-1SoILGc,2687
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=
+datachain/client/fsspec.py,sha256=Hy3-4HRV-3MozOybqAnF-qL0EoMYFHynpTG_YZphjZE,13298
 datachain/client/gcs.py,sha256=P_E3mhzhXR9mJ_wc3AYZuczzwOJ0-D3J5qhJXeSU-xk,4518
 datachain/client/hf.py,sha256=R-F6Ks6aVM9wSNkIXOkOnZFwsJlfdRwJjymRa78RLjM,1246
 datachain/client/local.py,sha256=H8TNY8pi2kA8y9_f_1XLUjJF66f229qC_b2y4xGkzdU,5300
-datachain/client/s3.py,sha256=
+datachain/client/s3.py,sha256=zs41EvYW1bS_pUxnkCnJILzUJpL2V1jvvVKSN4BKYcc,6326
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=cHN0xmbUvChyayHHZm3Vqxr87jFqojPSlGBqhTPStlE,54519
-datachain/data_storage/schema.py,sha256=
+datachain/data_storage/schema.py,sha256=AGbjyEir5UmRZXI3m0jChZogUh5wd8csj6-YlUWaAxQ,8383
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=Z4B2KDL4C8Uio2aLMxaKv0t2MoOtCV3bSqWg4X9mTFg,28048
-datachain/data_storage/warehouse.py,sha256=
+datachain/data_storage/warehouse.py,sha256=s5hhVUWrlEopE6eGOqzXHeNtRapK30G8gj0Vkt_HHFQ,32649
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=dV17oGiknqEW55ogGK_9T0ycNFwd2z-EFOW0AQiR6TU,5840
-datachain/lib/clip.py,sha256=
+datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
 datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
-datachain/lib/dc.py,sha256=
-datachain/lib/file.py,sha256=
+datachain/lib/dc.py,sha256=C-sfWRinV8pDK2P6UHLbScOahTlTiVQpoxUUdVllF2k,68710
+datachain/lib/file.py,sha256=rXmyzUFgnLQ4J3CyOCcg-guhzAz4x9Ug595FbNn4Y2E,11398
 datachain/lib/hf.py,sha256=ZiMvgy3DYiklGKZv-w7gevrHOgn3bGfpTlpDPOHCNqs,5336
-datachain/lib/image.py,sha256=
+datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
 datachain/lib/listing.py,sha256=S9Xn_Saxu4xk3K_01VexkfMZW0INQiATlidt2bzgWKY,3938
 datachain/lib/listing_info.py,sha256=sr5KzCXlCxlPuRmy_pVadD4miLpp5y0btvyaIPcluwI,996
-datachain/lib/meta_formats.py,sha256=
-datachain/lib/model_store.py,sha256=
+datachain/lib/meta_formats.py,sha256=3f-0vpMTesagS9iMd3y9-u9r-7g0eqYsxmK4fVfNWlw,6635
+datachain/lib/model_store.py,sha256=xcrQ69-jcQs716U4UFOSoSKM7EvFIWqxlPhIcE4X7oI,2497
 datachain/lib/pytorch.py,sha256=vK3GbWCy7kunN7ubul6w1hrWmJLja56uTCiMG_7XVQA,5623
 datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
 datachain/lib/signal_schema.py,sha256=hqQLwUmt3w8RLa96MtubK9N2CBXqqTPrUkSRXc0ktt4,20275
-datachain/lib/text.py,sha256=
+datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
 datachain/lib/udf.py,sha256=nG7DDuPgZ5ZuijwvDoCq-OZMxlDM8vFNzyxMmik0Y1c,11716
 datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
 datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=
-datachain/lib/webdataset_laion.py,sha256=
+datachain/lib/webdataset.py,sha256=ZzGLtOUA-QjP4kttGgNqhrioDuDnomWFlsow4fLdezQ,8717
+datachain/lib/webdataset_laion.py,sha256=aGMWeFmeYNK75ewO9JTA11iB1i3QtTzUfenQA5jajfo,2535
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
 datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
@@ -68,13 +68,13 @@ datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xd
 datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMNDGl4x5t6yQMl8,3931
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
-datachain/query/builtins.py,sha256=
+datachain/query/builtins.py,sha256=U6yHPF9bzxqK5iwyqCqbJxo8ggBVx9FtuXxRrQQ0SNM,2244
 datachain/query/dataset.py,sha256=B2EmGOL8gjrdU_WhU88Dj7FsxvxrNeKwe2STXnU9T9E,58369
 datachain/query/dispatch.py,sha256=GBh3EZHDp5AaXxrjOpfrpfsuy7Umnqxu-MAXcK9X3gc,12945
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
-datachain/query/schema.py,sha256=
+datachain/query/schema.py,sha256=ytlkA1xFAUOia25u8d6pxvxBSRl3uivLuOe2eHaw-qc,7550
 datachain/query/session.py,sha256=UPH5Z4fzCDsvj81ji0e8GA6Mgra3bOAEpVq4htqOtis,4317
 datachain/query/udf.py,sha256=j3NhmKK5rYG5TclcM2Sr0LhS1tmYLMjzMugx9G9iFLM,8100
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -96,9 +96,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
+datachain-0.3.13.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.13.dist-info/METADATA,sha256=pzMOR9LYuLR26Wifk4GPS9Wi1mmqCC5CIBZyA-X5_oo,17073
+datachain-0.3.13.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+datachain-0.3.13.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.13.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.13.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|