datachain 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release.



datachain/cli.py CHANGED
@@ -15,8 +15,8 @@ import shtab
 from datachain import Session, utils
 from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyValueArgs
 from datachain.lib.dc import DataChain
+from datachain.studio import process_studio_cli_args
 from datachain.telemetry import telemetry
-from datachain.utils import DataChainDir
 
 if TYPE_CHECKING:
     from datachain.catalog import Catalog
@@ -98,6 +98,134 @@ def add_show_args(parser: ArgumentParser) -> None:
     )
 
 
+def add_studio_parser(subparsers, parent_parser) -> None:
+    studio_help = "Commands to authenticate DataChain with Iterative Studio"
+    studio_description = (
+        "Authenticate DataChain with Studio and set the token. "
+        "Once this token has been properly configured,\n"
+        "DataChain will utilize it for seamlessly sharing datasets\n"
+        "and using Studio features from CLI"
+    )
+
+    studio_parser = subparsers.add_parser(
+        "studio",
+        parents=[parent_parser],
+        description=studio_description,
+        help=studio_help,
+    )
+    studio_subparser = studio_parser.add_subparsers(
+        dest="cmd",
+        help="Use `DataChain studio CMD --help` to display command-specific help.",
+        required=True,
+    )
+
+    studio_login_help = "Authenticate DataChain with Studio host"
+    studio_login_description = (
+        "By default, this command authenticates the DataChain with Studio\n"
+        "using default scopes and assigns a random name as the token name."
+    )
+    login_parser = studio_subparser.add_parser(
+        "login",
+        parents=[parent_parser],
+        description=studio_login_description,
+        help=studio_login_help,
+    )
+
+    login_parser.add_argument(
+        "-H",
+        "--hostname",
+        action="store",
+        default=None,
+        help="The hostname of the Studio instance to authenticate with.",
+    )
+    login_parser.add_argument(
+        "-s",
+        "--scopes",
+        action="store",
+        default=None,
+        help="The scopes for the authentication token. ",
+    )
+
+    login_parser.add_argument(
+        "-n",
+        "--name",
+        action="store",
+        default=None,
+        help="The name of the authentication token. It will be used to\n"
+        "identify token shown in Studio profile.",
+    )
+
+    login_parser.add_argument(
+        "--no-open",
+        action="store_true",
+        default=False,
+        help="Use authentication flow based on user code.\n"
+        "You will be presented with user code to enter in browser.\n"
+        "DataChain will also use this if it cannot launch browser on your behalf.",
+    )
+
+    studio_logout_help = "Logout user from Studio"
+    studio_logout_description = "This removes the studio token from your global config."
+
+    studio_subparser.add_parser(
+        "logout",
+        parents=[parent_parser],
+        description=studio_logout_description,
+        help=studio_logout_help,
+    )
+
+    studio_team_help = "Set the default team for DataChain"
+    studio_team_description = (
+        "Set the default team for DataChain to use when interacting with Studio."
+    )
+
+    team_parser = studio_subparser.add_parser(
+        "team",
+        parents=[parent_parser],
+        description=studio_team_description,
+        help=studio_team_help,
+    )
+    team_parser.add_argument(
+        "team_name",
+        action="store",
+        help="The name of the team to set as the default.",
+    )
+    team_parser.add_argument(
+        "--global",
+        action="store_true",
+        default=False,
+        help="Set the team globally for all DataChain projects.",
+    )
+
+    studio_token_help = "View the token datachain uses to contact Studio"  # noqa: S105 # nosec B105
+
+    studio_subparser.add_parser(
+        "token",
+        parents=[parent_parser],
+        description=studio_token_help,
+        help=studio_token_help,
+    )
+
+    studio_ls_dataset_help = "List the available datasets from Studio"
+    studio_ls_dataset_description = (
+        "This command lists all the datasets available in Studio.\n"
+        "It will show the dataset name and the number of versions available."
+    )
+
+    ls_dataset_parser = studio_subparser.add_parser(
+        "datasets",
+        parents=[parent_parser],
+        description=studio_ls_dataset_description,
+        help=studio_ls_dataset_help,
+    )
+    ls_dataset_parser.add_argument(
+        "--team",
+        action="store",
+        default=None,
+        help="The team to list datasets for. By default, it will use team from config.",
+    )
+
+
 def get_parser() -> ArgumentParser:  # noqa: PLR0915
     try:
         __version__ = version("datachain")
@@ -121,12 +249,6 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         action="store_true",
         help="AWS anon (aka awscli's --no-sign-request)",
     )
-    parent_parser.add_argument(
-        "--ttl",
-        type=human_time_type,
-        default=TTL_HUMAN,
-        help="Time-to-live of data source cache. Negative equals forever.",
-    )
     parent_parser.add_argument(
         "-u", "--update", action="count", default=0, help="Update cache"
     )
@@ -226,6 +348,8 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         help="Use a different filename for the resulting .edatachain file",
     )
 
+    add_studio_parser(subp, parent_parser)
+
     parse_pull = subp.add_parser(
         "pull",
         parents=[parent_parser],
@@ -638,16 +762,13 @@ def format_ls_entry(entry: str) -> str:
 
 
 def ls_remote(
-    url: str,
-    username: str,
-    token: str,
     paths: Iterable[str],
     long: bool = False,
 ):
     from datachain.node import long_line_str
     from datachain.remote.studio import StudioClient
 
-    client = StudioClient(url, username, token)
+    client = StudioClient()
     first = True
     for path, response in client.ls(paths):
         if not first:
@@ -679,17 +800,14 @@ def ls(
     **kwargs,
 ):
     if config is None:
-        from .config import get_remote_config, read_config
+        from .config import Config
 
-        config = get_remote_config(read_config(DataChainDir.find().root), remote=remote)
+        config = Config().get_remote_config(remote=remote)
     remote_type = config["type"]
     if remote_type == "local":
        ls_local(sources, long=long, **kwargs)
     else:
         ls_remote(
-            config["url"],
-            config["username"],
-            config["token"],
             sources,
             long=long,
         )
@@ -887,7 +1005,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
             edatachain_only=False,
             no_edatachain_file=True,
             no_glob=args.no_glob,
-            ttl=args.ttl,
         )
     elif args.command == "clone":
         catalog.clone(
@@ -897,7 +1014,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
             update=bool(args.update),
             recursive=bool(args.recursive),
             no_glob=args.no_glob,
-            ttl=args.ttl,
             no_cp=args.no_cp,
             edatachain=args.edatachain,
             edatachain_file=args.edatachain_file,
@@ -923,7 +1039,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
             args.sources,
             long=bool(args.long),
             remote=args.remote,
-            ttl=args.ttl,
             update=bool(args.update),
             client_config=client_config,
         )
@@ -957,7 +1072,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
             show_bytes=args.bytes,
             depth=args.depth,
             si=args.si,
-            ttl=args.ttl,
             update=bool(args.update),
             client_config=client_config,
         )
@@ -965,7 +1079,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
         results_found = False
         for result in catalog.find(
             args.sources,
-            ttl=args.ttl,
             update=bool(args.update),
             names=args.name,
             inames=args.iname,
@@ -983,7 +1096,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
         index(
             catalog,
             args.sources,
-            ttl=args.ttl,
             update=bool(args.update),
         )
     elif args.command == "completion":
  elif args.command == "completion":
@@ -1001,6 +1113,8 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1001
1113
  clear_cache(catalog)
1002
1114
  elif args.command == "gc":
1003
1115
  garbage_collect(catalog)
1116
+ elif args.command == "studio":
1117
+ process_studio_cli_args(args)
1004
1118
  else:
1005
1119
  print(f"invalid command: {args.command}", file=sys.stderr)
1006
1120
  return 1
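
Note (not part of the diff): the new subcommand rides on the existing CLI entry point. get_parser() registers the studio parser shown above, and main() routes the parsed arguments to process_studio_cli_args(). A minimal sketch, assuming the package is installed; the team name is a placeholder:

    from datachain.cli import main

    # Equivalent to running `datachain studio team my-team --global` in a shell:
    # main() parses argv, sees args.command == "studio", and delegates to
    # process_studio_cli_args(args); the return value is the process exit code.
    exit_code = main(["studio", "team", "my-team", "--global"])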
@@ -124,6 +124,9 @@ class Client(ABC):
     def get_client(source: str, cache: DataChainCache, **kwargs) -> "Client":
         cls = Client.get_implementation(source)
         storage_url, _ = cls.split_url(source)
+        if os.name == "nt":
+            storage_url = storage_url.removeprefix("/")
+
         return cls.from_name(storage_url, cache, kwargs)
 
     @classmethod
@@ -171,6 +174,12 @@ class Client(ABC):
 
     @classmethod
     def split_url(cls, url: str) -> tuple[str, str]:
+        """
+        Splits the URL into two pieces:
+        1. bucket name without protocol (everything up until the first /)
+        2. path which is the rest of URL starting from bucket name
+        e.g s3://my-bucket/animals/dogs -> (my-bucket, animals/dogs)
+        """
         fill_path = url[len(cls.PREFIX) :]
         path_split = fill_path.split("/", 1)
         bucket = path_split[0]
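
To make the newly documented contract concrete, here is a standalone restatement of the split (illustrative only; the "s3://" prefix stands in for the client's PREFIX attribute, which is not shown in this diff):

    def split_url_example(url: str, prefix: str = "s3://") -> tuple[str, str]:
        # Drop the protocol prefix, then split on the first "/": everything
        # before it is the bucket, everything after it is the path.
        rest = url[len(prefix):]
        parts = rest.split("/", 1)
        return parts[0], parts[1] if len(parts) > 1 else ""

    assert split_url_example("s3://my-bucket/animals/dogs") == ("my-bucket", "animals/dogs")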
datachain/client/local.py CHANGED
@@ -29,25 +29,7 @@ class FileClient(Client):
 
     @classmethod
     def get_uri(cls, name) -> StorageURI:
-        """
-        This returns root of FS as uri, e.g
-        Linux & Mac : file:///
-        Windows: file:///C:/
-        """
-        return StorageURI(Path(name).as_uri())
-
-    @staticmethod
-    def root_dir() -> str:
-        """
-        Returns file system root path.
-        Linux & MacOS: /
-        Windows: C:/
-        """
-        return Path.cwd().anchor.replace(os.sep, posixpath.sep)
-
-    @staticmethod
-    def root_path() -> Path:
-        return Path(FileClient.root_dir())
+        return StorageURI(f'{cls.PREFIX}/{name.removeprefix("/")}')
 
     @classmethod
     def ls_buckets(cls, **kwargs):
@@ -75,23 +57,20 @@ class FileClient(Client):
 
     @classmethod
     def split_url(cls, url: str) -> tuple[str, str]:
-        """
-        Splits url into two components:
-        1. root of the FS which will later on become the name of the storage
-        2. path which will later on become partial path
-        Note that URL needs to be have file:/// protocol.
-        Examples:
-        file:///tmp/dir -> / + tmp/dir
-        file:///c:/windows/files -> c:/ + windows/files
-        """
         parsed = urlparse(url)
         if parsed.scheme == "file":
             scheme, rest = url.split(":", 1)
-            uri = f"{scheme.lower()}:{rest}"
+            url = f"{scheme.lower()}:{rest}"
         else:
-            uri = cls.path_to_uri(url)
-
-        return cls.root_dir(), uri.removeprefix(cls.root_path().as_uri())
+            url = cls.path_to_uri(url)
+
+        fill_path = url[len(cls.PREFIX) :]
+        path_split = fill_path.rsplit("/", 1)
+        bucket = path_split[0]
+        if os.name == "nt":
+            bucket = bucket.removeprefix("/")
+        path = path_split[1] if len(path_split) > 1 else ""
+        return bucket, path
 
     @classmethod
     def from_name(cls, name: str, cache, kwargs) -> "FileClient":
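
The practical effect of the FileClient change, as a standalone sketch (illustrative only; it assumes FileClient.PREFIX is "file://", which this diff does not show): local URLs are now split on the last "/", so the parent directory becomes the storage name and the final component becomes the path, rather than always anchoring at the filesystem root:

    def file_split_url_example(url: str) -> tuple[str, str]:
        # Mirrors the new FileClient.split_url logic for the non-Windows case.
        rest = url[len("file://"):]
        parts = rest.rsplit("/", 1)
        return parts[0], parts[1] if len(parts) > 1 else ""

    # Previously documented behavior: file:///tmp/dir -> ("/", "tmp/dir").
    assert file_split_url_example("file:///tmp/dir") == ("/tmp", "dir")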
datachain/config.py CHANGED
@@ -1,62 +1,137 @@
-import os
 from collections.abc import Mapping
-from typing import TYPE_CHECKING, Optional
+from contextlib import contextmanager
+from enum import Enum
+from typing import Optional, Union
 
-from tomlkit import load
+from tomlkit import TOMLDocument, dump, load
 
-if TYPE_CHECKING:
-    from tomlkit import TOMLDocument
+from datachain.utils import DataChainDir, global_config_dir, system_config_dir
 
 
-def read_config(datachain_root: str) -> Optional["TOMLDocument"]:
-    config_path = os.path.join(datachain_root, "config")
-    try:
-        with open(config_path, encoding="utf-8") as f:
-            return load(f)
-    except FileNotFoundError:
-        return None
+# Define an enum with value system, global and local
+class ConfigLevel(Enum):
+    SYSTEM = "system"
+    GLOBAL = "global"
+    LOCAL = "local"
 
 
-def get_remote_config(
-    config: Optional["TOMLDocument"], remote: str = ""
-) -> Mapping[str, str]:
-    if config is None:
-        return {"type": "local"}
-    if not remote:
+class Config:
+    SYSTEM_LEVELS = (ConfigLevel.SYSTEM, ConfigLevel.GLOBAL)
+    LOCAL_LEVELS = (ConfigLevel.LOCAL,)
+
+    # In the order of precedence
+    LEVELS = SYSTEM_LEVELS + LOCAL_LEVELS
+
+    def __init__(
+        self,
+        level: Optional[ConfigLevel] = None,
+    ):
+        self.level = level
+
+        self.init()
+
+    @classmethod
+    def get_dir(cls, level: Optional[ConfigLevel]) -> str:
+        if level == ConfigLevel.SYSTEM:
+            return system_config_dir()
+        if level == ConfigLevel.GLOBAL:
+            return global_config_dir()
+
+        return str(DataChainDir.find().root)
+
+    def init(self):
+        d = DataChainDir(self.get_dir(self.level))
+        d.init()
+
+    def load_one(self, level: Optional[ConfigLevel] = None) -> TOMLDocument:
+        config_path = DataChainDir(self.get_dir(level)).config
+
         try:
-            remote = config["core"]["default-remote"]  # type: ignore[index,assignment]
-        except KeyError:
+            with open(config_path, encoding="utf-8") as f:
+                return load(f)
+        except FileNotFoundError:
+            return TOMLDocument()
+
+    def load_config_to_level(self) -> TOMLDocument:
+        merged_conf = TOMLDocument()
+
+        for merge_level in self.LEVELS:
+            if merge_level == self.level:
+                break
+            config = self.load_one(merge_level)
+            if config:
+                merge(merged_conf, config)
+
+        return merged_conf
+
+    def read(self) -> TOMLDocument:
+        if self.level is None:
+            return self.load_config_to_level()
+        return self.load_one(self.level)
+
+    @contextmanager
+    def edit(self):
+        config = self.load_one(self.level)
+        yield config
+
+        self.write(config)
+
+    def config_file(self):
+        return DataChainDir(self.get_dir(self.level)).config
+
+    def write(self, config: TOMLDocument):
+        with open(self.config_file(), "w") as f:
+            dump(config, f)
+
+    def get_remote_config(self, remote: str = "") -> Mapping[str, str]:
+        config = self.read()
+
+        if not config:
             return {"type": "local"}
-    try:
-        remote_conf: Mapping[str, str] = config["remote"][remote]  # type: ignore[assignment,index]
-    except KeyError:
-        raise Exception(
-            f"missing config section for default remote: remote.{remote}"
-        ) from None
-    except Exception as exc:
-        raise Exception("invalid config") from exc
-
-    if not isinstance(remote_conf, Mapping):
-        raise TypeError(f"config section remote.{remote} must be a mapping")
-
-    remote_type = remote_conf.get("type")
-    if remote_type not in ("local", "http"):
-        raise Exception(
-            f'config section remote.{remote} must have "type" with one of: '
-            '"local", "http"'
-        )
-
-    if remote_type == "http":
-        for key in ["url", "username", "token"]:
+        if not remote:
             try:
-                remote_conf[key]
+                remote = config["core"]["default-remote"]  # type: ignore[index,assignment]
             except KeyError:
-                raise Exception(
-                    f"config section remote.{remote} of type {remote_type} "
-                    f"must contain key {key}"
-                ) from None
-    elif remote_type != "local":
-        raise Exception(
-            f"config section remote.{remote} has invalid remote type {remote_type}"
-        )
-    return remote_conf
+                return {"type": "local"}
+        try:
+            remote_conf: Mapping[str, str] = config["remote"][remote]  # type: ignore[assignment,index]
+        except KeyError:
+            raise Exception(
+                f"missing config section for default remote: remote.{remote}"
+            ) from None
+        except Exception as exc:
+            raise Exception("invalid config") from exc
+
+        if not isinstance(remote_conf, Mapping):
+            raise TypeError(f"config section remote.{remote} must be a mapping")
+
+        remote_type = remote_conf.get("type")
+        if remote_type not in ("local", "http"):
+            raise Exception(
+                f'config section remote.{remote} must have "type" with one of: '
+                '"local", "http"'
+            )
+
+        if remote_type == "http":
+            for key in ["url", "username", "token"]:
+                try:
+                    remote_conf[key]
+                except KeyError:
+                    raise Exception(
+                        f"config section remote.{remote} of type {remote_type} "
+                        f"must contain key {key}"
+                    ) from None
+        elif remote_type != "local":
+            raise Exception(
+                f"config section remote.{remote} has invalid remote type {remote_type}"
+            )
+        return remote_conf
+
+
+def merge(into: Union[TOMLDocument, dict], update: Union[TOMLDocument, dict]):
+    """Merges second dict into first recursively"""
+    for key, val in update.items():
+        if isinstance(into.get(key), dict) and isinstance(val, dict):
+            merge(into[key], val)  # type: ignore[arg-type]
+        else:
+            into[key] = val
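
A minimal usage sketch of the new Config API (not part of the diff; the "studio" section and "team" value are illustrative placeholders, not something this release guarantees):

    from datachain.config import Config, ConfigLevel

    # edit() loads the config file for the chosen level, yields a TOMLDocument,
    # and writes it back when the with-block exits.
    with Config(ConfigLevel.GLOBAL).edit() as conf:
        conf["studio"] = {"team": "my-team"}

    # With no level given, read() merges system -> global -> local, so more
    # local levels override broader ones; get_remote_config() falls back to
    # {"type": "local"} when nothing is configured.
    merged = Config().read()
    remote = Config().get_remote_config()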
@@ -26,6 +26,13 @@ if TYPE_CHECKING:
     from sqlalchemy.sql.elements import ColumnElement
 
 
+DEFAULT_DELIMITER = "__"
+
+
+def col_name(name: str, object_name: str = "file") -> str:
+    return f"{object_name}{DEFAULT_DELIMITER}{name}"
+
+
 def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
     """
     Removes duplicate columns from a list of columns.
@@ -76,64 +83,81 @@ convert_rows_custom_column_types(
 
 
 class DirExpansion:
-    @staticmethod
-    def base_select(q):
+    def __init__(self, object_name: str):
+        self.object_name = object_name
+
+    def col_name(self, name: str, object_name: Optional[str] = None) -> str:
+        object_name = object_name or self.object_name
+        return col_name(name, object_name)
+
+    def c(self, query, name: str, object_name: Optional[str] = None) -> str:
+        return getattr(query.c, self.col_name(name, object_name=object_name))
+
+    def base_select(self, q):
         return sa.select(
-            q.c.sys__id,
-            false().label("is_dir"),
-            q.c.source,
-            q.c.path,
-            q.c.version,
-            q.c.location,
+            self.c(q, "id", object_name="sys"),
+            false().label(self.col_name("is_dir")),
+            self.c(q, "source"),
+            self.c(q, "path"),
+            self.c(q, "version"),
+            self.c(q, "location"),
         )
 
-    @staticmethod
-    def apply_group_by(q):
+    def apply_group_by(self, q):
         return (
             sa.select(
                 f.min(q.c.sys__id).label("sys__id"),
-                q.c.is_dir,
-                q.c.source,
-                q.c.path,
-                q.c.version,
-                f.max(q.c.location).label("location"),
+                self.c(q, "is_dir"),
+                self.c(q, "source"),
+                self.c(q, "path"),
+                self.c(q, "version"),
+                f.max(self.c(q, "location")).label(self.col_name("location")),
             )
             .select_from(q)
-            .group_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
-            .order_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
+            .group_by(
+                self.c(q, "source"),
+                self.c(q, "path"),
+                self.c(q, "is_dir"),
+                self.c(q, "version"),
+            )
+            .order_by(
+                self.c(q, "source"),
+                self.c(q, "path"),
+                self.c(q, "is_dir"),
+                self.c(q, "version"),
+            )
         )
 
-    @classmethod
-    def query(cls, q):
-        q = cls.base_select(q).cte(recursive=True)
-        parent = path.parent(q.c.path)
+    def query(self, q):
+        q = self.base_select(q).cte(recursive=True)
+        parent = path.parent(self.c(q, "path"))
         q = q.union_all(
             sa.select(
                 sa.literal(-1).label("sys__id"),
-                true().label("is_dir"),
-                q.c.source,
-                parent.label("path"),
-                sa.literal("").label("version"),
-                null().label("location"),
+                true().label(self.col_name("is_dir")),
+                self.c(q, "source"),
+                parent.label(self.col_name("path")),
+                sa.literal("").label(self.col_name("version")),
+                null().label(self.col_name("location")),
            ).where(parent != "")
         )
-        return cls.apply_group_by(q)
+        return self.apply_group_by(q)
 
 
 class DataTable:
-    dataset_dir_expansion = staticmethod(DirExpansion.query)
-
     def __init__(
         self,
         name: str,
         engine: "Engine",
         metadata: Optional["sa.MetaData"] = None,
         column_types: Optional[dict[str, SQLType]] = None,
+        object_name: str = "file",
     ):
         self.name: str = name
         self.engine = engine
         self.metadata: sa.MetaData = metadata if metadata is not None else sa.MetaData()
         self.column_types: dict[str, SQLType] = column_types or {}
+        self.object_name = object_name
 
     @staticmethod
     def copy_column(
@@ -204,9 +228,18 @@ class DataTable:
     def columns(self) -> "ReadOnlyColumnCollection[str, sa.Column[Any]]":
         return self.table.columns
 
-    @property
-    def c(self):
-        return self.columns
+    def col_name(self, name: str, object_name: Optional[str] = None) -> str:
+        object_name = object_name or self.object_name
+        return col_name(name, object_name)
+
+    def without_object(
+        self, column_name: str, object_name: Optional[str] = None
+    ) -> str:
+        object_name = object_name or self.object_name
+        return column_name.removeprefix(f"{object_name}{DEFAULT_DELIMITER}")
+
+    def c(self, name: str, object_name: Optional[str] = None):
+        return getattr(self.columns, self.col_name(name, object_name=object_name))
 
     @property
     def table(self) -> "sa.Table":
@@ -246,7 +279,7 @@ class DataTable:
         ]
 
     def dir_expansion(self):
-        return self.dataset_dir_expansion(self)
+        return DirExpansion(self.object_name)
 
 
 PARTITION_COLUMN_ID = "partition_id"
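
The schema refactor above hangs on one flat naming convention: an object prefix joined to a field name with DEFAULT_DELIMITER. A small self-contained illustration mirroring col_name() and without_object() from this diff:

    DEFAULT_DELIMITER = "__"

    def col_name(name: str, object_name: str = "file") -> str:
        return f"{object_name}{DEFAULT_DELIMITER}{name}"

    # DataTable.c("path") resolves the column named "file__path";
    # DirExpansion uses object_name="sys" for the id column.
    assert col_name("path") == "file__path"
    assert col_name("id", object_name="sys") == "sys__id"

    # without_object() strips the prefix again:
    assert "file__path".removeprefix(f"file{DEFAULT_DELIMITER}") == "path"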