sibi-dst 2025.1.13__py3-none-any.whl → 2025.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/__init__.py +7 -1
- sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
- sibi_dst/df_helper/_df_helper.py +417 -117
- sibi_dst/df_helper/_parquet_artifact.py +255 -283
- sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
- sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
- sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
- sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
- sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
- sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
- sibi_dst/osmnx_helper/route_path_builder.py +45 -46
- sibi_dst/utils/base.py +302 -96
- sibi_dst/utils/clickhouse_writer.py +472 -206
- sibi_dst/utils/data_utils.py +139 -186
- sibi_dst/utils/data_wrapper.py +317 -73
- sibi_dst/utils/date_utils.py +1 -0
- sibi_dst/utils/df_utils.py +193 -213
- sibi_dst/utils/file_utils.py +3 -2
- sibi_dst/utils/filepath_generator.py +314 -152
- sibi_dst/utils/log_utils.py +581 -242
- sibi_dst/utils/manifest_manager.py +60 -76
- sibi_dst/utils/parquet_saver.py +33 -27
- sibi_dst/utils/phone_formatter.py +88 -95
- sibi_dst/utils/update_planner.py +180 -178
- sibi_dst/utils/webdav_client.py +116 -166
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +29 -27
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
```diff
--- sibi_dst/utils/filepath_generator.py
+++ sibi_dst/utils/filepath_generator.py
@@ -1,187 +1,349 @@
-import datetime
+import datetime as dt
 import re
+from pathlib import PurePosixPath
+from typing import Iterable, List, Optional
 
 import fsspec
+from fsspec.utils import infer_storage_options
 
 from .log_utils import Logger
 
 
 class FilePathGenerator:
     """
-    Dynamically generates file paths by scanning directories starting from the base path
-    and determining the innermost directory structure.
-
-    Now supports generating appropriate paths for both pandas and Dask.
+    Scans date-partitioned directories base/YYYY/MM/DD and returns paths for pandas or Dask.
+    Works with any fsspec filesystem.
     """
 
-    def __init__(self, base_path='', fs=None, logger=None, **kwargs):
-        """
-        Initialize the FilePathGenerator.
-
-        Parameters:
-            base_path (str): Base directory path where data files are stored.
-            fs (fsspec.AbstractFileSystem, optional): Filesystem object to use for file operations.
-            logger (Logger, optional): Logger instance for logging information.
-            **kwargs: Additional keyword arguments.
-                - debug (bool): If True, enables debug logging.
-                - storage_options (dict): Options for the filesystem (e.g., credentials, tokens).
-                - exclude_patterns (list): List of regex patterns to exclude from file paths.
-                - file_extension (str): File extension to look for (default: 'parquet').
-        """
-        self.base_path = base_path.rstrip('/')
-        self.fs = fs  # Filesystem object
+    def __init__(
+        self,
+        base_path: str = "",
+        *,
+        fs=None,
+        logger: Optional[Logger] = None,
+        debug: bool = False,
+        storage_options: Optional[dict] = None,
+        exclude_patterns: Optional[Iterable[str]] = None,
+        file_extension: str = "parquet",
+    ):
         self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
-        self.debug = kwargs.get('debug', False)
-        self.storage_options = kwargs.get('storage_options', {})
-        self.exclude_patterns = kwargs.get('exclude_patterns', [])
-        self.file_extension = kwargs.get('file_extension', 'parquet').lstrip('.')
+        self.debug = debug
+        self.storage_options = storage_options or {}
+        self.file_extension = file_extension.lstrip(".")
+        self._compiled_exclusions = [re.compile(p) for p in (exclude_patterns or [])]
+
+        # Normalize base path & derive protocol + root path
+        opts = infer_storage_options(base_path or ".")
+        proto = opts.get("protocol") or "file"
+        root = opts.get("path") or ""  # protocol-stripped
+
+        # If no fs given, make one from base_path
+        if fs is None:
+            self.fs, resolved_root = fsspec.core.url_to_fs(base_path or ".", **self.storage_options)
+            # Prefer resolved_root (already stripped and normalized by fsspec)
+            root = resolved_root or root
+        else:
+            self.fs = fs
 
-        # If fs is not provided, initialize it based on base_path and storage_options
-        if self.fs is None:
-            self.fs, _ = fsspec.core.url_to_fs(self.base_path, **self.storage_options)
+        self._protocol = proto if isinstance(proto, str) else (proto[0] if proto else "file")
+        self._root = self._ensure_no_trailing_slash(self._to_posix(root))
 
-    def generate_file_paths(self, start_date, end_date, engine='dask'):
-        """
-        Generate paths dynamically for files within the date range by scanning directories.
-        Returns a list of file paths compatible with the specified engine.
+        if self.debug:
+            self.logger.debug(
+                f"FilePathGenerator init: protocol={self._protocol!r}, root={self._root!r}, fs={type(self.fs).__name__}"
+            )
 
```
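
The rewritten initializer derives the protocol and a protocol-stripped root once, up front, instead of carrying the raw `base_path` string through every method. A minimal sketch of the two fsspec calls it leans on (the URLs are illustrative, not from the package):

```python
import fsspec
from fsspec.utils import infer_storage_options

# Pure URL parsing -- no filesystem object is created here.
opts = infer_storage_options("s3://datawh/products/tracking")
print(opts["protocol"])  # 's3'
print(opts["path"])      # protocol-stripped path, e.g. 'datawh/products/tracking'

# url_to_fs() additionally instantiates the filesystem and returns the
# normalized, protocol-stripped root alongside it.
fs, root = fsspec.core.url_to_fs("memory://datawh/products/tracking")
print(type(fs).__name__, root)  # MemoryFileSystem /datawh/products/tracking
```
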
```diff
-        Parameters:
-            start_date (str or datetime): Start date in 'YYYY-MM-DD' format or datetime object.
-            end_date (str or datetime): End date in 'YYYY-MM-DD' format or datetime object.
-            engine (str): 'pandas' or 'dask' to specify which library the paths are intended for.
+    # ------------------------- public API -------------------------
 
-        Returns:
-            list: List of file paths.
+    def generate_file_paths(self, start_date, end_date, engine: str = "dask") -> List[str]:
         """
-        start_date = self._convert_to_datetime(start_date)
-        end_date = self._convert_to_datetime(end_date)
-
-        paths = []
-        curr_date = start_date
-
-        while curr_date <= end_date:
-            year, month, day = curr_date.year, curr_date.month, curr_date.day
-            day_paths = self._collect_paths(year, month, day, engine)
-            if day_paths:
-                paths.extend(day_paths)
-            curr_date += datetime.timedelta(days=1)
+        Return a list of file (engine='dask') or dataset directory (engine='pandas') paths
+        for all dates in [start_date, end_date].
+        """
+        sd = self._to_date(start_date)
+        ed = self._to_date(end_date)
+        if sd > ed:
+            sd, ed = ed, sd  # be forgiving on reversed dates
+
+        paths: List[str] = []
+        current = sd
+        while current <= ed:
+            y, m, d = current.year, current.month, current.day
+            paths.extend(self._collect_paths_for_day(y, m, d, engine))
+            current += dt.timedelta(days=1)
 
+        if self.debug:
+            self.logger.debug(f"Generated {len(paths)} path(s) for {sd}..{ed} (engine={engine})")
         return paths
 
```
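
A usage sketch against the new signature (bucket name, credentials, and dates are hypothetical). With `engine="dask"` the result is one path per file, which `dd.read_parquet` accepts directly:

```python
import dask.dataframe as dd
from sibi_dst.utils.filepath_generator import FilePathGenerator

generator = FilePathGenerator(
    base_path="s3://datawh/products/tracking",  # hypothetical bucket/layout
    storage_options={"anon": False},
    debug=True,
)

file_paths = generator.generate_file_paths("2024-01-01", "2024-01-05", engine="dask")
if file_paths:
    df = dd.read_parquet(file_paths)
    print(df.head())
```
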
```diff
-    def _collect_paths(self, year, month, day, engine):
-        """
-        Collect appropriate paths for a given date, depending on the engine.
+    # ------------------------- internals -------------------------
 
-        Parameters:
-            year (int): Year component of the date.
-            month (int): Month component of the date.
-            day (int): Day component of the date.
-            engine (str): 'pandas' or 'dask'.
+    def _collect_paths_for_day(self, year: int, month: int, day: int, engine: str) -> List[str]:
+        # IMPORTANT: use protocol-stripped paths with fs methods
+        day_dir = self._join(self._root, f"{year:04d}", f"{month:02d}", f"{day:02d}")
 
-        Returns:
-            list: List of file or directory paths.
-        """
-        base_dir = f"{self.base_path}/{year}/{str(month).zfill(2)}/{str(day).zfill(2)}"
-
-        if not self.fs.exists(base_dir):
+        if not self.fs.exists(day_dir):
             if self.debug:
-                self.logger.debug(f"Directory does not exist: {base_dir}")
+                self.logger.debug(f"Directory does not exist: {day_dir}")
             return []
 
-        if engine == 'dask':
-            # Collect individual file paths
-            file_pattern = f"{base_dir}/**/*.{self.file_extension}"
-            all_paths = self.fs.glob(file_pattern)
-
-            if not all_paths and self.debug:
-                self.logger.debug(f"No files found with pattern: {file_pattern}")
-
-            # Exclude unwanted files and directories
-            filtered_paths = self._exclude_unwanted_paths(all_paths)
-
-            # Filter out directories
-            file_paths = [path for path in filtered_paths if not self.fs.isdir(path)]
-
-        elif engine == 'pandas':
-            # Collect dataset directories
-            # Assume that the base_dir is a Parquet dataset
-            if self.fs.isdir(base_dir):
-                file_paths = [base_dir]
-            else:
-                file_paths = []
-
+        if engine == "dask":
+            # Try recursive glob first
+            pattern = self._join(day_dir, "**", f"*.{self.file_extension}")
+            all_paths = self.fs.glob(pattern) or []
+
+            # Some filesystems don’t support recursive glob well; fallback to find()
+            if not all_paths:
+                try:
+                    found = self.fs.find(day_dir)  # recursive listing
+                except Exception:
+                    found = []
+                all_paths = [p for p in found if p.endswith(f".{self.file_extension}")]
+
+            # Filter out dirs & excluded patterns
+            file_paths = [
+                p for p in all_paths
+                if not self._is_dir(p) and not self._is_excluded(p)
+            ]
+
+        elif engine == "pandas":
+            # For pandas, return the dataset directory for the day (if not excluded)
+            file_paths = [day_dir] if self._is_dir(day_dir) and not self._is_excluded(day_dir) else []
         else:
-            raise ValueError("Engine must be 'pandas' or 'dask'.")
+            raise ValueError("engine must be 'pandas' or 'dask'.")
 
-        protocol = self.fs.protocol if isinstance(self.fs.protocol, str) else self.fs.protocol[0]
+        # Reattach protocol ONLY for returned paths
+        return [self._with_protocol(p) for p in file_paths]
 
```
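
The glob-then-`find()` fallback can be exercised against an in-memory filesystem. Whether `**` matches files sitting directly under the day directory has varied across fsspec releases, which is precisely the case the fallback covers (paths below are made up):

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.makedirs("/tracking/2024/01/01", exist_ok=True)
fs.touch("/tracking/2024/01/01/part-0.parquet")

# Recursive glob, as _collect_paths_for_day tries first; may come back empty
# on some fsspec versions/filesystems.
print(fs.glob("/tracking/2024/01/01/**/*.parquet"))

# The recursive listing it falls back to always sees the file.
print([p for p in fs.find("/tracking/2024/01/01") if p.endswith(".parquet")])
```
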
```diff
-        # Ensure the protocol is included in the paths
-        file_paths = [
-            f"{protocol}://{path}" if not path.startswith(f"{protocol}://") else path
-            for path in file_paths
-        ]
+    def _is_dir(self, path: str) -> bool:
+        try:
+            return bool(self.fs.isdir(path))
+        except Exception:
+            # Robust fallback via info()
+            try:
+                return (self.fs.info(path).get("type") == "directory")
+            except Exception:
+                return False
 
-        if self.debug:
-            self.logger.debug(f"Collected {len(file_paths)} paths from {base_dir} for engine '{engine}'")
-
-        return file_paths
-
-    def _exclude_unwanted_paths(self, paths):
-        """
-        Exclude paths that match any of the exclusion patterns.
-        """
-        # Combine default patterns with user-provided patterns
-        exclude_patterns = self.exclude_patterns
+    def _is_excluded(self, path: str) -> bool:
+        return any(pat.search(path) for pat in self._compiled_exclusions)
 
```
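
Note the behavioral change hidden here: `_is_excluded` uses `re.search` where the old `_exclude_unwanted_paths` used `re.match`, so an exclusion pattern now fires anywhere in the path rather than only at its start. For example:

```python
import re

pat = re.compile(r"\.tmp$|_SUCCESS")
path = "datawh/tracking/2024/01/01/part-0.parquet.tmp"

print(bool(pat.match(path)))   # False -- match() is anchored at the start
print(bool(pat.search(path)))  # True  -- search() scans the whole path
```
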
```diff
-        # Compile regex patterns for efficiency
-        compiled_patterns = [re.compile(pattern) for pattern in exclude_patterns]
-
-        # Filter out paths matching any of the exclude patterns
-        filtered_paths = [
-            path for path in paths
-            if not any(pattern.match(path) for pattern in compiled_patterns)
-        ]
-
-        return filtered_paths
+    # ------------------------- helpers -------------------------
 
     @staticmethod
-    def _convert_to_datetime(date):
-        """Convert a date string or datetime object into a datetime object."""
-        if isinstance(date, str):
-            return datetime.datetime.strptime(date, '%Y-%m-%d')
-        return date
-
-
-"""
-Usage:
-    # Initialize the generator
-    generator = FilePathGenerator(
-        base_path='/Users/lvalverdeb/TeamDev/sibi-dst/IbisDataWH/logistics_storage/products/tracking',
-        debug=True
-    )
+    def _to_date(x) -> dt.date:
+        if isinstance(x, dt.datetime):
+            return x.date()
+        if isinstance(x, dt.date):
+            return x
+        return dt.datetime.strptime(str(x), "%Y-%m-%d").date()
 
```
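
The new `_to_date` coercion accepts strings, `date`s, and `datetime`s alike and always yields a `datetime.date` (the old `_convert_to_datetime` returned non-string inputs unchanged). For example:

```python
import datetime as dt
from sibi_dst.utils.filepath_generator import FilePathGenerator

for value in ("2024-01-05", dt.date(2024, 1, 5), dt.datetime(2024, 1, 5, 12, 30)):
    print(FilePathGenerator._to_date(value))  # 2024-01-05 in every case
```
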
```diff
-    # Generate dataset paths for Dask
-    dataset_paths = generator.generate_file_paths('2024-01-01', '2024-01-05', engine='dask')
-
-    # Read data with Dask
-    import dask.dataframe as dd
-
-    df = dd.read_parquet(dataset_paths)
-
-    # Now you can use df as a Dask DataFrame
-    print(df.head())
-
-    # Generate file paths for pandas
-    file_paths = generator.generate_file_paths('2024-01-01', '2024-01-05', engine='pandas')
-
-    # Read data with pandas
-    import pandas as pd
+    @staticmethod
+    def _to_posix(path: str) -> str:
+        return PurePosixPath(path).as_posix()
 
-    dataframes = []
-    for fp in file_paths:
-        df = pd.read_parquet(fp)
-        dataframes.append(df)
+    @staticmethod
+    def _ensure_no_trailing_slash(path: str) -> str:
+        return path[:-1] if path.endswith("/") else path
 
-    df_pandas = pd.concat(dataframes, ignore_index=True)
-    print(df_pandas.head())
-"""
+    @staticmethod
+    def _join(*parts: str) -> str:
+        p = PurePosixPath(parts[0])
+        for part in parts[1:]:
+            p = p / part
+        return p.as_posix()
+
+    def _with_protocol(self, path: str) -> str:
+        # If path already has a scheme, leave it
+        if "://" in path:
+            return path
+        # For local file, return absolute-like path without scheme or keep 'file://'? Keep scheme for consistency.
+        return f"{self._protocol}://{path}"
+
```
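
All internal bookkeeping now happens on protocol-stripped POSIX paths, and the scheme is reattached only on the way out. A quick sketch of that round trip using the private helpers (printed values are illustrative):

```python
from sibi_dst.utils.filepath_generator import FilePathGenerator

gen = FilePathGenerator(base_path="memory://datawh/tracking")

day_dir = gen._join(gen._root, "2024", "01", "05")
print(day_dir)                      # '/datawh/tracking/2024/01/05'
print(gen._with_protocol(day_dir))  # 'memory:///datawh/tracking/2024/01/05'
```
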
```diff
+# import datetime
+# import re
+#
+# import fsspec
+#
+# from .log_utils import Logger
+#
+#
+# class FilePathGenerator:
+#     """
+#     Dynamically generates file paths by scanning directories starting from the base path
+#     and determining the innermost directory structure.
+#
+#     Now supports generating appropriate paths for both pandas and Dask.
+#     """
+#
+#     def __init__(self, base_path='', fs=None, logger=None, **kwargs):
+#         """
+#         Initialize the FilePathGenerator.
+#
+#         Parameters:
+#             base_path (str): Base directory path where data files are stored.
+#             fs (fsspec.AbstractFileSystem, optional): Filesystem object to use for file operations.
+#             logger (Logger, optional): Logger instance for logging information.
+#             **kwargs: Additional keyword arguments.
+#                 - debug (bool): If True, enables debug logging.
+#                 - storage_options (dict): Options for the filesystem (e.g., credentials, tokens).
+#                 - exclude_patterns (list): List of regex patterns to exclude from file paths.
+#                 - file_extension (str): File extension to look for (default: 'parquet').
+#         """
+#         self.base_path = base_path.rstrip('/')
+#         self.fs = fs  # Filesystem object
+#         self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
+#         self.debug = kwargs.get('debug', False)
+#         self.storage_options = kwargs.get('storage_options', {})
+#         self.exclude_patterns = kwargs.get('exclude_patterns', [])
+#         self.file_extension = kwargs.get('file_extension', 'parquet').lstrip('.')
+#
+#         # If fs is not provided, initialize it based on base_path and storage_options
+#         if self.fs is None:
+#             self.fs, _ = fsspec.core.url_to_fs(self.base_path, **self.storage_options)
+#
+#     def generate_file_paths(self, start_date, end_date, engine='dask'):
+#         """
+#         Generate paths dynamically for files within the date range by scanning directories.
+#         Returns a list of file paths compatible with the specified engine.
+#
+#         Parameters:
+#             start_date (str or datetime): Start date in 'YYYY-MM-DD' format or datetime object.
+#             end_date (str or datetime): End date in 'YYYY-MM-DD' format or datetime object.
+#             engine (str): 'pandas' or 'dask' to specify which library the paths are intended for.
+#
+#         Returns:
+#             list: List of file paths.
+#         """
+#         start_date = self._convert_to_datetime(start_date)
+#         end_date = self._convert_to_datetime(end_date)
+#
+#         paths = []
+#         curr_date = start_date
+#
+#         while curr_date <= end_date:
+#             year, month, day = curr_date.year, curr_date.month, curr_date.day
+#             day_paths = self._collect_paths(year, month, day, engine)
+#             if day_paths:
+#                 paths.extend(day_paths)
+#             curr_date += datetime.timedelta(days=1)
+#
+#         return paths
+#
+#     def _collect_paths(self, year, month, day, engine):
+#         """
+#         Collect appropriate paths for a given date, depending on the engine.
+#
+#         Parameters:
+#             year (int): Year component of the date.
+#             month (int): Month component of the date.
+#             day (int): Day component of the date.
+#             engine (str): 'pandas' or 'dask'.
+#
+#         Returns:
+#             list: List of file or directory paths.
+#         """
+#         base_dir = f"{self.base_path}/{year}/{str(month).zfill(2)}/{str(day).zfill(2)}"
+#
+#         if not self.fs.exists(base_dir):
+#             if self.debug:
+#                 self.logger.debug(f"Directory does not exist: {base_dir}")
+#             return []
+#
+#         if engine == 'dask':
+#             # Collect individual file paths
+#             file_pattern = f"{base_dir}/**/*.{self.file_extension}"
+#             all_paths = self.fs.glob(file_pattern)
+#
+#             if not all_paths and self.debug:
+#                 self.logger.debug(f"No files found with pattern: {file_pattern}")
+#
+#             # Exclude unwanted files and directories
+#             filtered_paths = self._exclude_unwanted_paths(all_paths)
+#
+#             # Filter out directories
+#             file_paths = [path for path in filtered_paths if not self.fs.isdir(path)]
+#
+#         elif engine == 'pandas':
+#             # Collect dataset directories
+#             # Assume that the base_dir is a Parquet dataset
+#             if self.fs.isdir(base_dir):
+#                 file_paths = [base_dir]
+#             else:
+#                 file_paths = []
+#
+#         else:
+#             raise ValueError("Engine must be 'pandas' or 'dask'.")
+#
+#         protocol = self.fs.protocol if isinstance(self.fs.protocol, str) else self.fs.protocol[0]
+#
+#         # Ensure the protocol is included in the paths
+#         file_paths = [
+#             f"{protocol}://{path}" if not path.startswith(f"{protocol}://") else path
+#             for path in file_paths
+#         ]
+#
+#         if self.debug:
+#             self.logger.debug(f"Collected {len(file_paths)} paths from {base_dir} for engine '{engine}'")
+#
+#         return file_paths
+#
+#     def _exclude_unwanted_paths(self, paths):
+#         """
+#         Exclude paths that match any of the exclusion patterns.
+#         """
+#         # Combine default patterns with user-provided patterns
+#         exclude_patterns = self.exclude_patterns
+#
+#         # Compile regex patterns for efficiency
+#         compiled_patterns = [re.compile(pattern) for pattern in exclude_patterns]
+#
+#         # Filter out paths matching any of the exclude patterns
+#         filtered_paths = [
+#             path for path in paths
+#             if not any(pattern.match(path) for pattern in compiled_patterns)
+#         ]
+#
+#         return filtered_paths
+#
+#     @staticmethod
+#     def _convert_to_datetime(date):
+#         """Convert a date string or datetime object into a datetime object."""
+#         if isinstance(date, str):
+#             return datetime.datetime.strptime(date, '%Y-%m-%d')
+#         return date
+#
+#
+# """
+# Usage:
+#     # Initialize the generator
+#     generator = FilePathGenerator(
+#         base_path='/Users/lvalverdeb/TeamDev/sibi-dst/IbisDataWH/logistics_storage/products/tracking',
+#         debug=True
+#     )
+#
+#     # Generate dataset paths for Dask
+#     dataset_paths = generator.generate_file_paths('2024-01-01', '2024-01-05', engine='dask')
+#
+#     # Read data with Dask
+#     import dask.dataframe as dd
+#
+#     df = dd.read_parquet(dataset_paths)
+#
+#     # Now you can use df as a Dask DataFrame
+#     print(df.head())
+#
+#     # Generate file paths for pandas
+#     file_paths = generator.generate_file_paths('2024-01-01', '2024-01-05', engine='pandas')
+#
+#     # Read data with pandas
+#     import pandas as pd
+#
+#     dataframes = []
+#     for fp in file_paths:
+#         df = pd.read_parquet(fp)
+#         dataframes.append(df)
+#
+#     df_pandas = pd.concat(dataframes, ignore_index=True)
+#     print(df_pandas.head())
+# """
```
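
For completeness, the pandas-side counterpart of the retained usage notes, adapted to the new API (paths and layout hypothetical). With `engine="pandas"` each returned entry is a day-level dataset directory:

```python
import pandas as pd
from sibi_dst.utils.filepath_generator import FilePathGenerator

generator = FilePathGenerator(base_path="s3://datawh/products/tracking")
day_dirs = generator.generate_file_paths("2024-01-01", "2024-01-05", engine="pandas")

dataframes = [pd.read_parquet(fp) for fp in day_dirs]
if dataframes:
    df_pandas = pd.concat(dataframes, ignore_index=True)
    print(df_pandas.head())
```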