datasourcelib 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,6 @@ class SyncBase(ABC):
14
14
  raise NotImplementedError
15
15
 
16
16
  @abstractmethod
17
- def sync(self, **kwargs) -> bool:
17
+ def sync(self, **kwargs) -> Dict[str, Any]:
18
18
  """Execute sync operation. Returns True on success, False otherwise."""
19
19
  raise NotImplementedError
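The abstract contract above now returns a result dictionary instead of a bool. A minimal sketch of the shape the reworked strategies later in this diff return from sync() (the loaded_records key appears only in FullLoadStrategy):

```python
# Sketch of the result dict returned by the reworked strategies in this diff;
# "loaded_records" is only produced by FullLoadStrategy.
from datetime import datetime, timezone

example_result = {
    "status": "success",  # "success" or "failure"
    "message": "Full load completed. Loaded 42 records.",
    "started_at": datetime.now(timezone.utc).isoformat(),
    "finished_at": datetime.now(timezone.utc).isoformat(),
}

if example_result["status"] != "success":
    raise RuntimeError(example_result["message"])
```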
@@ -10,6 +10,7 @@ from ..datasources.sql_source import SQLDataSource
10
10
  from ..datasources.azure_devops_source import AzureDevOpsSource
11
11
  from ..datasources.sharepoint_source import SharePointSource
12
12
  from ..datasources.blob_source import BlobStorageSource
13
+ from ..datasources.dataverse_source import DataverseSource
13
14
 
14
15
  # concrete strategies
15
16
  from datasourcelib.strategies.full_load import FullLoadStrategy
@@ -35,11 +36,12 @@ class SyncManager:
35
36
  DataSourceType.SQL: SQLDataSource,
36
37
  DataSourceType.AZURE_DEVOPS: AzureDevOpsSource,
37
38
  DataSourceType.SHAREPOINT: SharePointSource,
38
- DataSourceType.BLOB_STORAGE: BlobStorageSource
39
+ DataSourceType.BLOB_STORAGE: BlobStorageSource,
40
+ DataSourceType.Dataverse: DataverseSource
39
41
  }
40
42
 
41
- def execute_sync(self, sync_type: SyncType,
42
- source_type: DataSourceType,
43
+ def execute_sync(self, sync_type: str,
44
+ source_type: str,
43
45
  source_config: Dict[str, Any],
44
46
  vector_db_config: Dict[str, Any],
45
47
  **kwargs) -> Dict[str, Any]:
@@ -47,6 +49,33 @@ class SyncManager:
47
49
  logger.info(f"Execute {sync_type} sync using {source_type} source")
48
50
 
49
51
  try:
52
+ # validate and convert sync_type and source_type to their Enum members
53
+ def _to_enum(enum_cls, val, label):
54
+ if isinstance(val, enum_cls):
55
+ return val
56
+ s = str(val)
57
+ # case-insensitive name match
58
+ for member in enum_cls:
59
+ if member.name.lower() == s.lower():
60
+ return member
61
+ # try by value
62
+ try:
63
+ return enum_cls(val)
64
+ except Exception:
65
+ names = ", ".join([m.name for m in enum_cls])
66
+ values = ", ".join([str(m.value) for m in enum_cls])
67
+ raise ValueError(f"Invalid {label}. Permitted names: {names}. Permitted values: {values}")
68
+
69
+ try:
70
+ sync_type = _to_enum(SyncType, sync_type, "sync_type")
71
+ source_type = _to_enum(DataSourceType, source_type, "source_type")
72
+ except ValueError as ex:
73
+ logger.error(str(ex))
74
+ return {
75
+ "status": SyncStatus.FAILED,
76
+ "message": str(ex),
77
+ "started_at": start
78
+ }
50
79
  # Get data source class
51
80
  source_cls = self._datasource_map.get(source_type)
52
81
  if not source_cls:
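The validation helper added above coerces string inputs to enum members by case-insensitive name first, then by value. A standalone re-creation of that logic, using a stand-in enum because SyncType's real members are not shown in this diff:

```python
# Re-creation of the coercion added above, with a stand-in enum for illustration.
from enum import Enum

class ExampleSyncType(str, Enum):
    FULL_LOAD = "full_load"
    INCREMENTAL_LOAD = "incremental_load"

def to_enum(enum_cls, val, label):
    if isinstance(val, enum_cls):
        return val
    s = str(val)
    for member in enum_cls:                # case-insensitive name match
        if member.name.lower() == s.lower():
            return member
    try:
        return enum_cls(val)               # fall back to value lookup
    except Exception:
        names = ", ".join(m.name for m in enum_cls)
        raise ValueError(f"Invalid {label}. Permitted names: {names}")

assert to_enum(ExampleSyncType, "full_load", "sync_type") is ExampleSyncType.FULL_LOAD
assert to_enum(ExampleSyncType, "INCREMENTAL_LOAD", "sync_type") is ExampleSyncType.INCREMENTAL_LOAD
```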
@@ -76,15 +105,7 @@ class SyncManager:
76
105
  }
77
106
 
78
107
  # Execute sync
79
- success = strategy.sync(**kwargs)
80
- status = SyncStatus.SUCCESS if success else SyncStatus.FAILED
81
-
82
- return {
83
- "status": status,
84
- "message": f"{sync_type} completed" if success else f"{sync_type} failed",
85
- "started_at": start,
86
- "finished_at": datetime.utcnow()
87
- }
108
+ return strategy.sync(**kwargs)
88
109
 
89
110
  except Exception as ex:
90
111
  logger.exception("SyncManager.execute_sync failed")
@@ -4,4 +4,5 @@ class DataSourceType(str, Enum):
4
4
  SQL = "sql"
5
5
  AZURE_DEVOPS = "azure_devops"
6
6
  SHAREPOINT = "sharepoint"
7
- BLOB_STORAGE = "blob_storage"
7
+ BLOB_STORAGE = "blob_storage"
8
+ Dataverse = "Dataverse"
@@ -0,0 +1,305 @@
1
+ from typing import Any, Dict, List, Optional, Tuple
2
+ from datasourcelib.datasources.datasource_base import DataSourceBase
3
+ from datasourcelib.utils.logger import get_logger
4
+ from datasourcelib.utils.validators import require_keys
5
+ from datasourcelib.utils.aggregation import generate_grouped_summaries
6
+ import pyodbc
7
+ import time
8
+ import pandas as pd
9
+
10
+ # optional requests import (webapi mode)
11
+ try:
12
+ import requests # type: ignore
13
+ except Exception:
14
+ requests = None  # optional dependency; only needed for webapi mode
15
+
16
+ logger = get_logger(__name__)
17
+
18
+ class DataverseSource(DataSourceBase):
19
+
20
+ def __init__(self, config: Dict[str, Any]):
21
+ super().__init__(config)
22
+ self._conn = None
23
+ self._mode = (self.config.get("dv_mode") or "tds").lower() # "tds" or "webapi"
24
+ self._access_token: Optional[str] = None
25
+ self._headers: Dict[str, str] = {}
26
+ self._max_retries = int(self.config.get("dv_max_retries", 3))
27
+
28
+ def validate_config(self) -> bool:
29
+ """
30
+ Validate required keys depending on selected dv_mode.
31
+ - tds: requires either 'dv_tds_connection_string' OR ('dv_tds_server' and 'dv_tds_database')
32
+ - webapi: requires 'dv_webapi_url', 'dv_webapi_client_id', 'dv_webapi_client_secret', 'dv_webapi_tenant_id' (and optionally 'dv_webapi_resource')
33
+ """
34
+ try:
35
+ if self._mode == "webapi":
36
+ require_keys(self.config, ["dv_webapi_url", "dv_webapi_client_id", "dv_webapi_client_secret", "dv_webapi_tenant_id"])
37
+ else:
38
+ # TDS mode (ODBC)
39
+ if "dv_tds_connection_string" in self.config:
40
+ return True
41
+ # otherwise require components
42
+ require_keys(self.config, ["dv_tds_server", "dv_tds_database"])
43
+ # if not using integrated auth require creds
44
+ if not bool(self.config.get("dv_tds_windows_auth", False)):
45
+ require_keys(self.config, ["dv_tds_username", "dv_tds_password"])
46
+ return True
47
+ except Exception as ex:
48
+ logger.error("DataverseSource.validate_config failed: %s", ex)
49
+ return False
50
+
51
+ # -------------------------
52
+ # Connection helpers
53
+ # -------------------------
54
+ def _get_available_driver(self) -> str:
55
+ """Return first suitable ODBC driver for SQL/Dataverse TDS access."""
56
+ preferred_drivers = [
57
+ "ODBC Driver 18 for SQL Server",
58
+ "ODBC Driver 17 for SQL Server",
59
+ "SQL Server Native Client 11.0",
60
+ "SQL Server"
61
+ ]
62
+ try:
63
+ drivers = pyodbc.drivers()
64
+ logger.info("Available ODBC drivers: %s", drivers)
65
+
66
+ for d in preferred_drivers:
67
+ if d in drivers:
68
+ logger.info("Using ODBC driver: %s", d)
69
+ return d
70
+
71
+ # fallback to first available
72
+ if drivers:
73
+ logger.warning("No preferred driver found. Using: %s", drivers[0])
74
+ return drivers[0]
75
+ raise RuntimeError("No ODBC drivers available")
76
+ except Exception as ex:
77
+ logger.error("DataverseSource._get_available_driver failed: %s", ex)
78
+ raise
79
+
80
+ def _build_tds_conn_str(self) -> str:
81
+ """Build valid connection string with proper parameter names."""
82
+ if "dv_tds_connection_string" in self.config:
83
+ return self.config["dv_tds_connection_string"]
84
+
85
+ driver = self._get_available_driver()
86
+ # Fix: use correct config key names (dv_tds_server, not dv_tds_dataverse_server)
87
+ server = self.config.get("dv_tds_server", "").strip()
88
+ database = self.config.get("dv_tds_database", "").strip()
89
+
90
+ if not server:
91
+ raise ValueError("dv_tds_server are required")
92
+
93
+ logger.info("Building TDS connection (driver=%s, server=%s, database=%s)", driver, server, database)
94
+
95
+ # Use curly braces for driver name (handles spaces in driver names)
96
+ parts = [f"DRIVER={{{driver}}}"]
97
+ parts.append(f"Server={server}")
98
+ parts.append(f"Database={database}")
99
+ password = None
100
+ if bool(self.config.get("dv_tds_windows_auth", False)):
101
+ parts.append("Trusted_Connection=yes")
102
+ logger.info("Using Windows authentication")
103
+ else:
104
+ username = self.config.get("dv_tds_username", "").strip()
105
+ password = self.config.get("dv_tds_password", "").strip()
106
+
107
+ if not username or not password:
108
+ raise ValueError("dv_tds_username and dv_tds_password required when Windows auth disabled")
109
+
110
+ parts.append(f"UID={username}")
111
+ parts.append(f"PWD={password}")
112
+ parts.append("Authentication=ActiveDirectoryInteractive")
113
+ # Encryption settings
114
+ if not bool(self.config.get("dv_tds_is_onprem", False)):
115
+ parts.append("Encrypt=yes")
116
+ parts.append("TrustServerCertificate=no")
117
+ else:
118
+ parts.append("Encrypt=optional")
119
+ parts.append("TrustServerCertificate=yes")
120
+
121
+ conn_str = ";".join(parts)
122
+ logger.debug("Connection string: %s", conn_str.replace(password or "", "***") if password else conn_str)
123
+ return conn_str
124
+
125
+ def _obtain_webapi_token(self) -> Tuple[str, Dict[str, str]]:
126
+ """
127
+ Acquire OAuth2 token using client credentials flow.
128
+ Returns (access_token, headers)
129
+ Config expected keys: dv_webapi_tenant_id, dv_webapi_client_id, dv_webapi_client_secret, optional dv_webapi_resource
130
+ """
131
+ if requests is None:
132
+ raise RuntimeError("requests package required for Dataverse Web API mode")
133
+ tenant = self.config["dv_webapi_tenant_id"]
134
+ client_id = self.config["dv_webapi_client_id"]
135
+ client_secret = self.config["dv_webapi_client_secret"]
136
+ # resource or scope: prefer explicit resource, else fallback to webapi_url host
137
+ resource = self.config.get("dv_webapi_resource")
138
+ if not resource:
139
+ # infer resource from webapi_url e.g. https://<org>.crm.dynamics.com
140
+ webapi_url = self.config["dv_webapi_url"].rstrip("/")
141
+ resource = webapi_url.split("://")[-1]
142
+ resource = f"https://{resource}" # as resource
143
+ token_url = f"https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token"
144
+ data = {
145
+ "grant_type": "client_credentials",
146
+ "client_id": client_id,
147
+ "client_secret": client_secret,
148
+ "scope": f"{resource}/.default"
149
+ }
150
+ resp = requests.post(token_url, data=data, timeout=30)
151
+ resp.raise_for_status()
152
+ j = resp.json()
153
+ token = j.get("access_token")
154
+ if not token:
155
+ raise RuntimeError("Failed to obtain access token for Dataverse webapi")
156
+ headers = {"Authorization": f"Bearer {token}", "Accept": "application/json", "OData-MaxVersion": "4.0", "OData-Version": "4.0"}
157
+ return token, headers
158
+
159
+ # -------------------------
160
+ # Public connection API
161
+ # -------------------------
162
+ def connect(self) -> bool:
163
+ try:
164
+ if self._mode == "webapi":
165
+ token, headers = self._obtain_webapi_token()
166
+ self._access_token = token
167
+ self._headers = headers
168
+ self._connected = True
169
+ logger.info("DataverseSource connected (webapi mode) to %s", self.config.get("dv_webapi_url"))
170
+ return True
171
+ # else TDS mode
172
+ conn_str = self._build_tds_conn_str()
173
+ self._conn = pyodbc.connect(conn_str, timeout=int(self.config.get("dv_tds_timeout", 30)))
174
+ self._connected = True
175
+ logger.info("DataverseSource connected (dv_tds mode) to %s/%s", self.config.get("dv_server"), self.config.get("dv_database"))
176
+ return True
177
+ except pyodbc.Error as ex:
178
+ logger.error("DataverseSource.connect failed - ODBC Error: %s", ex)
179
+ self._connected = False
180
+ return False
181
+ except requests.RequestException as ex:
182
+ logger.error("DataverseSource.connect failed - HTTP Error: %s", ex)
183
+ self._connected = False
184
+ return False
185
+ except Exception as ex:
186
+ logger.exception("DataverseSource.connect failed")
187
+ self._connected = False
188
+ return False
189
+
190
+ def disconnect(self) -> None:
191
+ try:
192
+ if self._conn:
193
+ try:
194
+ self._conn.close()
195
+ except Exception:
196
+ pass
197
+ self._conn = None
198
+ self._access_token = None
199
+ self._headers = {}
200
+ finally:
201
+ self._connected = False
202
+ logger.info("DataverseSource disconnected")
203
+
204
+ # -------------------------
205
+ # Data fetch
206
+ # -------------------------
207
+ def fetch_data(self, query: Optional[str] = None, **kwargs) -> List[Dict[str, Any]]:
208
+ """
209
+ Fetch rows from Dataverse.
210
+ - TDS mode: executes SQL query (config key 'dv_tds_query'/'dv_sql_query' or provided 'query')
211
+ - WebAPI mode: calls Dataverse Web API path fragment (e.g. 'accounts?$select=name') or uses 'dv_webapi_entity_set' + query params
212
+ Returns list[dict]; in TDS mode with 'dv_tds_aggregation_field' configured, returns grouped summary dicts instead.
213
+ """
214
+ attempt = 0
215
+ while attempt < self._max_retries:
216
+ try:
217
+ if not getattr(self, "_connected", False):
218
+ ok = self.connect()
219
+ if not ok:
220
+ raise RuntimeError("DataverseSource: cannot connect")
221
+
222
+ if self._mode == "webapi":
223
+ if requests is None:
224
+ raise RuntimeError("requests package required for webapi mode")
225
+ webapi_url = self.config["dv_webapi_url"].rstrip("/")
226
+ # if query provided, treat it as path fragment; else use entity_set from config
227
+ path_fragment = query or self.config.get("dv_webapi_entity_set")
228
+ if not path_fragment:
229
+ raise ValueError("DataverseSource.fetch_data requires a webapi 'query' or 'entity_set' in config")
230
+ url = f"{webapi_url}/api/data/v9.1/{path_fragment.lstrip('/')}"
231
+ params = kwargs.get("params")
232
+ resp = requests.get(url, headers=self._headers, params=params, timeout=60)
233
+ resp.raise_for_status()
234
+ j = resp.json()
235
+ items: Any = []
236
+ # Dataverse OData responses typically use 'value' for collections
237
+ if isinstance(j, dict) and "value" in j:
238
+ items = j["value"]
239
+ # otherwise return the raw json wrapped in a list or as-is
240
+ elif isinstance(j, list):
241
+ items = j
242
+ else:
243
+ items = [j]
244
+
245
+ df = pd.DataFrame(items)
246
+ # filter columns if configured
247
+ keep = self.config.get("dv_webapi_columns_to_keep")
248
+ if isinstance(keep, list) and keep:
249
+ cols_to_keep = [c for c in df.columns if c in keep]
250
+ else:
251
+ # exclude OData/Dataverse metadata columns (start with '__' or prefixed with '@')
252
+ cols_to_keep = [c for c in df.columns if not str(c).startswith("__") and not str(c).startswith("@")]
253
+ df = df[cols_to_keep]
254
+
255
+ results = df.to_dict("records")
256
+ return results
257
+ # else TDS mode
258
+ sql = query or self.config.get("dv_tds_query") or self.config.get("dv_sql_query")
259
+ if not sql:
260
+ raise ValueError("DataverseSource.fetch_data requires a SQL query (tds mode)")
261
+
262
+ cur = self._conn.cursor()
263
+ try:
264
+ cur.execute(sql)
265
+ cols = [c[0] for c in (cur.description or [])]
266
+ rows = cur.fetchall()
267
+ results: List[Dict[str, Any]] = []
268
+ for r in rows:
269
+ results.append({cols[i]: r[i] for i in range(len(cols))})
270
+
271
+ df = pd.DataFrame(results)
272
+ summaries = generate_grouped_summaries(
273
+ df=df,
274
+ aggregation_field=self.config.get("dv_tds_aggregation_field"),
275
+ row_format=self.config.get("dv_tds_row_format"),
276
+ constants={"title": ""},
277
+ header_format=self.config.get("dv_tds_header_format"),
278
+ sort_by=self.config.get("dv_tds_sort_by"), # or a column/list if you want ordering
279
+ validate=True # ensures all placeholders exist
280
+ )
281
+
282
+ return summaries
283
+ finally:
284
+ try:
285
+ cur.close()
286
+ except Exception:
287
+ pass
288
+
289
+ except Exception as ex:
290
+ attempt += 1
291
+ logger.warning("DataverseSource.fetch_data attempt %d/%d failed: %s", attempt, self._max_retries, ex)
292
+ # transient retry for network/connection errors
293
+ if attempt >= self._max_retries:
294
+ logger.exception("DataverseSource.fetch_data final failure")
295
+ raise
296
+ # backoff
297
+ time.sleep(min(2 ** attempt, 10))
298
+ # try reconnect for next attempt
299
+ try:
300
+ self.disconnect()
301
+ except Exception:
302
+ pass
303
+
304
+ # unreachable; defensive
305
+ return []
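A usage sketch for both modes of the new DataverseSource. The keys mirror the ones read in validate_config(), _build_tds_conn_str() and _obtain_webapi_token() above; the values are placeholders, not a tested configuration:

```python
# Usage sketch with placeholder values; keys taken from the code above.
from datasourcelib.datasources.dataverse_source import DataverseSource

tds_source = DataverseSource({
    "dv_mode": "tds",
    "dv_tds_server": "<org>.crm.dynamics.com",
    "dv_tds_database": "<database>",
    "dv_tds_username": "<user@tenant>",
    "dv_tds_password": "<password>",
    "dv_tds_query": "SELECT TOP 10 name FROM account",
    # optional: "dv_tds_aggregation_field", "dv_tds_row_format", "dv_tds_header_format"
})

webapi_source = DataverseSource({
    "dv_mode": "webapi",
    "dv_webapi_url": "https://<org>.crm.dynamics.com",
    "dv_webapi_tenant_id": "<tenant-id>",
    "dv_webapi_client_id": "<client-id>",
    "dv_webapi_client_secret": "<client-secret>",
    "dv_webapi_entity_set": "accounts?$select=name&$top=10",
})

for source in (tds_source, webapi_source):
    if source.validate_config() and source.connect():
        rows = source.fetch_data()
        source.disconnect()
```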
@@ -0,0 +1,159 @@
1
+ from typing import Any, Dict, List, Optional
2
+ from datasourcelib.datasources.datasource_base import DataSourceBase
3
+ from datasourcelib.utils.logger import get_logger
4
+ from datasourcelib.utils.validators import require_keys
5
+ import os
6
+ import pyodbc
7
+
8
+
9
+ logger = get_logger(__name__)
10
+
11
+ class SQLDataSource(DataSourceBase):
12
+
13
+ def __init__(self, config: Dict[str, Any]):
14
+ super().__init__(config)
15
+ self._conn = None
16
+ self._is_sqlite = False
17
+
18
+ def validate_config(self) -> bool:
19
+ """
20
+ Validate config. If sql_windows_auth is True then sql_username/sql_password are optional.
21
+ Otherwise require sql_username and sql_password.
22
+ """
23
+ try:
24
+ # Always require server/database at minimum
25
+ require_keys(self.config, ["sql_server", "sql_database"])
26
+ # If not using Windows authentication, require credentials
27
+ if not bool(self.config.get("sql_windows_auth", False)):
28
+ require_keys(self.config, ["sql_username", "sql_password"])
29
+ return True
30
+ except Exception as ex:
31
+ logger.error("SQLDataSource.validate_config: %s", ex)
32
+ return False
33
+
34
+ def connect(self) -> bool:
35
+ try:
36
+ sql_server = self.config.get("sql_server", "")
37
+ sql_database = self.config.get("sql_database", "")
38
+ sql_is_onprem = self.config.get("sql_is_onprem", False)
39
+
40
+ # Determine auth mode: sql_windows_auth (Trusted Connection) overrides username/password
41
+ sql_windows_auth = bool(self.config.get("sql_windows_auth", False))
42
+
43
+ # Get available driver
44
+ sql_driver = self._get_available_driver()
45
+
46
+ # Build connection string
47
+ conn_params = [
48
+ f'DRIVER={sql_driver}',
49
+ f'SERVER={sql_server}',
50
+ f'DATABASE={sql_database}',
51
+ ]
52
+
53
+ if sql_windows_auth:
54
+ # Use integrated Windows authentication (Trusted Connection)
55
+ # This will use the current process credentials / kerberos ticket.
56
+ conn_params.append('Trusted_Connection=yes')
57
+ logger.info("SQLDataSource using Windows (integrated) authentication")
58
+ else:
59
+ sql_username = self.config.get("sql_username", "")
60
+ sql_password = self.config.get("sql_password", "")
61
+ conn_params.extend([f'UID={sql_username}', f'PWD={sql_password}'])
62
+
63
+ # Add encryption settings based on environment
64
+ if not sql_is_onprem:
65
+ conn_params.extend([
66
+ 'Encrypt=yes',
67
+ 'TrustServerCertificate=no'
68
+ ])
69
+ else:
70
+ conn_params.extend([
71
+ 'Encrypt=optional',
72
+ 'TrustServerCertificate=yes'
73
+ ])
74
+
75
+ conn_str = ';'.join(conn_params)
76
+
77
+ # Attempt connection with timeout
78
+ self._conn = pyodbc.connect(conn_str, timeout=30)
79
+ self._connected = True
80
+ logger.info("SQLDataSource connected to %s using driver %s (sql_windows_auth=%s)", sql_server, sql_driver, sql_windows_auth)
81
+ return True
82
+
83
+ except pyodbc.Error as ex:
84
+ logger.error("SQLDataSource.connect failed - ODBC Error: %s", ex)
85
+ self._connected = False
86
+ return False
87
+ except Exception as ex:
88
+ logger.error("SQLDataSource.connect failed - Unexpected Error: %s", ex)
89
+ self._connected = False
90
+ return False
91
+
92
+ def disconnect(self) -> None:
93
+ try:
94
+ if self._conn:
95
+ self._conn.close()
96
+ finally:
97
+ self._conn = None
98
+ self._connected = False
99
+ logger.info("SQLDataSource disconnected")
100
+
101
+ def fetch_data(self, query: Optional[str] = None, **kwargs) -> List[Dict[str, Any]]:
102
+ max_retries = 3
103
+ retry_count = 0
104
+
105
+ while retry_count < max_retries:
106
+ try:
107
+ if not self._connected:
108
+ ok = self.connect()
109
+ if not ok:
110
+ raise RuntimeError("SQLDataSource: not connected and cannot connect")
111
+
112
+ query = self.config.get("sql_query")
113
+ if not query:
114
+ raise ValueError("SQLDataSource.fetch_data requires a query")
115
+
116
+ cur = self._conn.cursor()
117
+ try:
118
+ cur.execute(query)
119
+ cols = [d[0] for d in (cur.description or [])]
120
+ rows = cur.fetchall()
121
+ results: List[Dict[str, Any]] = []
122
+ for r in rows:
123
+ results.append({cols[i]: r[i] for i in range(len(cols))})
124
+ return results
125
+ finally:
126
+ try:
127
+ cur.close()
128
+ except Exception:
129
+ pass
130
+
131
+ except pyodbc.OperationalError as ex:
132
+ # Handle connection lost
133
+ retry_count += 1
134
+ logger.warning("Connection lost, attempt %d of %d: %s", retry_count, max_retries, ex)
135
+ self.disconnect()
136
+ if retry_count >= max_retries:
137
+ raise
138
+ except Exception as ex:
139
+ logger.error("Query execution failed: %s", ex)
140
+ raise
141
+
142
+ def _get_available_driver(self) -> str:
143
+ """Get first available SQL Server driver from preferred list."""
144
+ preferred_drivers = [
145
+ 'ODBC Driver 18 for SQL Server',
146
+ 'ODBC Driver 17 for SQL Server',
147
+ 'SQL Server Native Client 11.0',
148
+ 'SQL Server'
149
+ ]
150
+
151
+ try:
152
+ available_drivers = pyodbc.drivers()
153
+ for driver in preferred_drivers:
154
+ if driver in available_drivers:
155
+ return driver
156
+ raise RuntimeError(f"No suitable SQL Server driver found. Available drivers: {available_drivers}")
157
+ except Exception as ex:
158
+ logger.error("Failed to get SQL drivers: %s", ex)
159
+ raise
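Per the RECORD further down, this file ships as datasourcelib/datasources/sql_source_bkup.py with the same hash as the existing sql_source.py, so the class is importable from either module. A usage sketch with placeholder values for the config keys the class reads:

```python
# Usage sketch; values are placeholders for the keys read in
# validate_config()/connect() above.
from datasourcelib.datasources.sql_source import SQLDataSource

config = {
    "sql_server": "<server>.database.windows.net",
    "sql_database": "<database>",
    "sql_windows_auth": False,   # True -> Trusted_Connection, no credentials required
    "sql_username": "<user>",
    "sql_password": "<password>",
    "sql_is_onprem": False,      # False -> Encrypt=yes;TrustServerCertificate=no
    "sql_query": "SELECT TOP 5 * FROM dbo.Documents",
}

source = SQLDataSource(config)
if source.validate_config() and source.connect():
    rows = source.fetch_data()   # list of {column: value} dicts
    source.disconnect()
```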
@@ -1,22 +1,47 @@
1
1
  from datasourcelib.core.sync_base import SyncBase
2
2
  from datasourcelib.utils.logger import get_logger
3
- from datetime import datetime, timedelta
3
+ from datetime import datetime, timezone
4
+ from typing import Dict, Any, Optional
4
5
 
5
6
  logger = get_logger(__name__)
6
7
 
7
8
  class DailyLoadStrategy(SyncBase):
8
- """Daily scheduled load (wraps incremental)."""
9
+ """Daily scheduled load strategy (wraps incremental sync)."""
9
10
 
10
11
  def validate(self) -> bool:
12
+ """Validate strategy preconditions."""
11
13
  return True
12
14
 
13
- def sync(self, run_date: str = None, **kwargs) -> bool:
15
+ def sync(self, run_date: Optional[str] = None, **kwargs) -> Dict[str, Any]:
16
+ """
17
+ Execute daily load for the given run_date (ISO date string).
18
+ If run_date is None, today's UTC date is used.
19
+
20
+ Returns a dict with status, message and ISO timestamps.
21
+ """
22
+ # Capture started_at up front so the exception handler can reference it
24
+ started_at = datetime.now(timezone.utc).isoformat()
14
25
  try:
15
- run_date = run_date or datetime.utcnow().date().isoformat()
16
- logger.info("Starting daily load for %s", run_date)
17
- # Typically call incremental with last_sync = previous day midnight
18
- # TODO implement scheduling integration externally; the strategy here is idempotent
19
- return True
20
- except Exception:
26
+ run_date = run_date or datetime.now(timezone.utc).date().isoformat()
27
+ logger.info("Starting daily load for %s (requested run_date=%s)", started_at, run_date)
28
+
29
+ # TODO: call incremental sync / processing here, for example:
30
+ # result = self.incremental_sync(last_sync=..., **kwargs)
31
+
32
+ finished_at = datetime.now(timezone.utc).isoformat()
33
+ return {
34
+ "status": "success",
35
+ "message": f"Daily load completed for {run_date}",
36
+ "started_at": started_at,
37
+ "finished_at": finished_at
38
+ }
39
+ except Exception as ex:
21
40
  logger.exception("DailyLoadStrategy.sync failed")
22
- return False
41
+ finished_at = datetime.now(timezone.utc).isoformat()
42
+ return {
43
+ "status": "failure",
44
+ "message": f"Exception: {ex}",
45
+ "started_at": started_at,
46
+ "finished_at": finished_at
47
+ }
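The strategies also switch from the naive datetime.utcnow() to timezone-aware UTC timestamps serialized with isoformat(). A small illustration of the difference:

```python
# Naive utcnow() (removed above) lacks the "+00:00" offset that the
# timezone-aware form carries.
from datetime import datetime, timezone

naive = datetime.utcnow().isoformat()           # e.g. '2024-01-01T12:00:00.000000'
aware = datetime.now(timezone.utc).isoformat()  # e.g. '2024-01-01T12:00:00.000000+00:00'
print(naive, aware)
```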
@@ -1,38 +1,99 @@
1
+ from typing import Dict, Any
2
+ from datetime import datetime, timezone
3
+
1
4
  from datasourcelib.core.sync_base import SyncBase
2
5
  from datasourcelib.utils.logger import get_logger
3
6
  from datasourcelib.indexes.azure_search_index import AzureSearchIndexer
7
+
4
8
  logger = get_logger(__name__)
5
9
 
10
+
6
11
  class FullLoadStrategy(SyncBase):
7
12
  """Full load: replace or reload entire source into vector DB."""
8
13
 
9
14
  def validate(self) -> bool:
10
- # Minimal validation: required keys exist
11
- dsok = self.data_source.validate_config()
12
- return dsok
15
+ # Minimal validation: required keys exist on datasource
16
+ try:
17
+ return bool(self.data_source and self.data_source.validate_config())
18
+ except Exception:
19
+ logger.exception("FullLoadStrategy.validate failed")
20
+ return False
13
21
 
14
- def sync(self, **kwargs) -> bool:
22
+ def sync(self, **kwargs) -> Dict[str, Any]:
23
+ """
24
+ Execute full load: read data from data_source and index into vector DB (Azure Search).
25
+ Returns a dict with status, message and ISO timestamps.
26
+ """
27
+ started_at = datetime.now(timezone.utc).isoformat()
15
28
  try:
16
- logger.info("Running full data load")
29
+ logger.info("Running full data load (started_at=%s)", started_at)
30
+
31
+ # Fetch data from configured data source
17
32
  data = self.data_source.fetch_data(**kwargs)
18
- for key, value in kwargs.items():
19
- print(f"{key} = {value}")
20
- # Implement real extract -> transform -> load to vector DB
21
- # Example pseudocode:
22
- # vector_client.upsert_batch(self.vector_db_config, rows)
23
- # New: use AzureSearchIndexer to create index and upload documents if requested
24
- if isinstance(data, list) and data:
25
- indexer = AzureSearchIndexer(self.vector_db_config or {})
26
- if not indexer.validate_config():
27
- logger.error("Vector DB config invalid for Azure Search indexer")
28
- return False
29
- ok = indexer.index(data)
30
- if not ok:
31
- logger.error("Indexing data to Azure Search failed")
32
- return False
33
-
34
- logger.info("Full data load finished successfully")
35
- return True
36
- except Exception:
33
+
34
+ # Log kwargs for debugging at debug level
35
+ if kwargs:
36
+ logger.debug("FullLoadStrategy.sync kwargs: %s", kwargs)
37
+
38
+ # If no data returned, finish gracefully
39
+ total_records = len(data) if isinstance(data, (list, tuple)) else (1 if data is not None else 0)
40
+ if total_records == 0:
41
+ finished_at = datetime.now(timezone.utc).isoformat()
42
+ msg = "No records returned from data source"
43
+ logger.info(msg)
44
+ return {
45
+ "status": "success",
46
+ "message": msg,
47
+ "started_at": started_at,
48
+ "finished_at": finished_at,
49
+ "loaded_records": 0
50
+ }
51
+
52
+ # Use AzureSearchIndexer to create index and upload documents if requested
53
+ indexer = AzureSearchIndexer(self.vector_db_config or {})
54
+ if not indexer.validate_config():
55
+ finished_at = datetime.now(timezone.utc).isoformat()
56
+ msg = "Vector DB config invalid for Azure Search indexer"
57
+ logger.error(msg)
58
+ return {
59
+ "status": "failure",
60
+ "message": msg,
61
+ "started_at": started_at,
62
+ "finished_at": finished_at,
63
+ "loaded_records": 0
64
+ }
65
+
66
+ ok = indexer.index(data)
67
+ if not ok:
68
+ finished_at = datetime.now(timezone.utc).isoformat()
69
+ msg = "Indexing data to Azure Search failed"
70
+ logger.error(msg)
71
+ return {
72
+ "status": "failure",
73
+ "message": msg,
74
+ "started_at": started_at,
75
+ "finished_at": finished_at,
76
+ "loaded_records": total_records
77
+ }
78
+
79
+ finished_at = datetime.now(timezone.utc).isoformat()
80
+ msg = f"Full load completed. Loaded {total_records} records."
81
+ logger.info("Full data load finished successfully (%s)", msg)
82
+ return {
83
+ "status": "success",
84
+ "message": msg,
85
+ "started_at": started_at,
86
+ "finished_at": finished_at,
87
+ "loaded_records": total_records
88
+ }
89
+
90
+ except Exception as ex:
37
91
  logger.exception("FullLoadStrategy.sync failed")
38
- return False
92
+ finished_at = datetime.now(timezone.utc).isoformat()
93
+ return {
94
+ "status": "failure",
95
+ "message": f"Exception: {ex}",
96
+ "started_at": started_at,
97
+ "finished_at": finished_at,
98
+ "loaded_records": 0
99
+ }
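FullLoadStrategy drives the indexer through exactly two calls, validate_config() and index(documents). A minimal test double exposing that same surface can stand in for AzureSearchIndexer in unit tests; this is a sketch, not the package's real indexer, and the index_name key it checks is only an illustrative assumption:

```python
# Minimal test double for the two calls FullLoadStrategy makes on the indexer.
from typing import Any, Dict, List

class FakeIndexer:
    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.indexed: List[Dict[str, Any]] = []

    def validate_config(self) -> bool:
        # The real indexer validates Azure Search settings; here we only require a name.
        return bool(self.config.get("index_name"))

    def index(self, documents: List[Dict[str, Any]]) -> bool:
        self.indexed.extend(documents)
        return True

indexer = FakeIndexer({"index_name": "demo"})
assert indexer.validate_config() and indexer.index([{"id": "1", "content": "hello"}])
```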
@@ -1,7 +1,7 @@
1
- from datetime import datetime
1
+ from datetime import datetime, timezone
2
2
  from datasourcelib.core.sync_base import SyncBase
3
3
  from datasourcelib.utils.logger import get_logger
4
-
4
+ from typing import Dict, Any
5
5
  logger = get_logger(__name__)
6
6
 
7
7
  class IncrementalLoadStrategy(SyncBase):
@@ -14,14 +14,27 @@ class IncrementalLoadStrategy(SyncBase):
14
14
  return False
15
15
  return True
16
16
 
17
- def sync(self, last_sync: str = None, **kwargs) -> bool:
17
+ def sync(self, last_sync: str = None, **kwargs) -> Dict[str, Any]:
18
18
  try:
19
+ started_at = datetime.now(timezone.utc).isoformat()
19
20
  last = last_sync or self.source_config.get("last_sync")
20
21
  logger.info("Running incremental load since %s", last)
21
22
  # TODO: fetch delta rows since 'last' and upsert to vector DB
22
23
  # After successful run store new last_sync timestamp
23
24
  logger.info("Incremental load completed")
24
- return True
25
- except Exception:
25
+ finished_at = datetime.now(timezone.utc).isoformat()
26
+ return {
27
+ "status": "success",
28
+ "message": f"Incremental load completed since {last}",
29
+ "started_at": started_at,
30
+ "finished_at": finished_at
31
+ }
32
+ except Exception as ex:
26
33
  logger.exception("IncrementalLoadStrategy.sync failed")
27
- return False
34
+ finished_at = datetime.now(timezone.utc).isoformat()
35
+ return {
36
+ "status": "failure",
37
+ "message": f"Exception: {ex}",
38
+ "started_at": started_at,
39
+ "finished_at": finished_at
40
+ }
@@ -1,6 +1,7 @@
1
1
  from datasourcelib.core.sync_base import SyncBase
2
2
  from datasourcelib.utils.logger import get_logger
3
-
3
+ from typing import Dict, Any
4
+ from datetime import datetime, timezone
4
5
  logger = get_logger(__name__)
5
6
 
6
7
  class OnDemandLoadStrategy(SyncBase):
@@ -9,11 +10,24 @@ class OnDemandLoadStrategy(SyncBase):
9
10
  def validate(self) -> bool:
10
11
  return True
11
12
 
12
- def sync(self, **kwargs) -> bool:
13
+ def sync(self, **kwargs) -> Dict[str, Any]:
13
14
  try:
15
+ started_at = datetime.now(timezone.utc).isoformat()
14
16
  logger.info("On-demand sync invoked with params: %s", kwargs)
15
17
  # Use kwargs to drive partial loads, filters, ids etc.
16
- return True
17
- except Exception:
18
+ finished_at = datetime.now(timezone.utc).isoformat()
19
+ return {
20
+ "status": "success",
21
+ "message": f"Ondemand load completed.",
22
+ "started_at": started_at,
23
+ "finished_at": finished_at
24
+ }
25
+ except Exception as ex:
18
26
  logger.exception("OnDemandLoadStrategy.sync failed")
19
- return False
27
+ finished_at = datetime.now(timezone.utc).isoformat()
28
+ return {
29
+ "status": "failure",
30
+ "message": f"Exception: {ex}",
31
+ "started_at": started_at,
32
+ "finished_at": finished_at
33
+ }
@@ -1,6 +1,7 @@
1
- from datetime import datetime
1
+ from datetime import datetime, timezone
2
2
  from datasourcelib.core.sync_base import SyncBase
3
3
  from datasourcelib.utils.logger import get_logger
4
+ from typing import Dict, Any
4
5
 
5
6
  logger = get_logger(__name__)
6
7
 
@@ -11,14 +12,27 @@ class TimeRangeLoadStrategy(SyncBase):
11
12
  # rely on params at runtime; minimal validation OK
12
13
  return True
13
14
 
14
- def sync(self, start: str = None, end: str = None, **kwargs) -> bool:
15
+ def sync(self, start: str = None, end: str = None, **kwargs) -> Dict[str, Any]:
15
16
  try:
17
+ started_at = datetime.now(timezone.utc).isoformat()
16
18
  if not start or not end:
17
19
  logger.error("TimeRangeLoadStrategy requires 'start' and 'end'")
18
20
  return {"status": "failure", "message": "TimeRangeLoadStrategy requires 'start' and 'end'", "started_at": started_at, "finished_at": datetime.now(timezone.utc).isoformat()}
19
21
  logger.info("Time range load between %s and %s", start, end)
20
22
  # TODO: query source for timeframe and upsert
21
- return True
22
- except Exception:
23
+ finished_at = datetime.now(timezone.utc).isoformat()
24
+ return {
25
+ "status": "success",
26
+ "message": f"TimeRange load completed between {start} and {end}",
27
+ "started_at": started_at,
28
+ "finished_at": finished_at
29
+ }
30
+ except Exception as ex:
23
31
  logger.exception("TimeRangeLoadStrategy.sync failed")
24
- return False
32
+ finished_at = datetime.now(timezone.utc).isoformat()
33
+ return {
34
+ "status": "failure",
35
+ "message": f"Exception: {ex}",
36
+ "started_at": started_at,
37
+ "finished_at": finished_at
38
+ }
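TimeRangeLoadStrategy expects explicit start and end values. A small sketch of building a one-day UTC window for those keyword arguments (the strategy instance itself would normally come from SyncManager):

```python
# Building a UTC time window; 'start'/'end' match the keyword names in
# TimeRangeLoadStrategy.sync above.
from datetime import datetime, timedelta, timezone

end_dt = datetime.now(timezone.utc)
start_dt = end_dt - timedelta(days=1)
params = {"start": start_dt.isoformat(), "end": end_dt.isoformat()}
# e.g. strategy.sync(**params) or passed through SyncManager.execute_sync(..., **params)
print(params)
```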
@@ -0,0 +1,152 @@
1
+
2
+ import pandas as pd
3
+ from string import Formatter
4
+ from typing import Iterable, Any, Dict, List, Optional, Union
5
+
6
+ def _placeholders(fmt: str) -> List[str]:
7
+ """
8
+ Extract top-level placeholder names from a format string.
9
+ e.g., 'Number {i} is {fname}' -> ['i', 'fname']
+ """
10
+ return [field_name for _, field_name, _, _ in Formatter().parse(fmt) if field_name]
11
+
12
+ def _safe_str(x) -> str:
13
+ return "" if pd.isna(x) else str(x).strip()
14
+
15
+ def generate_grouped_summaries(
16
+ df: pd.DataFrame,
17
+ aggregation_field: str,
18
+ row_format: str,
19
+ *,
20
+ header_format: str = "{group_value} has {count} record{plural}.",
21
+ constants: Optional[Dict[str, Union[str, int, float]]] = None,
22
+ drop_empty_groups: bool = True,
23
+ sort_by: Optional[Union[str, Iterable[str]]] = None,
24
+ validate: bool = True
25
+ ) -> List[Dict[str, Any]]:
26
+ """
27
+ Build grouped summaries strictly when `aggregation_field` exists in `df` and is non-empty.
28
+
29
+ Parameters
30
+ ----------
31
+ df : pd.DataFrame
32
+ Source dataset.
33
+ aggregation_field : str
34
+ Column name to group by. Must exist in `df`.
35
+ row_format : str
36
+ Format string applied per row within a group.
37
+ You may use placeholders for any df columns, plus:
38
+ - {i}: 1-based sequence number within group
39
+ - constants you provide (e.g., {title_prefix})
40
+ header_format : str, optional
41
+ Format string for group headers. Available placeholders:
42
+ - {group_value}: the group key
43
+ - {count}: number of rows in the group
44
+ - {plural}: '' when count==1 else 's'
45
+ Default: "{group_value} has {count} record{plural}."
46
+ constants : dict, optional
47
+ Additional fixed values to be merged into each row's format context.
48
+ Example: {"title_prefix": "Mr"}
49
+ drop_empty_groups : bool, optional
50
+ If True, rows with blank/empty group values are discarded before grouping.
51
+ sort_by : str | Iterable[str] | None, optional
52
+ If provided, sorts rows within each group by these columns before formatting.
53
+ validate : bool, optional
54
+ If True, checks that all placeholders used in `row_format` and `header_format`
55
+ are available (in df columns or computed context). Raises ValueError if missing.
56
+
57
+ Returns
58
+ -------
59
+ List[Dict[str, Any]]
60
+ One dict per group with 'content' (header + row lines joined with spaces) and 'id' (the group key).
61
+
62
+ Raises
63
+ ------
64
+ ValueError
65
+ - If `aggregation_field` is not a column of `df` (an empty `aggregation_field` returns df.to_dict("records") instead)
66
+ - If no non-empty values exist for `aggregation_field` (with drop_empty_groups=True)
67
+ - If required placeholders are missing when `validate=True`
68
+ KeyError
69
+ - If columns referenced in `sort_by` are missing
70
+ """
71
+ # Basic checks
72
+ if df.empty:
73
+ return []
74
+
75
+ agg_field = (aggregation_field or "").strip()
76
+ if not agg_field:
77
+ return df.to_dict("records")
78
+ if agg_field not in df.columns:
79
+ raise ValueError(f"aggregation_field '{agg_field}' not found in DataFrame columns: {list(df.columns)}")
80
+
81
+ # Prepare working frame
82
+ working = df.copy()
83
+ working[agg_field] = working[agg_field].astype(str).str.strip()
84
+
85
+ if drop_empty_groups:
86
+ working = working[working[agg_field].astype(bool)]
87
+
88
+ if working.empty:
89
+ raise ValueError(f"No rows with non-empty values found for aggregation_field '{agg_field}'.")
90
+
91
+ # Optional sort within groups
92
+ if sort_by is not None:
93
+ sort_cols = [sort_by] if isinstance(sort_by, str) else list(sort_by)
94
+ missing_sort = [c for c in sort_cols if c not in working.columns]
95
+ if missing_sort:
96
+ raise KeyError(f"sort_by columns not found in DataFrame: {missing_sort}")
97
+ working = working.sort_values(sort_cols, kind="stable")
98
+
99
+ # Validation of placeholders (if requested)
100
+ if validate:
101
+ df_cols = set(working.columns)
102
+ row_keys = set(_placeholders(row_format))
103
+ header_keys = set(_placeholders(header_format))
104
+ # Context keys provided by the function
105
+ provided_keys = {"i", "group_value", "count", "plural"}
106
+ constant_keys = set((constants or {}).keys())
107
+
108
+ missing_row = [k for k in row_keys if k not in df_cols and k not in constant_keys and k not in provided_keys]
109
+ missing_header = [k for k in header_keys if k not in provided_keys and k not in constant_keys and k not in df_cols]
110
+ if missing_row:
111
+ raise ValueError(
112
+ f"row_format references missing keys: {missing_row}. "
113
+ f"Ensure these are either df columns or in `constants`."
114
+ )
115
+ if missing_header:
116
+ raise ValueError(
117
+ f"header_format references missing keys: {missing_header}. "
118
+ f"Use only {{group_value}}, {{count}}, {{plural}} or provide constants."
119
+ )
120
+
121
+ # Build summaries per group
122
+ summaries = []
123
+ for group_value, group_df in working.groupby(agg_field, sort=True):
124
+ group_df = group_df.reset_index(drop=True)
125
+ count = len(group_df)
126
+ plural = "" if count == 1 else "s"
127
+
128
+ header_ctx = {
129
+ "group_value": _safe_str(group_value),
130
+ "count": count,
131
+ "plural": plural,
132
+ **(constants or {}),
133
+ }
134
+ header = header_format.format(**header_ctx)
135
+
136
+ lines = []
137
+ for i, row in enumerate(group_df.to_dict(orient="records"), start=1):
138
+ # Row context = df row + sequence + constants (constants override df if same key)
139
+ row_ctx = {k: _safe_str(v) for k, v in row.items()}
140
+ row_ctx.update({"i": i})
141
+ if constants:
142
+ # Constants override row values with same keys
143
+ row_ctx.update(constants)
144
+
145
+ lines.append(row_format.format(**row_ctx))
146
+
147
+ content = header + " " + " ".join(lines)
148
+ summaries.append(
149
+ {"content" : content, "id": group_value}
150
+ )
151
+
152
+ return summaries
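A worked example of the new grouping helper on a toy DataFrame; the expected output is shown in the trailing comment:

```python
# Toy example for generate_grouped_summaries.
import pandas as pd
from datasourcelib.utils.aggregation import generate_grouped_summaries

df = pd.DataFrame([
    {"dept": "Sales", "name": "Ann",  "city": "Oslo"},
    {"dept": "Sales", "name": "Bob",  "city": "Bergen"},
    {"dept": "HR",    "name": "Cara", "city": "Oslo"},
])

summaries = generate_grouped_summaries(
    df=df,
    aggregation_field="dept",
    row_format="{i}. {name} ({city}).",
    header_format="{group_value} has {count} record{plural}.",
    sort_by="name",
)
# -> [{'content': 'HR has 1 record. 1. Cara (Oslo).', 'id': 'HR'},
#     {'content': 'Sales has 2 records. 1. Ann (Oslo). 2. Bob (Bergen).', 'id': 'Sales'}]
```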
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datasourcelib
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Data source sync strategies for vector DBs
5
5
  Home-page: https://github.com/akashmaurya0217/datasourcelib
6
6
  Author: Akash Kumar Maurya
@@ -1,33 +1,36 @@
1
1
  datasourcelib/__init__.py,sha256=I7JTSZ1J6ULg_TfdMEgFcd1regkCHuyKdZT4DcPtoyQ,78
2
2
  datasourcelib/core/__init__.py,sha256=nsXojDd97T7eMqqtCsZr1qSYLBitvKydSZRb9Dg7hqU,462
3
- datasourcelib/core/sync_base.py,sha256=AfwwaV3rJOFKVmKKpSj-BwznnCDCaeuT4LLNDfA3NAY,716
4
- datasourcelib/core/sync_manager.py,sha256=lj070S3PwSNcB0UL_ZDzDAm6uJ9G38TY491vQZ1dL3o,3849
3
+ datasourcelib/core/sync_base.py,sha256=fKbsJYtPIV0ow7sGH7O7GmAEeeSefvD16LBOz0dP4TU,726
4
+ datasourcelib/core/sync_manager.py,sha256=pfnvWv4AwmlJJUIsfxNNxYDBOsa7juTIxgFJIEZ5bIM,4842
5
5
  datasourcelib/core/sync_types.py,sha256=KVZB7PkfkFTzghoe--U8jLeAU8XAfba9qMRIVcUjuMc,297
6
6
  datasourcelib/datasources/__init__.py,sha256=lZtgs0vT-2gub5UZo8BUnREZl3K_-_xYqUP8mjf8vhM,436
7
7
  datasourcelib/datasources/azure_devops_source copy.py,sha256=g-IOCq5vGwwteU21jZPWW_GggMu1_myVJkP0_BmSdGY,7282
8
8
  datasourcelib/datasources/azure_devops_source.py,sha256=3hyZIrUdgwZEQNjb2iZGDMJcAw3Z6r7oV0hWAq_zMsg,8005
9
9
  datasourcelib/datasources/blob_source.py,sha256=Qk61_ulqUSPYDaiMzqgvJAu43c4AjTlDRdfFg4VwgDU,3574
10
10
  datasourcelib/datasources/datasource_base.py,sha256=N8fOGvTl8oWWAiydLI0Joz66luq73a5yovO0XA9Q3jk,1068
11
- datasourcelib/datasources/datasource_types.py,sha256=eEiWymYS05X_TxwuB7P3MpphPG1En67h3kRiSGeHjQ0,176
11
+ datasourcelib/datasources/datasource_types.py,sha256=jpm4f9n1l7X9aBD58Pbr9evXiCHHEhRCLojGwchUD7A,205
12
+ datasourcelib/datasources/dataverse_source.py,sha256=PTIWArl_rRMap5QfH8ST5kCewE0Ax1xPZ1vgSxeujpU,14080
12
13
  datasourcelib/datasources/sharepoint_source - Copy.py,sha256=7V1c-zyvTo4IuPN_YMrKwLZFgbtipbP-mtunmXjOLJQ,17664
13
14
  datasourcelib/datasources/sharepoint_source.py,sha256=t3rly2mVEI2qEDuUVqstck5ktkZW0BnF16Bke_NjPLI,23126
14
15
  datasourcelib/datasources/sql_source.py,sha256=ntZjiFXpa7V797x7mAATJV0LH-g878VHuRw-QTxEe28,6372
16
+ datasourcelib/datasources/sql_source_bkup.py,sha256=ntZjiFXpa7V797x7mAATJV0LH-g878VHuRw-QTxEe28,6372
15
17
  datasourcelib/indexes/__init__.py,sha256=S8dz-lyxy1BTuDuLGRJNLrZD_1ku_FIUnDEm6HhMyT0,94
16
18
  datasourcelib/indexes/azure_search_index.py,sha256=kznAz06UXgyT1Clqj6gRhnBQ5HFw40ZQHJElRFIcbRo,22115
17
19
  datasourcelib/strategies/__init__.py,sha256=kot3u62KIAqYBg9M-KRE4mkMII_zwrDBZNf8Dj1vmX8,399
18
- datasourcelib/strategies/daily_load.py,sha256=Rh-veUhxKYsplwHTyko_Zp9C6NkUJV5VAGtg-p7Iy34,856
19
- datasourcelib/strategies/full_load.py,sha256=U1a9wO_ZLRnMInvU0IRW-ZKnhu0Cv437VcNMKIYuzMA,1691
20
- datasourcelib/strategies/incremental_load.py,sha256=TVqmDLu3m571nqGvzo_69i36QtYe4sBpllFwfPNL0TE,1178
21
- datasourcelib/strategies/ondemand_load.py,sha256=VxzAYgrW2ebTOC3xm61CerL2AFehZUJLnKrqtGRGJoE,644
22
- datasourcelib/strategies/timerange_load.py,sha256=c62BN2yXwVFaA_dQV54qenP4vrb4rcFqbx6m-nqhaTA,900
20
+ datasourcelib/strategies/daily_load.py,sha256=A9BnPqPfbPO8UeBy-jtS53eORK7QWWqLOWHrtyFLbl4,1909
21
+ datasourcelib/strategies/full_load.py,sha256=4BS_g4loR28OVqSDwXBCH2jCKbJLZxx6354KCOi_Qjk,4020
22
+ datasourcelib/strategies/incremental_load.py,sha256=CY1tAyXwjZLoq5zMLwB5i5qmT_L8JBaiBxDy9hx8QkQ,1822
23
+ datasourcelib/strategies/ondemand_load.py,sha256=MgenKJbJePLeErdEkXKsz1h7RuR8yT0RV_X523G7UUs,1304
24
+ datasourcelib/strategies/timerange_load.py,sha256=W_sSZg059Lw2o9tmdGKM9D5-z1pph7AN1ftalXhuyjo,1557
23
25
  datasourcelib/utils/__init__.py,sha256=9pSIpaK-kdmNuDzwl0Z7QU-_lV3cZE-iwOEPh3RBBTs,298
26
+ datasourcelib/utils/aggregation.py,sha256=5aOBcxay4eTyY-S4BRafNgSi37AY-JXERzcCv055E8w,6060
24
27
  datasourcelib/utils/byte_reader.py,sha256=GaoPXwJa2YTWG1Kim0K6JG20eVSaWkZJd1o9bswxHmc,9082
25
28
  datasourcelib/utils/exceptions.py,sha256=mgcDaW1k3VndgpMOwSm7NqgyRTvvE2a5ehn3x4fYQww,369
26
29
  datasourcelib/utils/file_reader.py,sha256=Zr0rwNTRWE6KeVJEXgTOPS1_JI74LiUSiX5-6qojmN0,7301
27
30
  datasourcelib/utils/logger.py,sha256=Sl6lNlvubxtK9ztzyq7vjGVyA8_-pZ_ixpk5jfVsh6U,424
28
31
  datasourcelib/utils/validators.py,sha256=fLgmRAb5OZSdMVlHu_n0RKJUDl-G8dI8JsRSfxIquh8,205
29
- datasourcelib-0.1.5.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
30
- datasourcelib-0.1.5.dist-info/METADATA,sha256=jDGgTdya-zt_go_TpEOJNfTQUI7CsbjM4m-Fg51XdqU,1199
31
- datasourcelib-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
- datasourcelib-0.1.5.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
33
- datasourcelib-0.1.5.dist-info/RECORD,,
32
+ datasourcelib-0.1.7.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
33
+ datasourcelib-0.1.7.dist-info/METADATA,sha256=Jo1RgpvptXpS-FxA6g9-7rVkknZDfzUrOpMQVFxG-9Y,1199
34
+ datasourcelib-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ datasourcelib-0.1.7.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
36
+ datasourcelib-0.1.7.dist-info/RECORD,,