datasourcelib-0.1.8.tar.gz → datasourcelib-0.1.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/PKG-INFO +1 -1
  2. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/setup.py +1 -1
  3. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/azure_devops_source.py +1 -2
  4. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/dataverse_source.py +68 -7
  5. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib.egg-info/PKG-INFO +1 -1
  6. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/LICENSE +0 -0
  7. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/MANIFEST.in +0 -0
  8. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/README.md +0 -0
  9. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/pyproject.toml +0 -0
  10. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/setup.cfg +0 -0
  11. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/__init__.py +0 -0
  12. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/core/__init__.py +0 -0
  13. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/core/sync_base.py +0 -0
  14. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/core/sync_manager.py +0 -0
  15. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/core/sync_types.py +0 -0
  16. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/__init__.py +0 -0
  17. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/azure_devops_source copy.py +0 -0
  18. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/blob_source.py +0 -0
  19. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/datasource_base.py +0 -0
  20. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/datasource_types.py +0 -0
  21. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/sharepoint_source - Copy.py +0 -0
  22. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/sharepoint_source.py +0 -0
  23. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/sql_source.py +0 -0
  24. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/datasources/sql_source_bkup.py +0 -0
  25. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/indexes/__init__.py +0 -0
  26. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/indexes/azure_search_index.py +0 -0
  27. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/strategies/__init__.py +0 -0
  28. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/strategies/daily_load.py +0 -0
  29. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/strategies/full_load.py +0 -0
  30. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/strategies/incremental_load.py +0 -0
  31. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/strategies/ondemand_load.py +0 -0
  32. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/strategies/timerange_load.py +0 -0
  33. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/utils/__init__.py +0 -0
  34. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/utils/aggregation.py +0 -0
  35. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/utils/byte_reader.py +0 -0
  36. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/utils/exceptions.py +0 -0
  37. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/utils/file_reader.py +0 -0
  38. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/utils/logger.py +0 -0
  39. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib/utils/validators.py +0 -0
  40. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib.egg-info/SOURCES.txt +0 -0
  41. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib.egg-info/dependency_links.txt +0 -0
  42. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib.egg-info/requires.txt +0 -0
  43. {datasourcelib-0.1.8 → datasourcelib-0.1.10}/src/datasourcelib.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datasourcelib
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Data source sync strategies for vector DBs
5
5
  Home-page: https://github.com/akashmaurya0217/datasourcelib
6
6
  Author: Akash Kumar Maurya
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="datasourcelib",
5
- version="0.1.8",
5
+ version="0.1.10",
6
6
  packages=find_packages(where="src", exclude=["tests.*", "tests", "examples.*", "examples"]),
7
7
  package_dir={"": "src"},
8
8
  install_requires=[
@@ -176,8 +176,7 @@ class AzureDevOpsSource(DataSourceBase):
176
176
  "release_type": rtype,
177
177
  "target_date": target_date,
178
178
  "description": c_desc,
179
- "full": fullfeature,
180
- "fields": norm_fields # full field set for this work item
179
+ "full": fullfeature
181
180
  }
182
181
  work_item_details.append(entry)
183
182
 
@@ -3,6 +3,7 @@ from datasourcelib.datasources.datasource_base import DataSourceBase
3
3
  from datasourcelib.utils.logger import get_logger
4
4
  from datasourcelib.utils.validators import require_keys
5
5
  from datasourcelib.utils.aggregation import generate_grouped_summaries
6
+ from azure.identity import DefaultAzureCredential
6
7
  import pyodbc
7
8
  import time
8
9
  import pandas as pd
@@ -26,12 +27,26 @@ class DataverseSource(DataSourceBase):
26
27
  self._max_retries = int(self.config.get("dv_max_retries", 3))
27
28
 
28
29
  def validate_config(self) -> bool:
30
+
29
31
  """
30
32
  Validate required keys depending on selected dv_mode.
31
33
  - tds: requires either 'tds_connection_string' OR ('dataverse_server' and 'dataverse_database')
32
- - webapi: requires 'webapi_url','client_id','client_secret','tenant_id' (or 'resource')
34
+ - webapi:
35
+ * client credentials: 'dv_webapi_url','dv_webapi_client_id','dv_webapi_client_secret','dv_webapi_tenant_id'
36
+ * managed identity: 'dv_webapi_url' + dv_webapi_managed_identity_auth=True
33
37
  """
34
- try:
38
+
39
+ try:
40
+ if self._mode == "webapi":
41
+ use_mi = bool(self.config.get("dv_webapi_managed_identity_auth", False))
42
+ require_keys(self.config, ["dv_webapi_url"])
43
+ if not use_mi:
44
+ require_keys(
45
+ self.config,
46
+ ["dv_webapi_client_id", "dv_webapi_client_secret", "dv_webapi_tenant_id"]
47
+ )
48
+ return True
49
+
35
50
  if self._mode == "webapi":
36
51
  require_keys(self.config, ["dv_webapi_url", "dv_webapi_client_id", "dv_webapi_client_secret", "dv_webapi_tenant_id"])
37
52
  else:
@@ -123,11 +138,44 @@ class DataverseSource(DataSourceBase):
123
138
  return conn_str
124
139
 
125
140
  def _obtain_webapi_token(self) -> Tuple[str, Dict[str, str]]:
141
+
126
142
  """
127
- Acquire OAuth2 token using client credentials flow.
143
+ Acquire OAuth2 token for Dataverse Web API.
128
144
  Returns (access_token, headers)
129
- Config expected keys: tenant_id, client_id, client_secret, optional resource
145
+
146
+ Paths:
147
+ - Managed Identity (dv_webapi_managed_identity_auth=True): uses DefaultAzureCredential and scope '<webapi_url>/.default'
148
+ - Client Credentials (dv_webapi_managed_identity_auth=False): uses OAuth2 client credentials with tenant/client/secret
130
149
  """
150
+
151
+ webapi_url = self.config["dv_webapi_url"].rstrip("/")
152
+ # Scope for MSAL-style request
153
+ scope = f"{webapi_url}/.default"
154
+
155
+ use_mi = bool(self.config.get("dv_webapi_managed_identity_auth", False))
156
+
157
+ # --- Managed Identity / DefaultAzureCredential path ---
158
+ if use_mi:
159
+ if DefaultAzureCredential is None:
160
+ raise RuntimeError(
161
+ "azure-identity package required for managed identity auth (pip install azure-identity)"
162
+ )
163
+ # DefaultAzureCredential works with system/user-assigned MI in Azure,
164
+ # and falls back to developer credentials locally (Azure CLI/VS Code).
165
+ credential = DefaultAzureCredential()
166
+ # Obtain token for Dataverse scope
167
+ token_obj = credential.get_token(scope)
168
+ token = token_obj.token
169
+ if not token:
170
+ raise RuntimeError("Failed to obtain Managed Identity token for Dataverse Web API")
171
+ headers = {
172
+ "Authorization": f"Bearer {token}",
173
+ "Accept": "application/json",
174
+ "OData-MaxVersion": "4.0",
175
+ "OData-Version": "4.0"
176
+ }
177
+ return token, headers
178
+
131
179
  if requests is None:
132
180
  raise RuntimeError("requests package required for Dataverse Web API mode")
133
181
  tenant = self.config["dv_webapi_tenant_id"]
@@ -166,7 +214,10 @@ class DataverseSource(DataSourceBase):
166
214
  self._access_token = token
167
215
  self._headers = headers
168
216
  self._connected = True
169
- logger.info("DataverseSource connected (webapi mode) to %s", self.config.get("dv_webapi_url"))
217
+
218
+ auth_mode = "managed identity" if bool(self.config.get("dv_webapi_managed_identity_auth", False)) else "client credentials"
219
+ logger.info("DataverseSource connected (webapi, %s) to %s", auth_mode, self.config.get("dv_webapi_url"))
220
+
170
221
  return True
171
222
  # else TDS mode
172
223
  conn_str = self._build_tds_conn_str()
@@ -251,9 +302,19 @@ class DataverseSource(DataSourceBase):
251
302
  # exclude SharePoint metadata columns (start with '__' or prefixed with '@')
252
303
  cols_to_keep = [c for c in df.columns if not str(c).startswith("__") and not str(c).startswith("@")]
253
304
  df = df[cols_to_keep]
305
+ summaries = generate_grouped_summaries(
306
+ df=df,
307
+ aggregation_field=self.config.get("dv_webapi_aggregation_field"),
308
+ row_format=self.config.get("dv_webapi_row_format"),
309
+ constants={"title": ""},
310
+ header_format=self.config.get("dv_webapi_header_format"),
311
+ sort_by=self.config.get("dv_webapi_sort_by"), # or a column/list if you want ordering
312
+ validate=True # ensures all placeholders exist
313
+ )
254
314
 
255
- results = df.to_dict("records")
256
- return results
315
+ return summaries
316
+ #results = df.to_dict("records")
317
+ #return results
257
318
  # else TDS mode
258
319
  sql = query or self.config.get("dv_tds_query") or self.config.get("dv_sql_query")
259
320
  if not sql:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datasourcelib
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Data source sync strategies for vector DBs
5
5
  Home-page: https://github.com/akashmaurya0217/datasourcelib
6
6
  Author: Akash Kumar Maurya
File without changes
File without changes
File without changes