datasourcelib 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ from typing import Any, Dict, List, Optional
2
+ from datasourcelib.datasources.datasource_base import DataSourceBase
3
+ from datasourcelib.utils.logger import get_logger
4
+ from datasourcelib.utils.validators import require_keys
5
+ import base64
6
+ import json
7
+ from bs4 import BeautifulSoup
8
+
9
+ logger = get_logger(__name__)
10
+
11
+ try:
12
+ import requests # type: ignore
13
+ except Exception:
14
+ requests = None # lazy import handled at runtime
15
+
16
+ class AzureDevOpsSource(DataSourceBase):
17
+
18
+ def validate_config(self) -> bool:
19
+ try:
20
+ require_keys(self.config, ["ado_organization", "ado_personal_access_token","ado_project","ado_query_id"])
21
+ return True
22
+ except Exception as ex:
23
+ logger.error("AzureDevOpsSource.validate_config: %s", ex)
24
+ return False
25
+
26
+ def connect(self) -> bool:
27
+ if requests is None:
28
+ raise RuntimeError("requests package is required for AzureDevOpsSource")
29
+ # No persistent connection; store auth header
30
+ pat = self.config.get("ado_personal_access_token")
31
+ token = pat
32
+ token_b64 = base64.b64encode(token.encode("utf-8")).decode("utf-8")
33
+ self._headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
34
+ self._connected = True
35
+ logger.info("AzureDevOpsSource ready (no persistent connection required)")
36
+ return True
37
+
38
+ def disconnect(self) -> None:
39
+ self._headers = {}
40
+ self._connected = False
41
+ logger.info("AzureDevOpsSource cleared")
42
+
43
+ def fetch_data(self, query: Optional[str] = None, **kwargs) -> List[Dict[str, Any]]:
44
+ if requests is None:
45
+ raise RuntimeError("requests package is required for AzureDevOpsSource")
46
+ if not getattr(self, "_connected", False):
47
+ self.connect()
48
+
49
+ org = self.config.get("ado_organization")
50
+ project = self.config.get("ado_project")
51
+ query_id = self.config.get("ado_query_id")
52
+ api_version = self.config.get("api_version", "7.1")
53
+ #path = self.config.get("query_path", query or "")
54
+ if not query_id:
55
+ raise ValueError("AzureDevOpsSource.fetch_data requires 'query_id' or query argument")
56
+
57
+ base = f"https://dev.azure.com/{org}/"
58
+ if project:
59
+ base = f"{base}{project}/"
60
+ url = f"{base}_apis/wit/wiql/{query_id}"
61
+ params = {"api-version": api_version}
62
+ method = self.config.get("method", "GET").upper()
63
+ query_response = requests.request(method, url, headers=getattr(self, "_headers", {}), params=params) #, json=self.config.get("payload")
64
+ query_response.raise_for_status()
65
+ #data = resp.json()
66
+ # Check if the request was successful
67
+ if query_response.status_code == 200:
68
+ work_items = query_response.json()['workItems']
69
+ work_item_details = []
70
+
71
+ # Loop through each work item ID to get detailed information
72
+ for item in work_items:
73
+ work_item_id = item['id']
74
+ work_item_url = f'https://dev.azure.com/{org}/{project}/_apis/wit/workitems/{work_item_id}?api-version=7.1'
75
+ work_item_response = requests.get(work_item_url, headers=getattr(self, "_headers", {}))
76
+
77
+ if work_item_response.status_code == 200:
78
+ logger.info(f"Current Item: {work_item_id}")
79
+ text = work_item_response.json()['fields']['System.Description']
80
+ c_desc=BeautifulSoup(text, "html.parser").get_text()
81
+ c_changedate = work_item_response.json()['fields']['System.ChangedDate']
82
+ c_title = work_item_response.json()['fields']['System.Title']
83
+ c_status = work_item_response.json()['fields']['System.State']
84
+ c_type = work_item_response.json()['fields']['System.WorkItemType']
85
+ c_created = work_item_response.json()['fields']['System.CreatedDate']
86
+
87
+ default_value = "-VALUE NOT ASSIGNED-"
88
+ c_assigned = work_item_response.json()['fields'].get('System.AssignedTo',{}).get('displayName',default_value)
89
+ logger.info(c_assigned)
90
+ c_tags = work_item_response.json()['fields'].get('System.Tags',default_value)
91
+ c_project = work_item_response.json()['fields'].get('Custom.ProjectName',default_value)
92
+ c_rtype = work_item_response.json()['fields'].get('Custom.Releasetype',default_value)
93
+ c_rdate = work_item_response.json()['fields'].get('Microsoft.VSTS.Scheduling.TargetDate',default_value)
94
+
95
+ #fullfeature = f"{c_type} ID {work_item_id} was created on {c_created} for a {c_rtype} release of Project '{c_project}' with target date '{c_rdate}' and has given Title as '{c_title}'. {c_type} ID {work_item_id} is currently in {c_status} state. {c_type} ID {work_item_id} is assigned to {c_assigned} and last modified on {c_changedate}.Tags Applied to {c_type} ID {work_item_id} are {c_tags}. Full Description of {c_type} ID {work_item_id} is [{c_desc}]."
96
+ fullfeature = f"{c_type} ID {work_item_id} was created on {c_created}. {c_type} ID {work_item_id} is a {c_rtype} release of Project '{c_project}'. {c_type} ID {work_item_id} Release has target date '{c_rdate}'.{c_type} ID {work_item_id} has given Title as '{c_title}'. {c_type} ID {work_item_id} is currently in {c_status} state. {c_type} ID {work_item_id} is assigned to {c_assigned}. {c_type} ID {work_item_id} is last modified on {c_changedate}. Tags Applied to {c_type} ID {work_item_id} are {c_tags}. Full Description of {c_type} ID {work_item_id} is [{c_desc}]."
97
+ # Ensure work_item_details is a list and append a dict for this work item
98
+
99
+ work_item_details.append({
100
+ "id": work_item_id,
101
+ "type": c_type,
102
+ "title": c_title,
103
+ "status": c_status,
104
+ "assigned_to": c_assigned,
105
+ "created": c_created,
106
+ "changed_date": c_changedate,
107
+ "tags": c_tags,
108
+ "release_type": c_rtype,
109
+ "target_date": c_rdate,
110
+ "project": c_project,
111
+ "description": c_desc,
112
+ "full": fullfeature
113
+ })
114
+ else:
115
+ logger.error(f"Error fetching details for work item ID {work_item_id}: {work_item_response.status_code}")
116
+
117
+ #work_item_desc = []
118
+ #for desc in work_item_details:
119
+ # work_item_desc.append(desc['fields']['System.Description'])
120
+
121
+
122
+ return work_item_details #[{"response": json.dumps(work_item_details)}]
123
+ else:
124
+ raise RuntimeError(f"Error: {query_response.status_code}")
125
+ # Caller decides how to interpret the payload; default: return raw json in a single-item list
126
+
@@ -50,77 +50,135 @@ class AzureDevOpsSource(DataSourceBase):
50
50
  project = self.config.get("ado_project")
51
51
  query_id = self.config.get("ado_query_id")
52
52
  api_version = self.config.get("api_version", "7.1")
53
- #path = self.config.get("query_path", query or "")
54
53
  if not query_id:
55
54
  raise ValueError("AzureDevOpsSource.fetch_data requires 'query_id' or query argument")
56
55
 
57
56
  base = f"https://dev.azure.com/{org}/"
58
57
  if project:
59
58
  base = f"{base}{project}/"
60
- url = f"{base}_apis/wit/wiql/{query_id}"
59
+ # WIQL query by id (returns list of work item refs)
60
+ wiql_url = f"{base}_apis/wit/wiql/{query_id}"
61
61
  params = {"api-version": api_version}
62
62
  method = self.config.get("method", "GET").upper()
63
- query_response = requests.request(method, url, headers=getattr(self, "_headers", {}), params=params) #, json=self.config.get("payload")
63
+ query_response = requests.request(method, wiql_url, headers=getattr(self, "_headers", {}), params=params)
64
64
  query_response.raise_for_status()
65
- #data = resp.json()
66
- # Check if the request was successful
67
- if query_response.status_code == 200:
68
- work_items = query_response.json()['workItems']
69
- work_item_details = []
70
-
71
- # Loop through each work item ID to get detailed information
72
- for item in work_items:
73
- work_item_id = item['id']
74
- work_item_url = f'https://dev.azure.com/{org}/{project}/_apis/wit/workitems/{work_item_id}?api-version=7.1'
75
- work_item_response = requests.get(work_item_url, headers=getattr(self, "_headers", {}))
76
-
77
- if work_item_response.status_code == 200:
78
- logger.info(f"Current Item: {work_item_id}")
79
- text = work_item_response.json()['fields']['System.Description']
80
- c_desc=BeautifulSoup(text, "html.parser").get_text()
81
- c_changedate = work_item_response.json()['fields']['System.ChangedDate']
82
- c_title = work_item_response.json()['fields']['System.Title']
83
- c_status = work_item_response.json()['fields']['System.State']
84
- c_type = work_item_response.json()['fields']['System.WorkItemType']
85
- c_created = work_item_response.json()['fields']['System.CreatedDate']
86
-
87
- default_value = "-VALUE NOT ASSIGNED-"
88
- c_assigned = work_item_response.json()['fields'].get('System.AssignedTo',{}).get('displayName',default_value)
89
- logger.info(c_assigned)
90
- c_tags = work_item_response.json()['fields'].get('System.Tags',default_value)
91
- c_project = work_item_response.json()['fields'].get('Custom.ProjectName',default_value)
92
- c_rtype = work_item_response.json()['fields'].get('Custom.Releasetype',default_value)
93
- c_rdate = work_item_response.json()['fields'].get('Microsoft.VSTS.Scheduling.TargetDate',default_value)
94
-
95
- #fullfeature = f"{c_type} ID {work_item_id} was created on {c_created} for a {c_rtype} release of Project '{c_project}' with target date '{c_rdate}' and has given Title as '{c_title}'. {c_type} ID {work_item_id} is currently in {c_status} state. {c_type} ID {work_item_id} is assigned to {c_assigned} and last modified on {c_changedate}.Tags Applied to {c_type} ID {work_item_id} are {c_tags}. Full Description of {c_type} ID {work_item_id} is [{c_desc}]."
96
- fullfeature = f"{c_type} ID {work_item_id} was created on {c_created}. {c_type} ID {work_item_id} is a {c_rtype} release of Project '{c_project}'. {c_type} ID {work_item_id} Release has target date '{c_rdate}'.{c_type} ID {work_item_id} has given Title as '{c_title}'. {c_type} ID {work_item_id} is currently in {c_status} state. {c_type} ID {work_item_id} is assigned to {c_assigned}. {c_type} ID {work_item_id} is last modified on {c_changedate}. Tags Applied to {c_type} ID {work_item_id} are {c_tags}. Full Description of {c_type} ID {work_item_id} is [{c_desc}]."
97
- # Ensure work_item_details is a list and append a dict for this work item
98
-
99
- work_item_details.append({
100
- "id": work_item_id,
101
- "type": c_type,
102
- "title": c_title,
103
- "status": c_status,
104
- "assigned_to": c_assigned,
105
- "created": c_created,
106
- "changed_date": c_changedate,
107
- "tags": c_tags,
108
- "release_type": c_rtype,
109
- "target_date": c_rdate,
110
- "project": c_project,
111
- "description": c_desc,
112
- "full": fullfeature
113
- })
114
- else:
115
- logger.error(f"Error fetching details for work item ID {work_item_id}: {work_item_response.status_code}")
116
-
117
- #work_item_desc = []
118
- #for desc in work_item_details:
119
- # work_item_desc.append(desc['fields']['System.Description'])
120
-
121
-
122
- return work_item_details #[{"response": json.dumps(work_item_details)}]
123
- else:
65
+
66
+ if query_response.status_code != 200:
124
67
  raise RuntimeError(f"Error: {query_response.status_code}")
125
- # Caller decides how to interpret the payload; default: return raw json in a single-item list
126
-
68
+
69
+ work_items_refs = query_response.json().get('workItems', []) or []
70
+ if not work_items_refs:
71
+ return []
72
+
73
+ # collect ids and fetch details in batch to get all fields for all work item types
74
+ ids = [str(item.get('id')) for item in work_items_refs if item.get('id')]
75
+ if not ids:
76
+ return []
77
+
78
+ details_url = f"https://dev.azure.com/{org}/{project}/_apis/wit/workitems"
79
+ # expand=all to include fields, relations, and attachments
80
+ params = {
81
+ "ids": ",".join(ids),
82
+ "api-version": api_version,
83
+ "$expand": "all"
84
+ }
85
+ details_resp = requests.get(details_url, headers=getattr(self, "_headers", {}), params=params)
86
+ details_resp.raise_for_status()
87
+ items = details_resp.json().get("value", [])
88
+
89
+ work_item_details: List[Dict[str, Any]] = []
90
+ for item in items:
91
+ item_id = item.get("id")
92
+ fields = item.get("fields", {}) or {}
93
+
94
+ # Normalize field keys to safe snake_case-like keys
95
+ norm_fields: Dict[str, Any] = {}
96
+ for k, v in fields.items():
97
+ nk = k.replace(".", "_")
98
+ nk = nk.lower()
99
+ norm_fields[nk] = v
100
+
101
+ # Helper to safely extract nested displayName for assigned to
102
+ assigned = norm_fields.get("system_assignedto")
103
+ if isinstance(assigned, dict):
104
+ assigned_to = assigned.get("displayName") or assigned.get("uniqueName") or str(assigned)
105
+ else:
106
+ assigned_to = assigned
107
+
108
+ # find a description-like field (some types use different field names)
109
+ desc = ""
110
+ for fk in ["system_description", "microsoft_vsts_createdby", "html_description"]:
111
+ if fk in norm_fields:
112
+ desc = norm_fields.get(fk) or ""
113
+ break
114
+ if not desc:
115
+ # fallback: first field key that contains 'description'
116
+ for kf, vf in norm_fields.items():
117
+ if "description" in kf and vf:
118
+ desc = vf
119
+ break
120
+
121
+ # clean HTML description to text
122
+ try:
123
+ c_desc = BeautifulSoup(desc or "", "html.parser").get_text()
124
+ except Exception:
125
+ c_desc = desc or ""
126
+
127
+ # Build common convenience values (use available fields)
128
+ wi_type = norm_fields.get("system_workitemtype") or norm_fields.get("system_witype") or ""
129
+ title = norm_fields.get("system_title") or ""
130
+ status = norm_fields.get("system_state") or ""
131
+ created = norm_fields.get("system_createddate") or norm_fields.get("system_created") or ""
132
+ changed = norm_fields.get("system_changeddate") or norm_fields.get("system_changed") or ""
133
+ tags = norm_fields.get("system_tags", "")
134
+ project_name = norm_fields.get("custom.projectname") or norm_fields.get("system_teamproject") or ""
135
+
136
+ rtype = norm_fields.get("custom.releasetype") or norm_fields.get("custom_releasetype") or ""
137
+ target_date = norm_fields.get("microsoft_vsts_scheduling_targetdate") or norm_fields.get("microsoft.vsts.scheduling.targetdate") or ""
138
+
139
+ # Construct a 'full' description string using available pieces
140
+ parts = []
141
+ if wi_type:
142
+ parts.append(f"{wi_type} ID {item_id}")
143
+ else:
144
+ parts.append(f"WorkItem {item_id}")
145
+ if created:
146
+ parts.append(f"was created on {created}")
147
+ if title:
148
+ parts.append(f"and has Title '{title}'")
149
+ if status:
150
+ parts.append(f"is currently in {status} state")
151
+ if assigned_to:
152
+ parts.append(f"is assigned to {assigned_to}")
153
+ if project_name:
154
+ parts.append(f"for Project '{project_name}'")
155
+ if rtype:
156
+ parts.append(f"release type '{rtype}'")
157
+ if target_date:
158
+ parts.append(f"with target date '{target_date}'")
159
+ if tags:
160
+ parts.append(f"Tags: {tags}")
161
+ if c_desc:
162
+ parts.append(f"Description: [{c_desc}]")
163
+ fullfeature = ". ".join(parts)
164
+
165
+ # include all normalized fields in the returned object for completeness
166
+ entry = {
167
+ "id": item_id,
168
+ "type": wi_type,
169
+ "title": title,
170
+ "status": status,
171
+ "assigned_to": assigned_to,
172
+ "created": created,
173
+ "changed_date": changed,
174
+ "tags": tags,
175
+ "project": project_name,
176
+ "release_type": rtype,
177
+ "target_date": target_date,
178
+ "description": c_desc,
179
+ "full": fullfeature,
180
+ "fields": norm_fields # full field set for this work item
181
+ }
182
+ work_item_details.append(entry)
183
+
184
+ return work_item_details
@@ -7,6 +7,7 @@ import requests
7
7
  import pandas as pd
8
8
  import os
9
9
  from uuid import uuid4
10
+ from datetime import datetime, timedelta
10
11
 
11
12
  logger = get_logger(__name__)
12
13
  reader = ByteReader()
@@ -114,50 +115,86 @@ class SharePointSource(DataSourceBase):
114
115
  self._drive_id = drives[0].get("id")
115
116
  logger.info("Resolved SharePoint drive ID: %s", self._drive_id)
116
117
 
118
+ def _get_client_credentials(self) -> Tuple[str, str]:
119
+ """Retrieve client credentials in order of priority: sp_download_config, sp_client_config, sp_master_config."""
120
+ # Fallback to sp_client_config
121
+ sp_client_config = self.config.get("sp_client_config", {})
122
+ client_id = sp_client_config.get("sp_client_id")
123
+ client_secret = sp_client_config.get("sp_client_secret")
124
+
125
+ if not client_id or not client_secret:
126
+ # Fallback to sp_master_config
127
+ sp_master_config = self.config.get("sp_master_config", {})
128
+ client_id = client_id or sp_master_config.get("sp_client_id")
129
+ client_secret = client_secret or sp_master_config.get("sp_client_secret")
130
+
131
+ if not client_id or not client_secret:
132
+ raise ValueError("Client ID and Client Secret must be provided in the configuration.")
133
+
134
+ return client_id, client_secret
135
+
136
+ def _get_download_credentials(self) -> Tuple[str, str]:
137
+ """Retrieve client credentials in order of priority: sp_download_config, sp_client_config, sp_master_config."""
138
+ # Check sp_download_config first
139
+ sp_download_config = self.config.get("sp_client_config", {}).get("sp_download_config", {})
140
+ client_id = sp_download_config.get("sp_client_id")
141
+ client_secret = sp_download_config.get("sp_client_secret")
142
+
143
+ if not client_id or not client_secret:
144
+ # Fallback to sp_client_config
145
+ sp_client_config = self.config.get("sp_client_config", {})
146
+ client_id = client_id or sp_client_config.get("sp_client_id")
147
+ client_secret = client_secret or sp_client_config.get("sp_client_secret")
148
+
149
+ if not client_id or not client_secret:
150
+ # Fallback to sp_master_config
151
+ sp_master_config = self.config.get("sp_master_config", {})
152
+ client_id = client_id or sp_master_config.get("sp_client_id")
153
+ client_secret = client_secret or sp_master_config.get("sp_client_secret")
154
+
155
+ if not client_id or not client_secret:
156
+ raise ValueError("Client ID and Client Secret must be provided in the configuration.")
157
+
158
+ return client_id, client_secret
159
+
160
+
117
161
  def connect(self) -> bool:
118
162
  try:
119
163
  # basic values
120
164
  self._site_url = self.config["sp_site_url"]
121
- client_config = self.config["sp_client_config"]
122
165
  master_config = self.config["sp_master_config"]
123
166
 
124
167
  # get master token (Sites.Read.All)
125
168
  try:
126
- self._master_token = self._get_token(
127
- master_config["sp_client_id"], master_config["sp_client_secret"], master_config["sp_tenant_id"]
128
- )
169
+ master_client_id = master_config["sp_client_id"]
170
+ master_client_secret = master_config["sp_client_secret"]
171
+ self._master_token = self._get_token(master_client_id, master_client_secret, master_config["sp_tenant_id"])
129
172
  logger.info("$$$ - Obtained master access token for SharePoint - $$$")
130
173
  except Exception as ex:
131
174
  logger.info("$$$ - Failed to obtain master token - $$$")
132
175
 
133
176
  # resolve site and drive ids
134
177
  try:
135
- self._resolve_site_and_drive(
136
- self.config['sp_site_display_name']
137
- )
178
+ self._resolve_site_and_drive(self.config['sp_site_display_name'])
138
179
  except Exception:
139
180
  logger.info("$$$ - Failed to resolve site/drive - $$$")
140
-
181
+
141
182
  # get client token (Site.Selected) for download operations
142
183
  try:
143
- # use master tenant id for tenant
144
- self._access_token = self._get_token(
145
- client_config["sp_client_id"], client_config["sp_client_secret"], master_config["sp_tenant_id"]
146
- )
184
+ client_id, client_secret = self._get_client_credentials()
185
+ self._access_token = self._get_token(client_id, client_secret, master_config["sp_tenant_id"])
147
186
  logger.info("$$$ - Obtained client access token for SharePoint downloads - $$$")
148
187
  except Exception:
149
188
  logger.info("$$$ - Failed to obtain client access token - $$$")
150
-
189
+
151
190
  # get list client token (Site.Selected) for list operations
152
191
  try:
153
- # use master tenant id for tenant
154
- self._list_token = self._get_list_token(
155
- client_config["sp_client_id"], client_config["sp_client_secret"], master_config["sp_tenant_id"],master_config["sp_domain_name"]
156
- )
192
+ client_id, client_secret = self._get_client_credentials()
193
+ self._list_token = self._get_list_token(client_id, client_secret, master_config["sp_tenant_id"], master_config["sp_domain_name"])
157
194
  logger.info("$$$ - Obtained client list token for SharePoint list operations - $$$")
158
195
  except Exception:
159
196
  logger.info("$$$ - Failed to obtain client list token - $$$")
160
-
197
+
161
198
  self._connected = True
162
199
  logger.info("SharePointSource connected for site: %s", self._site_url)
163
200
  return True
@@ -324,10 +361,9 @@ class SharePointSource(DataSourceBase):
324
361
  results = []
325
362
  items = self._fetch_list_items_via_rest(relative_path)
326
363
 
327
- if str(self.config.get("sp_client_config",{}).get("sp_download_config",{})["sp_client_id"]):
328
- self._access_token = self._get_token(
329
- self.config.get("sp_client_config",{}).get("sp_download_config",{})["sp_client_id"], self.config.get("sp_client_config",{}).get("sp_download_config",{})["sp_client_secret"], self.config.get("sp_master_config",{})["sp_tenant_id"]
330
- )
364
+ client_id, client_secret = self._get_download_credentials()
365
+
366
+ self._access_token = self._get_token(client_id, client_secret, self.config.get("sp_master_config",{})["sp_tenant_id"])
331
367
  #test running with hardcoded items
332
368
  if False:
333
369
  items = []
@@ -350,12 +386,29 @@ class SharePointSource(DataSourceBase):
350
386
  })
351
387
 
352
388
  for item in items:
353
- item_name = item.get("Title")
354
- item_display_name = item.get("SiteDisplayName")
389
+ #the path after [Shared Documents/] in relative path
390
+ item_relative_path = item.get("RelativePath") or item.get("relativepath") or item.get("relativePath")
391
+ item_name = item.get("Title") or item.get("title")
392
+ item_display_name = item.get("SiteDisplayName") or item.get("sitedisplayname") or item.get("siteDisplayName")
393
+
394
+ # Check ModifiedDate filter
395
+ # "2024-01-15" → 10 chars || "20240115" → 8 chars
396
+ modified_date_str = item.get("ModifiedDate") or item.get("modifieddate") or item.get("modifiedDate")
397
+ if modified_date_str:
398
+ try:
399
+ modified_date = datetime.fromisoformat(modified_date_str.replace('Z', '+00:00'))
400
+ if datetime.now(modified_date.tzinfo) - modified_date < timedelta(days=1):
401
+ continue
402
+ except Exception:
403
+ pass
404
+
405
+ if not item_relative_path:
406
+ logger.warning("Item missing RelativePath: %s", item)
407
+ continue
408
+
355
409
  #get site id and drive id for this item
356
410
  self._resolve_site_and_drive(item_display_name)
357
- #the path after [Shared Documents/] in relative path
358
- item_relative_path = item.get("RelativePath")
411
+
359
412
  try:
360
413
  content, filename = self._download_file_bytes(item_relative_path)
361
414
  saved = self._save_file_if_requested(content, filename, save_path)
@@ -16,8 +16,16 @@ class SQLDataSource(DataSourceBase):
16
16
  self._is_sqlite = False
17
17
 
18
18
  def validate_config(self) -> bool:
19
+ """
20
+ Validate config. If sql_windows_auth is True then sql_username/sql_password are optional.
21
+ Otherwise require sql_username and sql_password.
22
+ """
19
23
  try:
20
- require_keys(self.config, ["sql_server","sql_database","sql_username","sql_password","sql_is_onprem"])
24
+ # Always require server/database at minimum
25
+ require_keys(self.config, ["sql_server", "sql_database"])
26
+ # If not using Windows authentication, require credentials
27
+ if not bool(self.config.get("sql_windows_auth", False)):
28
+ require_keys(self.config, ["sql_username", "sql_password"])
21
29
  return True
22
30
  except Exception as ex:
23
31
  logger.error("SQLDataSource.validate_config: %s", ex)
@@ -27,22 +35,31 @@ class SQLDataSource(DataSourceBase):
27
35
  try:
28
36
  sql_server = self.config.get("sql_server", "")
29
37
  sql_database = self.config.get("sql_database", "")
30
- sql_username = self.config.get("sql_username", "")
31
- sql_password = self.config.get("sql_password", "")
32
38
  sql_is_onprem = self.config.get("sql_is_onprem", False)
33
-
39
+
40
+ # Determine auth mode: sql_windows_auth (Trusted Connection) overrides username/password
41
+ sql_windows_auth = bool(self.config.get("sql_windows_auth", False))
42
+
34
43
  # Get available driver
35
44
  sql_driver = self._get_available_driver()
36
-
37
- # Build connection string with appropriate encryption settings
45
+
46
+ # Build connection string
38
47
  conn_params = [
39
48
  f'DRIVER={sql_driver}',
40
49
  f'SERVER={sql_server}',
41
50
  f'DATABASE={sql_database}',
42
- f'UID={sql_username}',
43
- f'PWD={sql_password}'
44
51
  ]
45
-
52
+
53
+ if sql_windows_auth:
54
+ # Use integrated Windows authentication (Trusted Connection)
55
+ # This will use the current process credentials / kerberos ticket.
56
+ conn_params.append('Trusted_Connection=yes')
57
+ logger.info("SQLDataSource using Windows (integrated) authentication")
58
+ else:
59
+ sql_username = self.config.get("sql_username", "")
60
+ sql_password = self.config.get("sql_password", "")
61
+ conn_params.extend([f'UID={sql_username}', f'PWD={sql_password}'])
62
+
46
63
  # Add encryption settings based on environment
47
64
  if not sql_is_onprem:
48
65
  conn_params.extend([
@@ -56,13 +73,13 @@ class SQLDataSource(DataSourceBase):
56
73
  ])
57
74
 
58
75
  conn_str = ';'.join(conn_params)
59
-
76
+
60
77
  # Attempt connection with timeout
61
78
  self._conn = pyodbc.connect(conn_str, timeout=30)
62
79
  self._connected = True
63
- logger.info("SQLDataSource connected to %s using driver %s", sql_server, sql_driver)
80
+ logger.info("SQLDataSource connected to %s using driver %s (sql_windows_auth=%s)", sql_server, sql_driver, sql_windows_auth)
64
81
  return True
65
-
82
+
66
83
  except pyodbc.Error as ex:
67
84
  logger.error("SQLDataSource.connect failed - ODBC Error: %s", ex)
68
85
  self._connected = False
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datasourcelib
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Data source sync strategies for vector DBs
5
- Home-page: https://github.com/jaiprakash0217/datasourcelib
6
- Author: Jai Prakash
7
- Author-email: jai.prakash@jai12ka4.com
5
+ Home-page: https://github.com/akashmaurya0217/datasourcelib
6
+ Author: Akash Kumar Maurya
7
+ Author-email: mrelectronicsarduino@gmail.com
8
8
  Classifier: Development Status :: 3 - Alpha
9
9
  Classifier: Intended Audience :: Developers
10
10
  Classifier: License :: OSI Approved :: MIT License
@@ -4,13 +4,14 @@ datasourcelib/core/sync_base.py,sha256=AfwwaV3rJOFKVmKKpSj-BwznnCDCaeuT4LLNDfA3N
4
4
  datasourcelib/core/sync_manager.py,sha256=lj070S3PwSNcB0UL_ZDzDAm6uJ9G38TY491vQZ1dL3o,3849
5
5
  datasourcelib/core/sync_types.py,sha256=KVZB7PkfkFTzghoe--U8jLeAU8XAfba9qMRIVcUjuMc,297
6
6
  datasourcelib/datasources/__init__.py,sha256=lZtgs0vT-2gub5UZo8BUnREZl3K_-_xYqUP8mjf8vhM,436
7
- datasourcelib/datasources/azure_devops_source.py,sha256=g-IOCq5vGwwteU21jZPWW_GggMu1_myVJkP0_BmSdGY,7282
7
+ datasourcelib/datasources/azure_devops_source copy.py,sha256=g-IOCq5vGwwteU21jZPWW_GggMu1_myVJkP0_BmSdGY,7282
8
+ datasourcelib/datasources/azure_devops_source.py,sha256=3hyZIrUdgwZEQNjb2iZGDMJcAw3Z6r7oV0hWAq_zMsg,8005
8
9
  datasourcelib/datasources/blob_source.py,sha256=Qk61_ulqUSPYDaiMzqgvJAu43c4AjTlDRdfFg4VwgDU,3574
9
10
  datasourcelib/datasources/datasource_base.py,sha256=N8fOGvTl8oWWAiydLI0Joz66luq73a5yovO0XA9Q3jk,1068
10
11
  datasourcelib/datasources/datasource_types.py,sha256=eEiWymYS05X_TxwuB7P3MpphPG1En67h3kRiSGeHjQ0,176
11
12
  datasourcelib/datasources/sharepoint_source - Copy.py,sha256=7V1c-zyvTo4IuPN_YMrKwLZFgbtipbP-mtunmXjOLJQ,17664
12
- datasourcelib/datasources/sharepoint_source.py,sha256=Pv9735Gu2FylVeeT9e_cZlCvgGUwxn-pVRRZQe2PHU8,20196
13
- datasourcelib/datasources/sql_source.py,sha256=sCYHrmeD82fQVcdQjL9Y2TTTjaqlv2v8B5noAng3Bl4,5450
13
+ datasourcelib/datasources/sharepoint_source.py,sha256=t3rly2mVEI2qEDuUVqstck5ktkZW0BnF16Bke_NjPLI,23126
14
+ datasourcelib/datasources/sql_source.py,sha256=ntZjiFXpa7V797x7mAATJV0LH-g878VHuRw-QTxEe28,6372
14
15
  datasourcelib/indexes/__init__.py,sha256=S8dz-lyxy1BTuDuLGRJNLrZD_1ku_FIUnDEm6HhMyT0,94
15
16
  datasourcelib/indexes/azure_search_index.py,sha256=kznAz06UXgyT1Clqj6gRhnBQ5HFw40ZQHJElRFIcbRo,22115
16
17
  datasourcelib/strategies/__init__.py,sha256=kot3u62KIAqYBg9M-KRE4mkMII_zwrDBZNf8Dj1vmX8,399
@@ -25,8 +26,8 @@ datasourcelib/utils/exceptions.py,sha256=mgcDaW1k3VndgpMOwSm7NqgyRTvvE2a5ehn3x4f
25
26
  datasourcelib/utils/file_reader.py,sha256=Zr0rwNTRWE6KeVJEXgTOPS1_JI74LiUSiX5-6qojmN0,7301
26
27
  datasourcelib/utils/logger.py,sha256=Sl6lNlvubxtK9ztzyq7vjGVyA8_-pZ_ixpk5jfVsh6U,424
27
28
  datasourcelib/utils/validators.py,sha256=fLgmRAb5OZSdMVlHu_n0RKJUDl-G8dI8JsRSfxIquh8,205
28
- datasourcelib-0.1.4.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
29
- datasourcelib-0.1.4.dist-info/METADATA,sha256=LR3db7O_rnbTmF_owLl-lH06xAfP-iZu4aXPtmjVtRo,1185
30
- datasourcelib-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- datasourcelib-0.1.4.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
32
- datasourcelib-0.1.4.dist-info/RECORD,,
29
+ datasourcelib-0.1.5.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
30
+ datasourcelib-0.1.5.dist-info/METADATA,sha256=jDGgTdya-zt_go_TpEOJNfTQUI7CsbjM4m-Fg51XdqU,1199
31
+ datasourcelib-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
+ datasourcelib-0.1.5.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
33
+ datasourcelib-0.1.5.dist-info/RECORD,,