datasourcelib 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datasourcelib/core/sync_base.py
@@ -14,6 +14,6 @@ class SyncBase(ABC):
         raise NotImplementedError
 
     @abstractmethod
-    def sync(self, **kwargs) -> bool:
+    def sync(self, **kwargs) -> Dict[str, Any]:
         """Execute sync operation. Returns True on success, False otherwise."""
         raise NotImplementedError
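
The `sync` contract changes here from a boolean to a result dict, although the docstring above still describes the old True/False behaviour. Judging from the strategy hunks later in this diff, the dict is shaped roughly as follows (a sketch, not code from the package):

    from datetime import datetime, timezone

    # Result-dict shape returned by the reworked strategies (see the strategy hunks below).
    result = {
        "status": "success",    # or "failure"; SyncManager itself returns SyncStatus.FAILED on bad input
        "message": "Incremental load completed since 2025-01-01",
        "started_at": datetime.now(timezone.utc).isoformat(),   # ISO-8601 UTC timestamps
        "finished_at": datetime.now(timezone.utc).isoformat(),
    }
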

datasourcelib/core/sync_manager.py
@@ -40,8 +40,8 @@ class SyncManager:
         DataSourceType.Dataverse: DataverseSource
     }
 
-    def execute_sync(self, sync_type: SyncType,
-                     source_type: DataSourceType,
+    def execute_sync(self, sync_type: str,
+                     source_type: str,
                      source_config: Dict[str, Any],
                      vector_db_config: Dict[str, Any],
                      **kwargs) -> Dict[str, Any]:
@@ -49,6 +49,33 @@ class SyncManager:
         logger.info(f"Execute {sync_type} sync using {source_type} source")
 
         try:
+            # validate and convert sync_type and source_type to their Enum members
+            def _to_enum(enum_cls, val, label):
+                if isinstance(val, enum_cls):
+                    return val
+                s = str(val)
+                # case-insensitive name match
+                for member in enum_cls:
+                    if member.name.lower() == s.lower():
+                        return member
+                # try by value
+                try:
+                    return enum_cls(val)
+                except Exception:
+                    names = ", ".join([m.name for m in enum_cls])
+                    values = ", ".join([str(m.value) for m in enum_cls])
+                    raise ValueError(f"Invalid {label}. Permitted names: {names}. Permitted values: {values}")
+
+            try:
+                sync_type = _to_enum(SyncType, sync_type, "sync_type")
+                source_type = _to_enum(DataSourceType, source_type, "source_type")
+            except ValueError as ex:
+                logger.error(str(ex))
+                return {
+                    "status": SyncStatus.FAILED,
+                    "message": str(ex),
+                    "started_at": start
+                }
             # Get data source class
             source_cls = self._datasource_map.get(source_type)
             if not source_cls:
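
The coercion accepts an enum member, a member name (case-insensitively), or a member value. A self-contained sketch of the same logic, with an illustrative stand-in for the library's `SyncType`:

    from enum import Enum

    class SyncType(Enum):    # illustrative stand-in; the real members live in datasourcelib
        FULL = "full"
        INCREMENTAL = "incremental"

    def to_enum(enum_cls, val, label):
        if isinstance(val, enum_cls):
            return val
        for member in enum_cls:    # case-insensitive match on the member name first
            if member.name.lower() == str(val).lower():
                return member
        try:
            return enum_cls(val)   # then fall back to a lookup by value
        except Exception:
            names = ", ".join(m.name for m in enum_cls)
            raise ValueError(f"Invalid {label}. Permitted names: {names}")

    assert to_enum(SyncType, "FULL", "sync_type") is SyncType.FULL                 # by name
    assert to_enum(SyncType, "incremental", "sync_type") is SyncType.INCREMENTAL   # by name or value
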
@@ -78,15 +105,7 @@ class SyncManager:
             }
 
             # Execute sync
-            success = strategy.sync(**kwargs)
-            status = SyncStatus.SUCCESS if success else SyncStatus.FAILED
-
-            return {
-                "status": status,
-                "message": f"{sync_type} completed" if success else f"{sync_type} failed",
-                "started_at": start,
-                "finished_at": datetime.utcnow()
-            }
+            return strategy.sync(**kwargs)
 
         except Exception as ex:
             logger.exception("SyncManager.execute_sync failed")

datasourcelib/datasources/dataverse_source.py
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional, Tuple
 from datasourcelib.datasources.datasource_base import DataSourceBase
 from datasourcelib.utils.logger import get_logger
 from datasourcelib.utils.validators import require_keys
+from datasourcelib.utils.aggregation import generate_grouped_summaries
 import pyodbc
 import time
 import pandas as pd
@@ -250,6 +251,7 @@ class DataverseSource(DataSourceBase):
             # exclude SharePoint metadata columns (start with '__' or prefixed with '@')
             cols_to_keep = [c for c in df.columns if not str(c).startswith("__") and not str(c).startswith("@")]
             df = df[cols_to_keep]
+
             results = df.to_dict("records")
             return results
         # else TDS mode
@@ -265,7 +267,19 @@ class DataverseSource(DataSourceBase):
             results: List[Dict[str, Any]] = []
             for r in rows:
                 results.append({cols[i]: r[i] for i in range(len(cols))})
-            return results
+
+            df = pd.DataFrame(results)
+            summaries = generate_grouped_summaries(
+                df=df,
+                aggregation_field=self.config.get("dv_tds_aggregation_field"),
+                row_format=self.config.get("dv_tds_row_format"),
+                constants={"title": ""},
+                header_format=self.config.get("dv_tds_header_format"),
+                sort_by=self.config.get("dv_tds_sort_by"),  # or a column/list if you want ordering
+                validate=True  # ensures all placeholders exist
+            )
+
+            return summaries
         finally:
             try:
                 cur.close()
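
The new dv_tds_* keys drive the grouping, and `{title}` is additionally available in the row format because the call hard-codes `constants={"title": ""}`. A hypothetical source config (the column names are made up):

    config = {
        "dv_tds_aggregation_field": "accountname",
        "dv_tds_row_format": "{i}. {title}{contactname} ({emailaddress})",
        "dv_tds_header_format": "{group_value} has {count} contact{plural}.",
        "dv_tds_sort_by": "contactname",
    }
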

datasourcelib/datasources/sql_source.py
@@ -2,9 +2,10 @@ from typing import Any, Dict, List, Optional
 from datasourcelib.datasources.datasource_base import DataSourceBase
 from datasourcelib.utils.logger import get_logger
 from datasourcelib.utils.validators import require_keys
+from datasourcelib.utils.aggregation import generate_grouped_summaries
 import os
 import pyodbc
-
+import pandas as pd
 
 logger = get_logger(__name__)
 
@@ -121,7 +122,20 @@ class SQLDataSource(DataSourceBase):
             results: List[Dict[str, Any]] = []
             for r in rows:
                 results.append({cols[i]: r[i] for i in range(len(cols))})
-            return results
+
+            df = pd.DataFrame(results)
+            summaries = generate_grouped_summaries(
+                df=df,
+                aggregation_field=self.config.get("sql_aggregation_field"),
+                row_format=self.config.get("sql_aggregation_row_format"),
+                constants={"title": ""},
+                header_format=self.config.get("sql_aggregation_header_format"),
+                sort_by=self.config.get("sql_aggregation_sort_by"),  # or a column/list if you want ordering
+                validate=True  # ensures all placeholders exist
+            )
+
+            return summaries
+
         finally:
             try:
                 cur.close()
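
Both the SQL and Dataverse paths degrade gracefully: when no aggregation field is configured, `self.config.get(...)` yields None and the helper added below short-circuits back to raw row dicts:

    # excerpt from datasourcelib/utils/aggregation.py (added below)
    agg_field = (aggregation_field or "").strip()
    if not agg_field:
        return df.to_dict("records")   # no grouping configured: raw records, as before
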

datasourcelib/strategies/daily_load.py
@@ -1,22 +1,47 @@
 from datasourcelib.core.sync_base import SyncBase
 from datasourcelib.utils.logger import get_logger
-from datetime import datetime, timedelta
+from datetime import datetime, timezone
+from typing import Dict, Any, Optional
 
 logger = get_logger(__name__)
 
 class DailyLoadStrategy(SyncBase):
-    """Daily scheduled load (wraps incremental)."""
+    """Daily scheduled load strategy (wraps incremental sync)."""
 
     def validate(self) -> bool:
+        """Validate strategy preconditions."""
         return True
 
-    def sync(self, run_date: str = None, **kwargs) -> bool:
+    def sync(self, run_date: Optional[str] = None, **kwargs) -> Dict[str, Any]:
+        """
+        Execute daily load for the given run_date (ISO date string).
+        If run_date is None, today's UTC date is used.
+
+        Returns a dict with status, message and ISO timestamps.
+        """
+        # Ensure run_date and started_at exist even if exceptions occur early
+        run_date = run_date
+        started_at = datetime.now(timezone.utc).isoformat()
         try:
-            run_date = run_date or datetime.utcnow().date().isoformat()
-            logger.info("Starting daily load for %s", run_date)
-            # Typically call incremental with last_sync = previous day midnight
-            # TODO implement scheduling integration externally; the strategy here is idempotent
-            return True
-        except Exception:
+            run_date = run_date or datetime.now(timezone.utc).date().isoformat()
+            logger.info("Starting daily load for %s (requested run_date=%s)", started_at, run_date)
+
+            # TODO: call incremental sync / processing here, for example:
+            # result = self.incremental_sync(last_sync=..., **kwargs)
+
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "success",
+                "message": f"Daily load completed for {run_date}",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }
+        except Exception as ex:
             logger.exception("DailyLoadStrategy.sync failed")
-            return False
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "failure",
+                "message": f"Exception: {ex}",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }

datasourcelib/strategies/full_load.py
@@ -1,38 +1,99 @@
+from typing import Dict, Any
+from datetime import datetime, timezone
+
 from datasourcelib.core.sync_base import SyncBase
 from datasourcelib.utils.logger import get_logger
 from datasourcelib.indexes.azure_search_index import AzureSearchIndexer
+
 logger = get_logger(__name__)
 
+
 class FullLoadStrategy(SyncBase):
     """Full load: replace or reload entire source into vector DB."""
 
     def validate(self) -> bool:
-        # Minimal validation: required keys exist
-        dsok = self.data_source.validate_config()
-        return dsok
+        # Minimal validation: required keys exist on datasource
+        try:
+            return bool(self.data_source and self.data_source.validate_config())
+        except Exception:
+            logger.exception("FullLoadStrategy.validate failed")
+            return False
 
-    def sync(self, **kwargs) -> bool:
+    def sync(self, **kwargs) -> Dict[str, Any]:
+        """
+        Execute full load: read data from data_source and index into vector DB (Azure Search).
+        Returns a dict with status, message and ISO timestamps.
+        """
+        started_at = datetime.now(timezone.utc).isoformat()
         try:
-            logger.info("Running full data load")
+            logger.info("Running full data load (started_at=%s)", started_at)
+
+            # Fetch data from configured data source
             data = self.data_source.fetch_data(**kwargs)
-            for key, value in kwargs.items():
-                print(f"{key} = {value}")
-            # Implement real extract -> transform -> load to vector DB
-            # Example pseudocode:
-            # vector_client.upsert_batch(self.vector_db_config, rows)
-            # New: use AzureSearchIndexer to create index and upload documents if requested
-            if isinstance(data, list) and data:
-                indexer = AzureSearchIndexer(self.vector_db_config or {})
-                if not indexer.validate_config():
-                    logger.error("Vector DB config invalid for Azure Search indexer")
-                    return False
-                ok = indexer.index(data)
-                if not ok:
-                    logger.error("Indexing data to Azure Search failed")
-                    return False
-
-            logger.info("Full data load finished successfully")
-            return True
-        except Exception:
+
+            # Log kwargs for debugging at debug level
+            if kwargs:
+                logger.debug("FullLoadStrategy.sync kwargs: %s", kwargs)
+
+            # If no data returned, finish gracefully
+            total_records = len(data) if isinstance(data, (list, tuple)) else (1 if data is not None else 0)
+            if total_records == 0:
+                finished_at = datetime.now(timezone.utc).isoformat()
+                msg = "No records returned from data source"
+                logger.info(msg)
+                return {
+                    "status": "success",
+                    "message": msg,
+                    "started_at": started_at,
+                    "finished_at": finished_at,
+                    "loaded_records": 0
+                }
+
+            # Use AzureSearchIndexer to create index and upload documents if requested
+            indexer = AzureSearchIndexer(self.vector_db_config or {})
+            if not indexer.validate_config():
+                finished_at = datetime.now(timezone.utc).isoformat()
+                msg = "Vector DB config invalid for Azure Search indexer"
+                logger.error(msg)
+                return {
+                    "status": "failure",
+                    "message": msg,
+                    "started_at": started_at,
+                    "finished_at": finished_at,
+                    "loaded_records": 0
+                }
+
+            ok = indexer.index(data)
+            if not ok:
+                finished_at = datetime.now(timezone.utc).isoformat()
+                msg = "Indexing data to Azure Search failed"
+                logger.error(msg)
+                return {
+                    "status": "failure",
+                    "message": msg,
+                    "started_at": started_at,
+                    "finished_at": finished_at,
+                    "loaded_records": total_records
+                }
+
+            finished_at = datetime.now(timezone.utc).isoformat()
+            msg = f"Full load completed. Loaded {total_records} records."
+            logger.info("Full data load finished successfully (%s)", msg)
+            return {
+                "status": "success",
+                "message": msg,
+                "started_at": started_at,
+                "finished_at": finished_at,
+                "loaded_records": total_records
+            }
+
+        except Exception as ex:
             logger.exception("FullLoadStrategy.sync failed")
-            return False
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "failure",
+                "message": f"Exception: {ex}",
+                "started_at": started_at,
+                "finished_at": finished_at,
+                "loaded_records": 0
+            }
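
The strategy depends on only two indexer calls, `validate_config()` and `index(data)`. A hypothetical stand-in exposing that same surface, e.g. for exercising FullLoadStrategy without an Azure Search instance (the required config key is invented):

    class FakeIndexer:
        """Hypothetical test double with the two methods FullLoadStrategy calls."""

        def __init__(self, config):
            self.config = config or {}

        def validate_config(self) -> bool:
            return bool(self.config.get("endpoint"))   # invented requirement

        def index(self, docs) -> bool:
            print(f"would upload {len(docs)} documents")
            return True
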

datasourcelib/strategies/incremental_load.py
@@ -1,7 +1,7 @@
-from datetime import datetime
+from datetime import datetime, timezone
 from datasourcelib.core.sync_base import SyncBase
 from datasourcelib.utils.logger import get_logger
-
+from typing import Dict, Any
 logger = get_logger(__name__)
 
 class IncrementalLoadStrategy(SyncBase):
@@ -14,14 +14,27 @@ class IncrementalLoadStrategy(SyncBase):
             return False
         return True
 
-    def sync(self, last_sync: str = None, **kwargs) -> bool:
+    def sync(self, last_sync: str = None, **kwargs) -> Dict[str, Any]:
         try:
+            started_at = datetime.now(timezone.utc).isoformat()
             last = last_sync or self.source_config.get("last_sync")
             logger.info("Running incremental load since %s", last)
             # TODO: fetch delta rows since 'last' and upsert to vector DB
             # After successful run store new last_sync timestamp
             logger.info("Incremental load completed")
-            return True
-        except Exception:
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "success",
+                "message": f"Incremental load completed since {last}",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }
+        except Exception as ex:
             logger.exception("IncrementalLoadStrategy.sync failed")
-            return False
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "failure",
+                "message": f"Exception: {ex}",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }

datasourcelib/strategies/ondemand_load.py
@@ -1,6 +1,7 @@
 from datasourcelib.core.sync_base import SyncBase
 from datasourcelib.utils.logger import get_logger
-
+from typing import Dict, Any
+from datetime import datetime, timezone
 logger = get_logger(__name__)
 
 class OnDemandLoadStrategy(SyncBase):
@@ -9,11 +10,24 @@ class OnDemandLoadStrategy(SyncBase):
     def validate(self) -> bool:
         return True
 
-    def sync(self, **kwargs) -> bool:
+    def sync(self, **kwargs) -> Dict[str, Any]:
         try:
+            started_at = datetime.now(timezone.utc).isoformat()
             logger.info("On-demand sync invoked with params: %s", kwargs)
             # Use kwargs to drive partial loads, filters, ids etc.
-            return True
-        except Exception:
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "success",
+                "message": "On-demand load completed.",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }
+        except Exception as ex:
             logger.exception("OnDemandLoadStrategy.sync failed")
-            return False
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "failure",
+                "message": f"Exception: {ex}",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }

datasourcelib/strategies/timerange_load.py
@@ -1,6 +1,7 @@
-from datetime import datetime
+from datetime import datetime, timezone
 from datasourcelib.core.sync_base import SyncBase
 from datasourcelib.utils.logger import get_logger
+from typing import Dict, Any
 
 logger = get_logger(__name__)
 
@@ -11,14 +12,27 @@ class TimeRangeLoadStrategy(SyncBase):
         # rely on params at runtime; minimal validation OK
         return True
 
-    def sync(self, start: str = None, end: str = None, **kwargs) -> bool:
+    def sync(self, start: str = None, end: str = None, **kwargs) -> Dict[str, Any]:
         try:
+            started_at = datetime.now(timezone.utc).isoformat()
             if not start or not end:
                 logger.error("TimeRangeLoadStrategy requires 'start' and 'end'")
                 return False
             logger.info("Time range load between %s and %s", start, end)
             # TODO: query source for timeframe and upsert
-            return True
-        except Exception:
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "success",
+                "message": f"TimeRange load completed between {start} and {end}",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }
+        except Exception as ex:
             logger.exception("TimeRangeLoadStrategy.sync failed")
-            return False
+            finished_at = datetime.now(timezone.utc).isoformat()
+            return {
+                "status": "failure",
+                "message": f"Exception: {ex}",
+                "started_at": started_at,
+                "finished_at": finished_at
+            }

datasourcelib/utils/aggregation.py (new file)
@@ -0,0 +1,152 @@
+
+import pandas as pd
+from string import Formatter
+from typing import Iterable, Any, Dict, List, Optional, Union
+
+def _placeholders(fmt: str) -> List[str]:
+    """
+    Extract top-level placeholder names from a format string.
+    e.g., 'Number {i} is {fname}' -> ['i', 'fname']
+    """
+    return [field_name for _, field_name, _, _ in Formatter().parse(fmt) if field_name]
+
+def _safe_str(x) -> str:
+    return "" if pd.isna(x) else str(x).strip()
+
+def generate_grouped_summaries(
+    df: pd.DataFrame,
+    aggregation_field: str,
+    row_format: str,
+    *,
+    header_format: str = "{group_value} has {count} record{plural}.",
+    constants: Optional[Dict[str, Union[str, int, float]]] = None,
+    drop_empty_groups: bool = True,
+    sort_by: Optional[Union[str, Iterable[str]]] = None,
+    validate: bool = True
+) -> List[Dict[str, Any]]:
+    """
+    Build grouped summaries strictly when `aggregation_field` exists in `df` and is non-empty.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Source dataset.
+    aggregation_field : str
+        Column name to group by. Must exist in `df`.
+    row_format : str
+        Format string applied per row within a group.
+        You may use placeholders for any df columns, plus:
+        - {i}: 1-based sequence number within group
+        - constants you provide (e.g., {title_prefix})
+    header_format : str, optional
+        Format string for group headers. Available placeholders:
+        - {group_value}: the group key
+        - {count}: number of rows in the group
+        - {plural}: '' when count==1 else 's'
+        Default: "{group_value} has {count} record{plural}."
+    constants : dict, optional
+        Additional fixed values to be merged into each row's format context.
+        Example: {"title_prefix": "Mr"}
+    drop_empty_groups : bool, optional
+        If True, rows with blank/empty group values are discarded before grouping.
+    sort_by : str | Iterable[str] | None, optional
+        If provided, sorts rows within each group by these columns before formatting.
+    validate : bool, optional
+        If True, checks that all placeholders used in `row_format` and `header_format`
+        are available (in df columns or computed context). Raises ValueError if missing.
+
+    Returns
+    -------
+    List[Dict[str, Any]]
+        One dict per group: "content" holds the header and row lines joined
+        with spaces, "id" holds the group key.
+
+    Raises
+    ------
+    ValueError
+        - If `aggregation_field` is provided but not found in `df` columns
+        - If no non-empty values exist for `aggregation_field` (with drop_empty_groups=True)
+        - If required placeholders are missing when `validate=True`
+    KeyError
+        - If columns referenced in `sort_by` are missing
+    """
+    # Basic checks
+    if df.empty:
+        return []
+
+    agg_field = (aggregation_field or "").strip()
+    if not agg_field:
+        return df.to_dict("records")
+    if agg_field not in df.columns:
+        raise ValueError(f"aggregation_field '{agg_field}' not found in DataFrame columns: {list(df.columns)}")
+
+    # Prepare working frame
+    working = df.copy()
+    working[agg_field] = working[agg_field].astype(str).str.strip()
+
+    if drop_empty_groups:
+        working = working[working[agg_field].astype(bool)]
+
+    if working.empty:
+        raise ValueError(f"No rows with non-empty values found for aggregation_field '{agg_field}'.")
+
+    # Optional sort within groups
+    if sort_by is not None:
+        sort_cols = [sort_by] if isinstance(sort_by, str) else list(sort_by)
+        missing_sort = [c for c in sort_cols if c not in working.columns]
+        if missing_sort:
+            raise KeyError(f"sort_by columns not found in DataFrame: {missing_sort}")
+        working = working.sort_values(sort_cols, kind="stable")
+
+    # Validation of placeholders (if requested)
+    if validate:
+        df_cols = set(working.columns)
+        row_keys = set(_placeholders(row_format))
+        header_keys = set(_placeholders(header_format))
+        # Context keys provided by the function
+        provided_keys = {"i", "group_value", "count", "plural"}
+        constant_keys = set((constants or {}).keys())
+
+        missing_row = [k for k in row_keys if k not in df_cols and k not in constant_keys and k not in provided_keys]
+        missing_header = [k for k in header_keys if k not in provided_keys and k not in constant_keys and k not in df_cols]
+        if missing_row:
+            raise ValueError(
+                f"row_format references missing keys: {missing_row}. "
+                f"Ensure these are either df columns or in `constants`."
+            )
+        if missing_header:
+            raise ValueError(
+                f"header_format references missing keys: {missing_header}. "
+                f"Use only {{group_value}}, {{count}}, {{plural}} or provide constants."
+            )
+
+    # Build summaries per group
+    summaries = []
+    for group_value, group_df in working.groupby(agg_field, sort=True):
+        group_df = group_df.reset_index(drop=True)
+        count = len(group_df)
+        plural = "" if count == 1 else "s"
+
+        header_ctx = {
+            "group_value": _safe_str(group_value),
+            "count": count,
+            "plural": plural,
+            **(constants or {}),
+        }
+        header = header_format.format(**header_ctx)
+
+        lines = []
+        for i, row in enumerate(group_df.to_dict(orient="records"), start=1):
+            # Row context = df row + sequence + constants (constants override df if same key)
+            row_ctx = {k: _safe_str(v) for k, v in row.items()}
+            row_ctx.update({"i": i})
+            if constants:
+                # Constants override row values with same keys
+                row_ctx.update(constants)
+
+            lines.append(row_format.format(**row_ctx))
+
+        content = header + " " + " ".join(lines)
+        summaries.append(
+            {"content": content, "id": group_value}
+        )
+
+    return summaries
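
A worked example of the helper (assuming datasourcelib 0.1.8 is installed; the data is made up):

    import pandas as pd
    from datasourcelib.utils.aggregation import generate_grouped_summaries

    df = pd.DataFrame({
        "team":   ["alpha", "alpha", "beta"],
        "member": ["Ana", "Bo", "Cy"],
    })
    summaries = generate_grouped_summaries(
        df=df,
        aggregation_field="team",
        row_format="{i}. {member}",
        header_format="{group_value} has {count} member{plural}.",
        sort_by="member",
    )
    # [{'content': 'alpha has 2 members. 1. Ana 2. Bo', 'id': 'alpha'},
    #  {'content': 'beta has 1 member. 1. Cy', 'id': 'beta'}]
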

datasourcelib-0.1.8.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datasourcelib
-Version: 0.1.6
+Version: 0.1.8
 Summary: Data source sync strategies for vector DBs
 Home-page: https://github.com/akashmaurya0217/datasourcelib
 Author: Akash Kumar Maurya

datasourcelib-0.1.8.dist-info/RECORD
@@ -1,7 +1,7 @@
 datasourcelib/__init__.py,sha256=I7JTSZ1J6ULg_TfdMEgFcd1regkCHuyKdZT4DcPtoyQ,78
 datasourcelib/core/__init__.py,sha256=nsXojDd97T7eMqqtCsZr1qSYLBitvKydSZRb9Dg7hqU,462
-datasourcelib/core/sync_base.py,sha256=AfwwaV3rJOFKVmKKpSj-BwznnCDCaeuT4LLNDfA3NAY,716
-datasourcelib/core/sync_manager.py,sha256=pep3lS9GINzhOnwrMSPnOh5rfIsMbu8a0TEkTyq4yRk,3961
+datasourcelib/core/sync_base.py,sha256=fKbsJYtPIV0ow7sGH7O7GmAEeeSefvD16LBOz0dP4TU,726
+datasourcelib/core/sync_manager.py,sha256=pfnvWv4AwmlJJUIsfxNNxYDBOsa7juTIxgFJIEZ5bIM,4842
 datasourcelib/core/sync_types.py,sha256=KVZB7PkfkFTzghoe--U8jLeAU8XAfba9qMRIVcUjuMc,297
 datasourcelib/datasources/__init__.py,sha256=lZtgs0vT-2gub5UZo8BUnREZl3K_-_xYqUP8mjf8vhM,436
 datasourcelib/datasources/azure_devops_source copy.py,sha256=g-IOCq5vGwwteU21jZPWW_GggMu1_myVJkP0_BmSdGY,7282
@@ -9,27 +9,28 @@ datasourcelib/datasources/azure_devops_source.py,sha256=3hyZIrUdgwZEQNjb2iZGDMJc
 datasourcelib/datasources/blob_source.py,sha256=Qk61_ulqUSPYDaiMzqgvJAu43c4AjTlDRdfFg4VwgDU,3574
 datasourcelib/datasources/datasource_base.py,sha256=N8fOGvTl8oWWAiydLI0Joz66luq73a5yovO0XA9Q3jk,1068
 datasourcelib/datasources/datasource_types.py,sha256=jpm4f9n1l7X9aBD58Pbr9evXiCHHEhRCLojGwchUD7A,205
-datasourcelib/datasources/dataverse_source.py,sha256=8qScGvTvMOVeDc_ODYtBmx97L9AIlokz3wkzioT_ovw,13296
+datasourcelib/datasources/dataverse_source.py,sha256=PTIWArl_rRMap5QfH8ST5kCewE0Ax1xPZ1vgSxeujpU,14080
 datasourcelib/datasources/sharepoint_source - Copy.py,sha256=7V1c-zyvTo4IuPN_YMrKwLZFgbtipbP-mtunmXjOLJQ,17664
 datasourcelib/datasources/sharepoint_source.py,sha256=t3rly2mVEI2qEDuUVqstck5ktkZW0BnF16Bke_NjPLI,23126
-datasourcelib/datasources/sql_source.py,sha256=ntZjiFXpa7V797x7mAATJV0LH-g878VHuRw-QTxEe28,6372
+datasourcelib/datasources/sql_source.py,sha256=pXs5UDAxRyRYuvw-zMNieJAZSqDndh6LlJy9GS6GoiY,7159
 datasourcelib/datasources/sql_source_bkup.py,sha256=ntZjiFXpa7V797x7mAATJV0LH-g878VHuRw-QTxEe28,6372
 datasourcelib/indexes/__init__.py,sha256=S8dz-lyxy1BTuDuLGRJNLrZD_1ku_FIUnDEm6HhMyT0,94
 datasourcelib/indexes/azure_search_index.py,sha256=kznAz06UXgyT1Clqj6gRhnBQ5HFw40ZQHJElRFIcbRo,22115
 datasourcelib/strategies/__init__.py,sha256=kot3u62KIAqYBg9M-KRE4mkMII_zwrDBZNf8Dj1vmX8,399
-datasourcelib/strategies/daily_load.py,sha256=Rh-veUhxKYsplwHTyko_Zp9C6NkUJV5VAGtg-p7Iy34,856
-datasourcelib/strategies/full_load.py,sha256=U1a9wO_ZLRnMInvU0IRW-ZKnhu0Cv437VcNMKIYuzMA,1691
-datasourcelib/strategies/incremental_load.py,sha256=TVqmDLu3m571nqGvzo_69i36QtYe4sBpllFwfPNL0TE,1178
-datasourcelib/strategies/ondemand_load.py,sha256=VxzAYgrW2ebTOC3xm61CerL2AFehZUJLnKrqtGRGJoE,644
-datasourcelib/strategies/timerange_load.py,sha256=c62BN2yXwVFaA_dQV54qenP4vrb4rcFqbx6m-nqhaTA,900
+datasourcelib/strategies/daily_load.py,sha256=A9BnPqPfbPO8UeBy-jtS53eORK7QWWqLOWHrtyFLbl4,1909
+datasourcelib/strategies/full_load.py,sha256=4BS_g4loR28OVqSDwXBCH2jCKbJLZxx6354KCOi_Qjk,4020
+datasourcelib/strategies/incremental_load.py,sha256=CY1tAyXwjZLoq5zMLwB5i5qmT_L8JBaiBxDy9hx8QkQ,1822
+datasourcelib/strategies/ondemand_load.py,sha256=MgenKJbJePLeErdEkXKsz1h7RuR8yT0RV_X523G7UUs,1304
+datasourcelib/strategies/timerange_load.py,sha256=W_sSZg059Lw2o9tmdGKM9D5-z1pph7AN1ftalXhuyjo,1557
 datasourcelib/utils/__init__.py,sha256=9pSIpaK-kdmNuDzwl0Z7QU-_lV3cZE-iwOEPh3RBBTs,298
+datasourcelib/utils/aggregation.py,sha256=5aOBcxay4eTyY-S4BRafNgSi37AY-JXERzcCv055E8w,6060
 datasourcelib/utils/byte_reader.py,sha256=GaoPXwJa2YTWG1Kim0K6JG20eVSaWkZJd1o9bswxHmc,9082
 datasourcelib/utils/exceptions.py,sha256=mgcDaW1k3VndgpMOwSm7NqgyRTvvE2a5ehn3x4fYQww,369
 datasourcelib/utils/file_reader.py,sha256=Zr0rwNTRWE6KeVJEXgTOPS1_JI74LiUSiX5-6qojmN0,7301
 datasourcelib/utils/logger.py,sha256=Sl6lNlvubxtK9ztzyq7vjGVyA8_-pZ_ixpk5jfVsh6U,424
 datasourcelib/utils/validators.py,sha256=fLgmRAb5OZSdMVlHu_n0RKJUDl-G8dI8JsRSfxIquh8,205
-datasourcelib-0.1.6.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
-datasourcelib-0.1.6.dist-info/METADATA,sha256=5lpuBdVreQu7PHsMoD9RWsnSx2cZjpKLEjFhclwO5oA,1199
-datasourcelib-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datasourcelib-0.1.6.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
-datasourcelib-0.1.6.dist-info/RECORD,,
+datasourcelib-0.1.8.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
+datasourcelib-0.1.8.dist-info/METADATA,sha256=NzIB4zUHZei5jADVk8zT4RvWrvlIAskqr_xd_DfmRGg,1199
+datasourcelib-0.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datasourcelib-0.1.8.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
+datasourcelib-0.1.8.dist-info/RECORD,,