datasourcelib 0.1.13__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44):
  1. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/PKG-INFO +1 -1
  2. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/setup.py +1 -1
  3. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/utils/aggregation.py +8 -1
  4. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib.egg-info/PKG-INFO +1 -1
  5. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/LICENSE +0 -0
  6. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/MANIFEST.in +0 -0
  7. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/README.md +0 -0
  8. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/pyproject.toml +0 -0
  9. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/setup.cfg +0 -0
  10. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/__init__.py +0 -0
  11. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/core/__init__.py +0 -0
  12. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/core/sync_base.py +0 -0
  13. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/core/sync_manager.py +0 -0
  14. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/core/sync_types.py +0 -0
  15. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/__init__.py +0 -0
  16. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/azure_devops_source copy.py +0 -0
  17. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/azure_devops_source.py +0 -0
  18. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/azure_devops_source10dec.py +0 -0
  19. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/blob_source.py +0 -0
  20. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/datasource_base.py +0 -0
  21. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/datasource_types.py +0 -0
  22. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/dataverse_source.py +0 -0
  23. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/sharepoint_source - Copy.py +0 -0
  24. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/sharepoint_source.py +0 -0
  25. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/sql_source.py +0 -0
  26. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/datasources/sql_source_bkup.py +0 -0
  27. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/indexes/__init__.py +0 -0
  28. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/indexes/azure_search_index.py +0 -0
  29. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/strategies/__init__.py +0 -0
  30. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/strategies/daily_load.py +0 -0
  31. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/strategies/full_load.py +0 -0
  32. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/strategies/incremental_load.py +0 -0
  33. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/strategies/ondemand_load.py +0 -0
  34. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/strategies/timerange_load.py +0 -0
  35. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/utils/__init__.py +0 -0
  36. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/utils/byte_reader.py +0 -0
  37. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/utils/exceptions.py +0 -0
  38. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/utils/file_reader.py +0 -0
  39. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/utils/logger.py +0 -0
  40. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib/utils/validators.py +0 -0
  41. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib.egg-info/SOURCES.txt +0 -0
  42. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib.egg-info/dependency_links.txt +0 -0
  43. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib.egg-info/requires.txt +0 -0
  44. {datasourcelib-0.1.13 → datasourcelib-0.1.14}/src/datasourcelib.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datasourcelib
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: Data source sync strategies for vector DBs
5
5
  Home-page: https://github.com/akashmaurya0217/datasourcelib
6
6
  Author: Akash Kumar Maurya
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="datasourcelib",
5
- version="0.1.13",
5
+ version="0.1.14",
6
6
  packages=find_packages(where="src", exclude=["tests.*", "tests", "examples.*", "examples"]),
7
7
  package_dir={"": "src"},
8
8
  install_requires=[
@@ -2,6 +2,7 @@
2
2
  import pandas as pd
3
3
  from string import Formatter
4
4
  from typing import Iterable, Any, Dict, List, Optional, Union
5
+ import regex as re
5
6
 
6
7
  def _placeholders(fmt: str) -> List[str]:
7
8
  """
@@ -12,6 +13,12 @@ def _placeholders(fmt: str) -> List[str]:
12
13
  def _safe_str(x) -> str:
13
14
  return "" if pd.isna(x) else str(x).strip()
14
15
 
16
+ @staticmethod
17
+ def sanitize(s: str) -> str:
18
+ """Keep only A-Z a-z 0-9 underscore/dash/equals in a safe way."""
19
+ # using the `regex` import already present as `re`
20
+ return re.sub(r'[^A-Za-z0-9_\-=]', '', s)
21
+
15
22
  def generate_grouped_summaries(
16
23
  df: pd.DataFrame,
17
24
  aggregation_field: str,
@@ -146,7 +153,7 @@ def generate_grouped_summaries(
146
153
 
147
154
  content = header + " " + " ".join(lines)
148
155
  summaries.append(
149
- {"content" : content, "id": group_value}
156
+ {"content" : content, "id": sanitize(group_value.replace(" ", "_").strip())}
150
157
  )
151
158
 
152
159
  return summaries
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datasourcelib
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: Data source sync strategies for vector DBs
5
5
  Home-page: https://github.com/akashmaurya0217/datasourcelib
6
6
  Author: Akash Kumar Maurya
File without changes
File without changes
File without changes