datasourcelib 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
  import pandas as pd
3
3
  from string import Formatter
4
4
  from typing import Iterable, Any, Dict, List, Optional, Union
5
+ import regex as re
5
6
 
6
7
  def _placeholders(fmt: str) -> List[str]:
7
8
  """
@@ -12,6 +13,12 @@ def _placeholders(fmt: str) -> List[str]:
12
13
  def _safe_str(x) -> str:
13
14
  return "" if pd.isna(x) else str(x).strip()
14
15
 
16
def sanitize(s: str) -> str:
    """Strip every character that is not an ASCII letter, digit, ``_``, ``-`` or ``=``.

    Args:
        s: Text to clean.

    Returns:
        ``s`` with all disallowed characters removed; the characters that
        remain keep their original order.
    """
    # Deliberately NOT decorated with @staticmethod: this function is called
    # by its bare name rather than through a class, and a bare staticmethod
    # object only became callable in Python 3.10 -- on older interpreters the
    # decorator turns every direct call into a TypeError.
    # `re` is whatever this module imported under that name; the pattern is a
    # plain negated character class.
    return re.sub(r'[^A-Za-z0-9_\-=]', '', s)
21
+
15
22
  def generate_grouped_summaries(
16
23
  df: pd.DataFrame,
17
24
  aggregation_field: str,
@@ -146,7 +153,7 @@ def generate_grouped_summaries(
146
153
 
147
154
  content = header + " " + " ".join(lines)
148
155
  summaries.append(
149
- {"content" : content, "id": group_value}
156
+ {"content" : content, "id": sanitize(group_value.replace(" ", "_").strip())}
150
157
  )
151
158
 
152
159
  return summaries
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datasourcelib
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: Data source sync strategies for vector DBs
5
5
  Home-page: https://github.com/akashmaurya0217/datasourcelib
6
6
  Author: Akash Kumar Maurya
@@ -24,14 +24,14 @@ datasourcelib/strategies/incremental_load.py,sha256=CY1tAyXwjZLoq5zMLwB5i5qmT_L8
24
24
  datasourcelib/strategies/ondemand_load.py,sha256=MgenKJbJePLeErdEkXKsz1h7RuR8yT0RV_X523G7UUs,1304
25
25
  datasourcelib/strategies/timerange_load.py,sha256=W_sSZg059Lw2o9tmdGKM9D5-z1pph7AN1ftalXhuyjo,1557
26
26
  datasourcelib/utils/__init__.py,sha256=9pSIpaK-kdmNuDzwl0Z7QU-_lV3cZE-iwOEPh3RBBTs,298
27
- datasourcelib/utils/aggregation.py,sha256=5aOBcxay4eTyY-S4BRafNgSi37AY-JXERzcCv055E8w,6060
27
+ datasourcelib/utils/aggregation.py,sha256=_XzTxdGIc-nc0w1FE1NfPA6J1PmAKiSpz0sYU7yEU6s,6337
28
28
  datasourcelib/utils/byte_reader.py,sha256=GaoPXwJa2YTWG1Kim0K6JG20eVSaWkZJd1o9bswxHmc,9082
29
29
  datasourcelib/utils/exceptions.py,sha256=mgcDaW1k3VndgpMOwSm7NqgyRTvvE2a5ehn3x4fYQww,369
30
30
  datasourcelib/utils/file_reader.py,sha256=Zr0rwNTRWE6KeVJEXgTOPS1_JI74LiUSiX5-6qojmN0,7301
31
31
  datasourcelib/utils/logger.py,sha256=Sl6lNlvubxtK9ztzyq7vjGVyA8_-pZ_ixpk5jfVsh6U,424
32
32
  datasourcelib/utils/validators.py,sha256=fLgmRAb5OZSdMVlHu_n0RKJUDl-G8dI8JsRSfxIquh8,205
33
- datasourcelib-0.1.13.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
34
- datasourcelib-0.1.13.dist-info/METADATA,sha256=0OVrXyRAPvKZz9gXSrM5DBOr_Vh1_iJPgVVOB7PB3wY,1200
35
- datasourcelib-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
36
- datasourcelib-0.1.13.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
37
- datasourcelib-0.1.13.dist-info/RECORD,,
33
+ datasourcelib-0.1.14.dist-info/licenses/LICENSE,sha256=9S0AcKETmp9XOcC73jEjN7WSkuSWGFGreiBat6ONClo,1087
34
+ datasourcelib-0.1.14.dist-info/METADATA,sha256=EyIP4yk74vb4-yhOWh--E9aXo8E1gt8RDVkYVoexfNI,1200
35
+ datasourcelib-0.1.14.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
36
+ datasourcelib-0.1.14.dist-info/top_level.txt,sha256=wIwiwdIj8T9pAvE2TkGLUvT2oIi43C2vkkTKibUlv3U,14
37
+ datasourcelib-0.1.14.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5