ingestr 0.13.90__py3-none-any.whl → 0.13.92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.90"
1
+ version = "v0.13.92"
@@ -1,5 +1,6 @@
1
1
  """Mongo database source helpers"""
2
2
 
3
+ import re
3
4
  from itertools import islice
4
5
  from typing import (
5
6
  TYPE_CHECKING,
@@ -866,4 +867,127 @@ class MongoDbCollectionResourceConfiguration(BaseConfiguration):
866
867
  projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value
867
868
 
868
869
 
870
# Optional ``new`` keyword plus a word boundary: ``new ISODate(...)`` is
# accepted, and identifiers that merely end in a helper name (``myCode(``,
# ``LUUID(``) are left untouched instead of being half-rewritten.
_SHELL_CALL_PREFIX = r'(?:\bnew\s+)?\b'

# One non-empty double-quoted argument, tolerating surrounding whitespace.
_SHELL_QUOTED_ARG = r'\s*"([^"]+)"\s*'


def _shell_pattern(name, arguments):
    """Compile the regex matching shell helper ``name`` called with ``arguments``."""
    return re.compile(_SHELL_CALL_PREFIX + name + r'\(' + arguments + r'\)')


def _numeric_wrapper(key):
    """Return a replacer that stores a bare (unquoted) argument under ``key``."""

    def replace(match):
        # Strip so ``NumberLong( 123 )`` does not leak spaces into the value.
        return '{"%s": "%s"}' % (key, match.group(1).strip())

    return replace


def _bindata_to_extended_json(match):
    """Rewrite one ``BinData(subtype, "base64")`` match as a ``$binary`` document."""
    # The shell takes the subtype as a decimal integer, whereas Extended JSON
    # v2 expects a one- or two-character hex string; copying the digits
    # verbatim turns 128 into the invalid "128" and 10 into hex 0x10.
    subtype = format(int(match.group(1)), "02x")
    return '{"$binary": {"base64": "%s", "subType": "%s"}}' % (
        match.group(2),
        subtype,
    )


# Ordered (pattern, replacement) pairs. Order matters: quoted numeric forms
# run before the bare ones, and ObjectId runs before DBRef so that a DBRef
# whose id is an ObjectId can be recognised by its already-converted ``$oid``.
_SHELL_CONVERSIONS = (
    (_shell_pattern("ISODate", _SHELL_QUOTED_ARG), r'{"$date": "\1"}'),
    (_shell_pattern("ObjectId", _SHELL_QUOTED_ARG), r'{"$oid": "\1"}'),
    (_shell_pattern("NumberLong", _SHELL_QUOTED_ARG), r'{"$numberLong": "\1"}'),
    (_shell_pattern("NumberLong", r'([^)]+)'), _numeric_wrapper("$numberLong")),
    (_shell_pattern("NumberInt", _SHELL_QUOTED_ARG), r'{"$numberInt": "\1"}'),
    (_shell_pattern("NumberInt", r'([^)]+)'), _numeric_wrapper("$numberInt")),
    (
        _shell_pattern("NumberDecimal", _SHELL_QUOTED_ARG),
        r'{"$numberDecimal": "\1"}',
    ),
    (
        _shell_pattern("Timestamp", r'\s*(\d+)\s*,\s*(\d+)\s*'),
        r'{"$timestamp": {"t": \1, "i": \2}}',
    ),
    (
        _shell_pattern("BinData", r'\s*(\d+)\s*,' + _SHELL_QUOTED_ARG),
        _bindata_to_extended_json,
    ),
    (_shell_pattern("MinKey", r'\s*'), r'{"$minKey": 1}'),
    (_shell_pattern("MaxKey", r'\s*'), r'{"$maxKey": 1}'),
    (_shell_pattern("UUID", _SHELL_QUOTED_ARG), r'{"$uuid": "\1"}'),
    (
        _shell_pattern(
            "DBRef", _SHELL_QUOTED_ARG + r',\s*(\{"\$oid": "[^"]+"\})\s*'
        ),
        r'{"$ref": "\1", "$id": \2}',
    ),
    (
        _shell_pattern("DBRef", _SHELL_QUOTED_ARG + ',' + _SHELL_QUOTED_ARG),
        r'{"$ref": "\1", "$id": "\2"}',
    ),
    (_shell_pattern("Code", _SHELL_QUOTED_ARG), r'{"$code": "\1"}'),
)


def convert_mongo_shell_to_extended_json(query_string: str) -> str:
    """
    Convert MongoDB shell syntax to MongoDB Extended JSON v2 format.

    Rewrites the shell helper calls ISODate, ObjectId, NumberLong, NumberInt,
    NumberDecimal, Timestamp, BinData, MinKey, MaxKey, UUID, DBRef and Code
    into their Extended JSON equivalents so the result can be parsed by
    bson.json_util. Each helper may be preceded by ``new`` and may contain
    whitespace inside its parentheses. Text that matches none of the helpers
    is returned unchanged.

    The conversion is purely textual (regex based); it does not parse the
    query, so a helper call spelled out inside a string value is rewritten
    as well.

    Args:
        query_string: A string containing MongoDB shell syntax

    Returns:
        A string with MongoDB Extended JSON v2 format

    Examples:
        >>> convert_mongo_shell_to_extended_json('ISODate("2010-01-01T00:00:00.000Z")')
        '{"$date": "2010-01-01T00:00:00.000Z"}'

        >>> convert_mongo_shell_to_extended_json('ObjectId("507f1f77bcf86cd799439011")')
        '{"$oid": "507f1f77bcf86cd799439011"}'

        >>> convert_mongo_shell_to_extended_json('BinData(128, "AQID")')
        '{"$binary": {"base64": "AQID", "subType": "80"}}'
    """
    converted = query_string
    for pattern, replacement in _SHELL_CONVERSIONS:
        converted = pattern.sub(replacement, converted)
    return converted
991
+
992
+
869
993
  __source_name__ = "mongodb"
@@ -14,6 +14,7 @@ def salesforce_source(
14
14
  password: str,
15
15
  token: str,
16
16
  domain: str,
17
+ custom_object: str = None,
17
18
  ) -> Iterable[DltResource]:
18
19
  """
19
20
  Retrieves data from Salesforce using the Salesforce API.
@@ -131,6 +132,10 @@ def salesforce_source(
131
132
  ) -> Iterable[TDataItem]:
132
133
  yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")
133
134
 
135
+ @dlt.resource(write_disposition="replace")
136
+ def custom() -> Iterable[TDataItem]:
137
+ yield get_records(client, custom_object)
138
+
134
139
  return (
135
140
  user,
136
141
  user_role,
@@ -147,4 +152,5 @@ def salesforce_source(
147
152
  pricebook_entry,
148
153
  task,
149
154
  event,
155
+ custom,
150
156
  )
@@ -3,6 +3,22 @@ from typing import Iterable
3
3
  import dlt
4
4
  import smartsheet # type: ignore
5
5
  from dlt.extract import DltResource
6
+ from smartsheet.models.enums import ColumnType # type: ignore
7
+ from smartsheet.models.sheet import Sheet # type: ignore
8
+
9
+ TYPE_MAPPING = {
10
+ ColumnType.TEXT_NUMBER: "text",
11
+ ColumnType.DATE: "date",
12
+ ColumnType.DATETIME: "timestamp",
13
+ ColumnType.CONTACT_LIST: "text",
14
+ ColumnType.CHECKBOX: "bool",
15
+ ColumnType.PICKLIST: "text",
16
+ ColumnType.DURATION: "text",
17
+ ColumnType.PREDECESSOR: "text",
18
+ ColumnType.ABSTRACT_DATETIME: "timestamp",
19
+ ColumnType.MULTI_CONTACT_LIST: "text",
20
+ ColumnType.MULTI_PICKLIST: "text",
21
+ }
6
22
 
7
23
 
8
24
  @dlt.source
@@ -34,21 +50,33 @@ def smartsheet_source(
34
50
  )
35
51
  sheet_name = sheet_details.name
36
52
  resource_name = f"sheet_{sheet_name.replace(' ', '_').lower()}"
53
+ sheet = smartsheet_client.Sheets.get_sheet(sheet_id_int)
37
54
 
38
55
  yield dlt.resource(
39
- _get_sheet_data(smartsheet_client, sheet_id_int),
56
+ _get_sheet_data(sheet),
40
57
  name=resource_name,
58
+ columns=_generate_type_hints(sheet),
41
59
  write_disposition="replace",
42
60
  )
43
61
 
44
62
 
45
- def _get_sheet_data(smartsheet_client: smartsheet.Smartsheet, sheet_id: int):
63
+ def _get_sheet_data(sheet: Sheet):
46
64
  """Helper function to get all rows from a sheet."""
47
- sheet = smartsheet_client.Sheets.get_sheet(sheet_id)
48
- # Transform rows to a list of dictionaries
65
+
49
66
  column_titles = [col.title for col in sheet.columns]
50
67
  for row in sheet.rows:
51
68
  row_data = {"_row_id": row.id}
52
69
  for i, cell in enumerate(row.cells):
53
70
  row_data[column_titles[i]] = cell.value
54
71
  yield row_data
72
+
73
+
74
def _generate_type_hints(sheet: Sheet):
    """Build per-column hints for the columns of ``sheet`` with a mapped type.

    Columns whose ``type.value`` is not a key of ``TYPE_MAPPING`` are left
    out. Every hint that is produced marks the column as nullable.

    Returns:
        A dict keyed by column title; each value holds the mapped
        ``data_type`` and ``nullable: True``.
    """
    hints = {}
    for column in sheet.columns:
        column_type = column.type.value
        if column_type not in TYPE_MAPPING:
            continue
        hints[column.title] = {
            "data_type": TYPE_MAPPING.get(column_type),
            "nullable": True,
        }
    return hints
ingestr/src/sources.py CHANGED
@@ -427,14 +427,19 @@ class MongoDbSource:
427
427
  if ":" in table:
428
428
  collection_name, query_json = table.split(":", 1)
429
429
 
430
- # Parse and validate the query
430
+ # Parse the query using MongoDB's extended JSON parser
431
+ # First, convert MongoDB shell syntax to Extended JSON format
432
+ from bson import json_util
433
+ from ingestr.src.mongodb.helpers import convert_mongo_shell_to_extended_json
434
+
435
+ # Convert MongoDB shell constructs to Extended JSON v2 format
436
+ converted_query = convert_mongo_shell_to_extended_json(query_json)
437
+
431
438
  try:
432
- import json
433
-
434
- query = json.loads(query_json)
435
- except json.JSONDecodeError as e:
436
- raise ValueError(f"Invalid JSON query format: {e}")
437
-
439
+ query = json_util.loads(converted_query)
440
+ except Exception as e:
441
+ raise ValueError(f"Invalid MongoDB query format: {e}")
442
+
438
443
  # Validate that it's a list for aggregation pipeline
439
444
  if not isinstance(query, list):
440
445
  raise ValueError(
@@ -2525,9 +2530,14 @@ class SalesforceSource:
2525
2530
 
2526
2531
  src = salesforce_source(**creds) # type: ignore
2527
2532
 
2533
+ if table.startswith("custom:"):
2534
+ custom_object = table.split(":")[1]
2535
+ src = salesforce_source(**creds, custom_object=custom_object)
2536
+ return src.with_resources("custom")
2537
+
2528
2538
  if table not in src.resources:
2529
2539
  raise UnsupportedResourceError(table, "Salesforce")
2530
-
2540
+
2531
2541
  return src.with_resources(table)
2532
2542
 
2533
2543
 
@@ -84,7 +84,6 @@ class TestSmartsheetSource(unittest.TestCase):
84
84
  list(source)
85
85
 
86
86
  def test_get_sheet_data(self):
87
- mock_smartsheet_client_instance = MagicMock()
88
87
  mock_sheet = Sheet(
89
88
  {
90
89
  "id": 456,
@@ -121,15 +120,13 @@ class TestSmartsheetSource(unittest.TestCase):
121
120
  ],
122
121
  }
123
122
  )
124
- mock_smartsheet_client_instance.Sheets.get_sheet.return_value = mock_sheet
125
123
 
126
- data_generator = _get_sheet_data(mock_smartsheet_client_instance, 456)
124
+ data_generator = _get_sheet_data(mock_sheet)
127
125
  data = list(data_generator)
128
126
 
129
127
  self.assertEqual(len(data), 2)
130
128
  self.assertEqual(data[0], {"_row_id": 201, "ID": 1, "Value": "Alpha"})
131
129
  self.assertEqual(data[1], {"_row_id": 202, "ID": 2, "Value": "Beta"})
132
- mock_smartsheet_client_instance.Sheets.get_sheet.assert_called_once_with(456)
133
130
 
134
131
 
135
132
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.90
3
+ Version: 0.13.92
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -2,7 +2,7 @@ ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
2
2
  ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
5
- ingestr/src/buildinfo.py,sha256=ZctKjmmFfAq8G28jd1zT9vTB-KNWs9X_92vQvWFmF_8,21
5
+ ingestr/src/buildinfo.py,sha256=bR_SHBOnG2JaQD4K3ZvZC_ctkUAC7obqll7Reuy7YHE,21
6
6
  ingestr/src/destinations.py,sha256=QNT2rm91cZmY1_Zyj4VnbI14qGmZOUQOQUg9xUTVVYs,23799
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
8
  ingestr/src/factory.py,sha256=hC5E_XgrgTHMqwqPc6ihUYvRGTGMTzdPfQhrgPyD0tY,6945
@@ -12,7 +12,7 @@ ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
12
  ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
13
13
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
14
14
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
15
- ingestr/src/sources.py,sha256=MM_-6ZmIwFLS_L4kBkwJJc0XDyjDyHUkxMMnQaRfuRA,125176
15
+ ingestr/src/sources.py,sha256=5S-RDTrAD6ZfpzTC4m7RW3WPKY2gBWCrPRC9esEsO5U,125785
16
16
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
17
17
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
18
18
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -96,7 +96,7 @@ ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnb
96
96
  ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
97
97
  ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
98
98
  ingestr/src/mongodb/__init__.py,sha256=wu3KJ3VH5FF67gctJqm4T3ZTdBOQam1u6xuFBohq7bs,7486
99
- ingestr/src/mongodb/helpers.py,sha256=TmEbQ-Rz5ajxmaMgZa7nrI13-L7Z_ClbFCFPnmPIrgE,31739
99
+ ingestr/src/mongodb/helpers.py,sha256=-CZo96tRPy5SvJVMejJ1F72F-PJhyVJ3Rfpk0o2M9rQ,35243
100
100
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
101
101
  ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
102
102
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -116,7 +116,7 @@ ingestr/src/pipedrive/helpers/pages.py,sha256=Klpjw2OnMuhzit3PpiHKsfzGcJ3rQPSQBl
116
116
  ingestr/src/quickbooks/__init__.py,sha256=cZUuVCOTGPHTscRj6i0DytO63_fWF-4ieMxoU4PcyTg,3727
117
117
  ingestr/src/revenuecat/__init__.py,sha256=5HbyZuEOekkbeeT72sM_bnGygSyYdmd_vczfAUz7xoM,4029
118
118
  ingestr/src/revenuecat/helpers.py,sha256=CYU6l79kplnfL87GfdxyGeEBrBSWEZfGP0GyjPHuVDk,9619
119
- ingestr/src/salesforce/__init__.py,sha256=HVHY8pDngB498B6g6KDzwq-q2KPU4PxuEd9Y_8tDDFs,4716
119
+ ingestr/src/salesforce/__init__.py,sha256=Ijveo8gyo_wLzQRBklxIm3RV0y2Gta9-mR44RbJljpI,4901
120
120
  ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
121
121
  ingestr/src/shopify/__init__.py,sha256=RzSSG93g-Qlkz6TAxi1XasFDdxxtVXIo53ZTtjGczW4,62602
122
122
  ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
@@ -125,7 +125,7 @@ ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSs
125
125
  ingestr/src/slack/__init__.py,sha256=pyDukxcilqTAe_bBzfWJ8Vxi83S-XEdEFBH2pEgILrM,10113
126
126
  ingestr/src/slack/helpers.py,sha256=08TLK7vhFvH_uekdLVOLF3bTDe1zgH0QxHObXHzk1a8,6545
127
127
  ingestr/src/slack/settings.py,sha256=NhKn4y1zokEa5EmIZ05wtj_-I0GOASXZ5V81M1zXCtY,457
128
- ingestr/src/smartsheets/__init__.py,sha256=3_Kz3AW68UhRg8WvppOeIcQYw04XlyrIHLx2A2pCx-o,1640
128
+ ingestr/src/smartsheets/__init__.py,sha256=RIEfN1T2TMFg8T0RvN4o6sqC58YusJRDrmE9Isos5P4,2375
129
129
  ingestr/src/solidgate/__init__.py,sha256=Ts83j-JSnFsFuF4tDhVOfZKg7H0-bIpfn3kg1ZOR58A,8003
130
130
  ingestr/src/solidgate/helpers.py,sha256=mAsW_1hpD7ab3Y2vw8fxHi4yD3aT1geLdIYZ7ycyxBc,5690
131
131
  ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -157,9 +157,9 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
157
157
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
158
158
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
159
159
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
160
- ingestr/tests/unit/test_smartsheets.py,sha256=i9diA7mXXwQLKGTcalD89OEDx8rqZi8pxtcE23ncgRQ,5122
161
- ingestr-0.13.90.dist-info/METADATA,sha256=i2rKDULepQJ4dBoMOr-PHvDI_DAEcEC4NSy8zknxoRc,15182
162
- ingestr-0.13.90.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
163
- ingestr-0.13.90.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
164
- ingestr-0.13.90.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
165
- ingestr-0.13.90.dist-info/RECORD,,
160
+ ingestr/tests/unit/test_smartsheets.py,sha256=djU1TFD0-Zr7h-WNhm281DoyGApOpNOrZcN4r-U9Pks,4873
161
+ ingestr-0.13.92.dist-info/METADATA,sha256=m59fSWl6js1EilscWOHeNyTe1p7mfKor2Xc2qvElXwA,15182
162
+ ingestr-0.13.92.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
163
+ ingestr-0.13.92.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
164
+ ingestr-0.13.92.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
165
+ ingestr-0.13.92.dist-info/RECORD,,