ingestr 0.13.91__py3-none-any.whl → 0.13.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/mongodb/helpers.py +124 -0
- ingestr/src/smartsheets/__init__.py +32 -4
- ingestr/src/sources.py +12 -7
- ingestr/tests/unit/test_smartsheets.py +1 -4
- {ingestr-0.13.91.dist-info → ingestr-0.13.93.dist-info}/METADATA +2 -2
- {ingestr-0.13.91.dist-info → ingestr-0.13.93.dist-info}/RECORD +10 -10
- {ingestr-0.13.91.dist-info → ingestr-0.13.93.dist-info}/WHEEL +0 -0
- {ingestr-0.13.91.dist-info → ingestr-0.13.93.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.91.dist-info → ingestr-0.13.93.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
version = "v0.13.91"
|
|
1
|
+
version = "v0.13.93"
|
ingestr/src/mongodb/helpers.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Mongo database source helpers"""
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from itertools import islice
|
|
4
5
|
from typing import (
|
|
5
6
|
TYPE_CHECKING,
|
|
@@ -866,4 +867,127 @@ class MongoDbCollectionResourceConfiguration(BaseConfiguration):
|
|
|
866
867
|
projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value
|
|
867
868
|
|
|
868
869
|
|
|
870
|
+
def convert_mongo_shell_to_extended_json(query_string: str) -> str:
|
|
871
|
+
"""
|
|
872
|
+
Convert MongoDB shell syntax to MongoDB Extended JSON v2 format.
|
|
873
|
+
|
|
874
|
+
This function handles common MongoDB shell constructs like ISODate, ObjectId,
|
|
875
|
+
NumberLong, NumberDecimal, etc. and converts them to their Extended JSON equivalents
|
|
876
|
+
that can be parsed by bson.json_util.
|
|
877
|
+
|
|
878
|
+
Args:
|
|
879
|
+
query_string: A string containing MongoDB shell syntax
|
|
880
|
+
|
|
881
|
+
Returns:
|
|
882
|
+
A string with MongoDB Extended JSON v2 format
|
|
883
|
+
|
|
884
|
+
Examples:
|
|
885
|
+
>>> convert_mongo_shell_to_extended_json('ISODate("2010-01-01T00:00:00.000Z")')
|
|
886
|
+
'{"$date": "2010-01-01T00:00:00.000Z"}'
|
|
887
|
+
|
|
888
|
+
>>> convert_mongo_shell_to_extended_json('ObjectId("507f1f77bcf86cd799439011")')
|
|
889
|
+
'{"$oid": "507f1f77bcf86cd799439011"}'
|
|
890
|
+
"""
|
|
891
|
+
converted = query_string
|
|
892
|
+
|
|
893
|
+
# Convert ISODate("...") to {"$date": "..."}
|
|
894
|
+
# Pattern matches ISODate("2010-01-01T00:00:00.000+0000") or similar
|
|
895
|
+
converted = re.sub(
|
|
896
|
+
r'ISODate\("([^"]+)"\)',
|
|
897
|
+
r'{"$date": "\1"}',
|
|
898
|
+
converted
|
|
899
|
+
)
|
|
900
|
+
|
|
901
|
+
# Convert ObjectId("...") to {"$oid": "..."}
|
|
902
|
+
converted = re.sub(
|
|
903
|
+
r'ObjectId\("([^"]+)"\)',
|
|
904
|
+
r'{"$oid": "\1"}',
|
|
905
|
+
converted
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
# Convert NumberLong(...) to {"$numberLong": "..."}
|
|
909
|
+
# Note: NumberLong can have quotes or not: NumberLong(123) or NumberLong("123")
|
|
910
|
+
converted = re.sub(
|
|
911
|
+
r'NumberLong\("([^"]+)"\)',
|
|
912
|
+
r'{"$numberLong": "\1"}',
|
|
913
|
+
converted
|
|
914
|
+
)
|
|
915
|
+
converted = re.sub(
|
|
916
|
+
r'NumberLong\(([^)]+)\)',
|
|
917
|
+
r'{"$numberLong": "\1"}',
|
|
918
|
+
converted
|
|
919
|
+
)
|
|
920
|
+
|
|
921
|
+
# Convert NumberInt(...) to {"$numberInt": "..."}
|
|
922
|
+
converted = re.sub(
|
|
923
|
+
r'NumberInt\("([^"]+)"\)',
|
|
924
|
+
r'{"$numberInt": "\1"}',
|
|
925
|
+
converted
|
|
926
|
+
)
|
|
927
|
+
converted = re.sub(
|
|
928
|
+
r'NumberInt\(([^)]+)\)',
|
|
929
|
+
r'{"$numberInt": "\1"}',
|
|
930
|
+
converted
|
|
931
|
+
)
|
|
932
|
+
|
|
933
|
+
# Convert NumberDecimal("...") to {"$numberDecimal": "..."}
|
|
934
|
+
converted = re.sub(
|
|
935
|
+
r'NumberDecimal\("([^"]+)"\)',
|
|
936
|
+
r'{"$numberDecimal": "\1"}',
|
|
937
|
+
converted
|
|
938
|
+
)
|
|
939
|
+
|
|
940
|
+
# Convert Timestamp(..., ...) to {"$timestamp": {"t": ..., "i": ...}}
|
|
941
|
+
# Timestamp(1234567890, 1) -> {"$timestamp": {"t": 1234567890, "i": 1}}
|
|
942
|
+
converted = re.sub(
|
|
943
|
+
r'Timestamp\((\d+),\s*(\d+)\)',
|
|
944
|
+
r'{"$timestamp": {"t": \1, "i": \2}}',
|
|
945
|
+
converted
|
|
946
|
+
)
|
|
947
|
+
|
|
948
|
+
# Convert BinData(..., "...") to {"$binary": {"base64": "...", "subType": "..."}}
|
|
949
|
+
converted = re.sub(
|
|
950
|
+
r'BinData\((\d+),\s*"([^"]+)"\)',
|
|
951
|
+
r'{"$binary": {"base64": "\2", "subType": "\1"}}',
|
|
952
|
+
converted
|
|
953
|
+
)
|
|
954
|
+
|
|
955
|
+
# Convert MinKey() to {"$minKey": 1}
|
|
956
|
+
converted = re.sub(
|
|
957
|
+
r'MinKey\(\)',
|
|
958
|
+
r'{"$minKey": 1}',
|
|
959
|
+
converted
|
|
960
|
+
)
|
|
961
|
+
|
|
962
|
+
# Convert MaxKey() to {"$maxKey": 1}
|
|
963
|
+
converted = re.sub(
|
|
964
|
+
r'MaxKey\(\)',
|
|
965
|
+
r'{"$maxKey": 1}',
|
|
966
|
+
converted
|
|
967
|
+
)
|
|
968
|
+
|
|
969
|
+
# Convert UUID("...") to {"$uuid": "..."}
|
|
970
|
+
converted = re.sub(
|
|
971
|
+
r'UUID\("([^"]+)"\)',
|
|
972
|
+
r'{"$uuid": "\1"}',
|
|
973
|
+
converted
|
|
974
|
+
)
|
|
975
|
+
|
|
976
|
+
# Convert DBRef("collection", "id") to {"$ref": "collection", "$id": "id"}
|
|
977
|
+
converted = re.sub(
|
|
978
|
+
r'DBRef\("([^"]+)",\s*"([^"]+)"\)',
|
|
979
|
+
r'{"$ref": "\1", "$id": "\2"}',
|
|
980
|
+
converted
|
|
981
|
+
)
|
|
982
|
+
|
|
983
|
+
# Convert Code("...") to {"$code": "..."}
|
|
984
|
+
converted = re.sub(
|
|
985
|
+
r'Code\("([^"]+)"\)',
|
|
986
|
+
r'{"$code": "\1"}',
|
|
987
|
+
converted
|
|
988
|
+
)
|
|
989
|
+
|
|
990
|
+
return converted
|
|
991
|
+
|
|
992
|
+
|
|
869
993
|
__source_name__ = "mongodb"
|
ingestr/src/smartsheets/__init__.py
CHANGED
|
@@ -3,6 +3,22 @@ from typing import Iterable
|
|
|
3
3
|
import dlt
|
|
4
4
|
import smartsheet # type: ignore
|
|
5
5
|
from dlt.extract import DltResource
|
|
6
|
+
from smartsheet.models.enums import ColumnType # type: ignore
|
|
7
|
+
from smartsheet.models.sheet import Sheet # type: ignore
|
|
8
|
+
|
|
9
|
+
TYPE_MAPPING = {
|
|
10
|
+
ColumnType.TEXT_NUMBER: "text",
|
|
11
|
+
ColumnType.DATE: "date",
|
|
12
|
+
ColumnType.DATETIME: "timestamp",
|
|
13
|
+
ColumnType.CONTACT_LIST: "text",
|
|
14
|
+
ColumnType.CHECKBOX: "bool",
|
|
15
|
+
ColumnType.PICKLIST: "text",
|
|
16
|
+
ColumnType.DURATION: "text",
|
|
17
|
+
ColumnType.PREDECESSOR: "text",
|
|
18
|
+
ColumnType.ABSTRACT_DATETIME: "timestamp",
|
|
19
|
+
ColumnType.MULTI_CONTACT_LIST: "text",
|
|
20
|
+
ColumnType.MULTI_PICKLIST: "text",
|
|
21
|
+
}
|
|
6
22
|
|
|
7
23
|
|
|
8
24
|
@dlt.source
|
|
@@ -34,21 +50,33 @@ def smartsheet_source(
|
|
|
34
50
|
)
|
|
35
51
|
sheet_name = sheet_details.name
|
|
36
52
|
resource_name = f"sheet_{sheet_name.replace(' ', '_').lower()}"
|
|
53
|
+
sheet = smartsheet_client.Sheets.get_sheet(sheet_id_int)
|
|
37
54
|
|
|
38
55
|
yield dlt.resource(
|
|
39
|
-
_get_sheet_data(
|
|
56
|
+
_get_sheet_data(sheet),
|
|
40
57
|
name=resource_name,
|
|
58
|
+
columns=_generate_type_hints(sheet),
|
|
41
59
|
write_disposition="replace",
|
|
42
60
|
)
|
|
43
61
|
|
|
44
62
|
|
|
45
|
-
def _get_sheet_data(
|
|
63
|
+
def _get_sheet_data(sheet: Sheet):
|
|
46
64
|
"""Helper function to get all rows from a sheet."""
|
|
47
|
-
|
|
48
|
-
# Transform rows to a list of dictionaries
|
|
65
|
+
|
|
49
66
|
column_titles = [col.title for col in sheet.columns]
|
|
50
67
|
for row in sheet.rows:
|
|
51
68
|
row_data = {"_row_id": row.id}
|
|
52
69
|
for i, cell in enumerate(row.cells):
|
|
53
70
|
row_data[column_titles[i]] = cell.value
|
|
54
71
|
yield row_data
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _generate_type_hints(sheet: Sheet):
|
|
75
|
+
return {
|
|
76
|
+
col.title: {
|
|
77
|
+
"data_type": TYPE_MAPPING.get(col.type.value),
|
|
78
|
+
"nullable": True,
|
|
79
|
+
}
|
|
80
|
+
for col in sheet.columns
|
|
81
|
+
if col.type.value in TYPE_MAPPING
|
|
82
|
+
}
|
ingestr/src/sources.py
CHANGED
|
@@ -427,14 +427,19 @@ class MongoDbSource:
|
|
|
427
427
|
if ":" in table:
|
|
428
428
|
collection_name, query_json = table.split(":", 1)
|
|
429
429
|
|
|
430
|
-
# Parse
|
|
430
|
+
# Parse the query using MongoDB's extended JSON parser
|
|
431
|
+
# First, convert MongoDB shell syntax to Extended JSON format
|
|
432
|
+
from bson import json_util
|
|
433
|
+
from ingestr.src.mongodb.helpers import convert_mongo_shell_to_extended_json
|
|
434
|
+
|
|
435
|
+
# Convert MongoDB shell constructs to Extended JSON v2 format
|
|
436
|
+
converted_query = convert_mongo_shell_to_extended_json(query_json)
|
|
437
|
+
|
|
431
438
|
try:
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
query
|
|
435
|
-
|
|
436
|
-
raise ValueError(f"Invalid JSON query format: {e}")
|
|
437
|
-
|
|
439
|
+
query = json_util.loads(converted_query)
|
|
440
|
+
except Exception as e:
|
|
441
|
+
raise ValueError(f"Invalid MongoDB query format: {e}")
|
|
442
|
+
|
|
438
443
|
# Validate that it's a list for aggregation pipeline
|
|
439
444
|
if not isinstance(query, list):
|
|
440
445
|
raise ValueError(
|
ingestr/tests/unit/test_smartsheets.py
CHANGED
|
@@ -84,7 +84,6 @@ class TestSmartsheetSource(unittest.TestCase):
|
|
|
84
84
|
list(source)
|
|
85
85
|
|
|
86
86
|
def test_get_sheet_data(self):
|
|
87
|
-
mock_smartsheet_client_instance = MagicMock()
|
|
88
87
|
mock_sheet = Sheet(
|
|
89
88
|
{
|
|
90
89
|
"id": 456,
|
|
@@ -121,15 +120,13 @@ class TestSmartsheetSource(unittest.TestCase):
|
|
|
121
120
|
],
|
|
122
121
|
}
|
|
123
122
|
)
|
|
124
|
-
mock_smartsheet_client_instance.Sheets.get_sheet.return_value = mock_sheet
|
|
125
123
|
|
|
126
|
-
data_generator = _get_sheet_data(
|
|
124
|
+
data_generator = _get_sheet_data(mock_sheet)
|
|
127
125
|
data = list(data_generator)
|
|
128
126
|
|
|
129
127
|
self.assertEqual(len(data), 2)
|
|
130
128
|
self.assertEqual(data[0], {"_row_id": 201, "ID": 1, "Value": "Alpha"})
|
|
131
129
|
self.assertEqual(data[1], {"_row_id": 202, "ID": 2, "Value": "Beta"})
|
|
132
|
-
mock_smartsheet_client_instance.Sheets.get_sheet.assert_called_once_with(456)
|
|
133
130
|
|
|
134
131
|
|
|
135
132
|
if __name__ == "__main__":
|
{ingestr-0.13.91.dist-info → ingestr-0.13.93.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.91
|
|
3
|
+
Version: 0.13.93
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -47,7 +47,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
|
|
|
47
47
|
Requires-Dist: dataclasses-json==0.6.7
|
|
48
48
|
Requires-Dist: decorator==5.2.1
|
|
49
49
|
Requires-Dist: deprecation==2.1.0
|
|
50
|
-
Requires-Dist: dlt-cratedb==0.0.
|
|
50
|
+
Requires-Dist: dlt-cratedb==0.0.2
|
|
51
51
|
Requires-Dist: dlt==1.11.0
|
|
52
52
|
Requires-Dist: dnspython==2.7.0
|
|
53
53
|
Requires-Dist: duckdb-engine==0.17.0
|
{ingestr-0.13.91.dist-info → ingestr-0.13.93.dist-info}/RECORD
CHANGED
|
@@ -2,7 +2,7 @@ ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
|
|
|
2
2
|
ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
|
|
3
3
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
4
4
|
ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
|
|
5
|
-
ingestr/src/buildinfo.py,sha256=
|
|
5
|
+
ingestr/src/buildinfo.py,sha256=scQ4J9B6A-wcmLHWDlzSjSOlSpCXD-Cc6kMYpDV3lBA,21
|
|
6
6
|
ingestr/src/destinations.py,sha256=QNT2rm91cZmY1_Zyj4VnbI14qGmZOUQOQUg9xUTVVYs,23799
|
|
7
7
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
8
8
|
ingestr/src/factory.py,sha256=hC5E_XgrgTHMqwqPc6ihUYvRGTGMTzdPfQhrgPyD0tY,6945
|
|
@@ -12,7 +12,7 @@ ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
|
|
|
12
12
|
ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
|
|
13
13
|
ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
|
|
14
14
|
ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
|
|
15
|
-
ingestr/src/sources.py,sha256=
|
|
15
|
+
ingestr/src/sources.py,sha256=5S-RDTrAD6ZfpzTC4m7RW3WPKY2gBWCrPRC9esEsO5U,125785
|
|
16
16
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
17
17
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
18
18
|
ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
|
|
@@ -96,7 +96,7 @@ ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnb
|
|
|
96
96
|
ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
|
|
97
97
|
ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
|
|
98
98
|
ingestr/src/mongodb/__init__.py,sha256=wu3KJ3VH5FF67gctJqm4T3ZTdBOQam1u6xuFBohq7bs,7486
|
|
99
|
-
ingestr/src/mongodb/helpers.py,sha256
|
|
99
|
+
ingestr/src/mongodb/helpers.py,sha256=-CZo96tRPy5SvJVMejJ1F72F-PJhyVJ3Rfpk0o2M9rQ,35243
|
|
100
100
|
ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
|
|
101
101
|
ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
|
|
102
102
|
ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -125,7 +125,7 @@ ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSs
|
|
|
125
125
|
ingestr/src/slack/__init__.py,sha256=pyDukxcilqTAe_bBzfWJ8Vxi83S-XEdEFBH2pEgILrM,10113
|
|
126
126
|
ingestr/src/slack/helpers.py,sha256=08TLK7vhFvH_uekdLVOLF3bTDe1zgH0QxHObXHzk1a8,6545
|
|
127
127
|
ingestr/src/slack/settings.py,sha256=NhKn4y1zokEa5EmIZ05wtj_-I0GOASXZ5V81M1zXCtY,457
|
|
128
|
-
ingestr/src/smartsheets/__init__.py,sha256=
|
|
128
|
+
ingestr/src/smartsheets/__init__.py,sha256=RIEfN1T2TMFg8T0RvN4o6sqC58YusJRDrmE9Isos5P4,2375
|
|
129
129
|
ingestr/src/solidgate/__init__.py,sha256=Ts83j-JSnFsFuF4tDhVOfZKg7H0-bIpfn3kg1ZOR58A,8003
|
|
130
130
|
ingestr/src/solidgate/helpers.py,sha256=mAsW_1hpD7ab3Y2vw8fxHi4yD3aT1geLdIYZ7ycyxBc,5690
|
|
131
131
|
ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -157,9 +157,9 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
157
157
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
158
158
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
159
159
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
160
|
-
ingestr/tests/unit/test_smartsheets.py,sha256=
|
|
161
|
-
ingestr-0.13.
|
|
162
|
-
ingestr-0.13.91.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
163
|
-
ingestr-0.13.91.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
164
|
-
ingestr-0.13.91.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
165
|
-
ingestr-0.13.91.dist-info/RECORD,,
|
|
160
|
+
ingestr/tests/unit/test_smartsheets.py,sha256=djU1TFD0-Zr7h-WNhm281DoyGApOpNOrZcN4r-U9Pks,4873
|
|
161
|
+
ingestr-0.13.93.dist-info/METADATA,sha256=mWWWSFc4--mvOLSqO8bcw7HwkeeOa8cgQv3TpjPrlFE,15182
|
|
162
|
+
ingestr-0.13.93.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
163
|
+
ingestr-0.13.93.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
164
|
+
ingestr-0.13.93.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
165
|
+
ingestr-0.13.93.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|