ingestr 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -0,0 +1,77 @@
1
+ """Source that loads a table from a memory-mapped Arrow IPC file.
2
+ Supports optional incremental filtering of the rows on a cursor column.
3
+ """
4
+
5
+ from typing import Any, Optional
6
+
7
+ import dlt
8
+ from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns
9
+ from dlt.extract.items import TTableHintTemplate
10
+
11
+
12
def memory_mapped_arrow(
    path: str,
    columns: Optional[TTableSchemaColumns] = None,
    primary_key: Optional[TTableHintTemplate[TColumnNames]] = None,
    merge_key: Optional[TTableHintTemplate[TColumnNames]] = None,
    incremental: Optional[dlt.sources.incremental[Any]] = None,
):
    """Build a dlt resource that yields one Arrow table read via a memory map.

    Args:
        path: Filesystem path of the Arrow IPC file to memory-map.
        columns: Column hints forwarded to ``dlt.resource``.
        primary_key: Primary-key hint forwarded to ``dlt.resource``.
        merge_key: Merge-key hint forwarded to ``dlt.resource``.
        incremental: Optional incremental cursor. When truthy, rows are kept
            only if the cursor column is strictly greater than
            ``incremental.last_value`` and strictly less than
            ``incremental.end_value`` (each bound applies only when it is
            not ``None``).

    Returns:
        The ``arrow_mmap`` resource created by the ``dlt.resource`` decorator.

    Raises:
        KeyError: Raised when the resource is iterated and
            ``incremental.cursor_path`` is not a column of the table.
    """

    @dlt.resource(
        name="arrow_mmap",
        columns=columns,  # type: ignore
        primary_key=primary_key,  # type: ignore
        merge_key=merge_key,  # type: ignore
    )
    def arrow_mmap(
        incremental: Optional[dlt.sources.incremental[Any]] = incremental,
    ):
        import pyarrow as pa  # type: ignore

        # Import the compute submodule explicitly: ``pa.compute`` is only
        # available as an attribute once ``pyarrow.compute`` has been imported
        # somewhere, so relying on it implicitly can raise AttributeError.
        import pyarrow.compute as pc  # type: ignore
        import pyarrow.ipc as ipc  # type: ignore

        with pa.memory_map(path, "rb") as mmap:
            reader: ipc.RecordBatchFileReader = ipc.open_file(mmap)
            table = reader.read_all()

        if incremental:
            cursor_path = incremental.cursor_path
            if cursor_path not in table.column_names:
                raise KeyError(
                    f"Cursor column '{cursor_path}' does not exist in table"
                )

            # Filtering never changes the schema, so the column type can be
            # looked up once and reused for both bounds.
            cursor_type = table.schema.field(cursor_path).type  # type: ignore
            last_value = incremental.last_value
            end_value = incremental.end_value

            if last_value is not None:
                # NOTE(review): the lower bound coerces for any temporal column
                # while the upper bound below coerces only for timestamp
                # columns. Kept as-is; confirm whether the asymmetry is
                # intended.
                if pa.types.is_temporal(cursor_type) and not isinstance(
                    last_value, pa.TimestampScalar
                ):
                    last_value = pa.scalar(last_value, type=pa.timestamp("ns"))
                table = table.filter(pc.field(cursor_path) > last_value)  # type: ignore

            if end_value is not None:
                if pa.types.is_timestamp(cursor_type) and not isinstance(
                    end_value, pa.TimestampScalar
                ):
                    end_value = pa.scalar(end_value, type=pa.timestamp("ns"))
                table = table.filter(pc.field(cursor_path) < end_value)  # type: ignore

        yield table

    return arrow_mmap
ingestr/src/factory.py CHANGED
@@ -18,6 +18,7 @@ from ingestr.src.sources import (
18
18
  AdjustSource,
19
19
  AirtableSource,
20
20
  AppsflyerSource,
21
+ ArrowMemoryMappedSource,
21
22
  ChessSource,
22
23
  FacebookAdsSource,
23
24
  GoogleSheetsSource,
@@ -136,6 +137,8 @@ class SourceDestinationFactory:
136
137
  return AdjustSource()
137
138
  elif self.source_scheme == "zendesk":
138
139
  return ZendeskSource()
140
+ elif self.source_scheme == "mmap":
141
+ return ArrowMemoryMappedSource()
139
142
  elif self.source_scheme == "s3":
140
143
  return S3Source()
141
144
  else:
@@ -65,7 +65,7 @@ def mongodb(
65
65
  sections=("sources", "mongodb"), spec=MongoDbCollectionResourceConfiguration
66
66
  )
67
67
  def mongodb_collection(
68
- connection_url: str = dlt.secrets.value,
68
+ connection_url: str = dlt.config.value,
69
69
  database: Optional[str] = dlt.config.value,
70
70
  collection: str = dlt.config.value,
71
71
  incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
@@ -155,7 +155,7 @@ class MongoDbCollectionConfiguration(BaseConfiguration):
155
155
 
156
156
  @configspec
157
157
  class MongoDbCollectionResourceConfiguration(BaseConfiguration):
158
- connection_url: str = dlt.secrets.value
158
+ connection_url: str = dlt.config.value
159
159
  database: Optional[str] = dlt.config.value
160
160
  collection: str = dlt.config.value
161
161
  incremental: Optional[dlt.sources.incremental] = None # type: ignore[type-arg]
ingestr/src/sources.py CHANGED
@@ -12,6 +12,7 @@ from dlt.common.typing import TSecretStrValue
12
12
  from ingestr.src.adjust._init_ import adjust_source
13
13
  from ingestr.src.airtable import airtable_source
14
14
  from ingestr.src.appsflyer._init_ import appsflyer_source
15
+ from ingestr.src.arrow import memory_mapped_arrow
15
16
  from ingestr.src.chess import source
16
17
  from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
17
18
  from ingestr.src.filesystem import readers
@@ -75,6 +76,51 @@ class SqlSource:
75
76
  return table_instance
76
77
 
77
78
 
79
class ArrowMemoryMappedSource:
    """Source that turns a ``<scheme>://<path>`` URI into an Arrow mmap resource.

    The resource itself is produced by ``table_builder`` (by default
    ``memory_mapped_arrow``); this class only validates the path and assembles
    the builder arguments.
    """

    # Callable invoked with ``path``, ``incremental``, ``merge_key`` and
    # ``primary_key`` keyword arguments to build the resource.
    table_builder: Callable

    def __init__(self, table_builder=memory_mapped_arrow) -> None:
        self.table_builder = table_builder

    def handles_incrementality(self) -> bool:
        """Return ``False``.

        NOTE(review): presumably tells the caller to apply its own incremental
        handling; the consumer of this flag is not visible here.
        """
        return False

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build the resource for the file referenced by ``uri``.

        Args:
            uri: Location of the file, in the form ``<scheme>://<path>``.
            table: Unused by this source.
            **kwargs: Optional ``incremental_key``, ``interval_start``,
                ``interval_end``, ``primary_key`` and ``merge_key`` entries.

        Returns:
            Whatever ``self.table_builder`` returns.

        Raises:
            ValueError: If ``uri`` has no ``://`` separator, or if the path
                does not exist or is a directory.
        """
        import os

        incremental = None
        incremental_key = kwargs.get("incremental_key")
        if incremental_key:
            incremental = dlt.sources.incremental(
                incremental_key,
                initial_value=kwargs.get("interval_start"),
                end_value=kwargs.get("interval_end"),
            )

        # partition() splits on the first separator only, so a path that itself
        # contains "://" is kept whole, and a missing separator is reported as
        # a ValueError instead of surfacing as an IndexError.
        _scheme, separator, file_path = uri.partition("://")
        if not separator:
            raise ValueError(
                f"Invalid URI {uri!r}: expected the form '<scheme>://<path>'"
            )

        if not os.path.exists(file_path):
            raise ValueError(f"File at path {file_path} does not exist")

        if os.path.isdir(file_path):
            raise ValueError(
                f"Path {file_path} is a directory, it should be an Arrow memory mapped file"
            )

        return self.table_builder(
            path=file_path,
            incremental=incremental,
            merge_key=kwargs.get("merge_key"),
            primary_key=kwargs.get("primary_key"),
        )
122
+
123
+
78
124
  class MongoDbSource:
79
125
  table_builder: Callable
80
126
 
@@ -656,12 +702,12 @@ class KafkaSource:
656
702
  credentials=KafkaCredentials(
657
703
  bootstrap_servers=bootstrap_servers[0],
658
704
  group_id=group_id[0],
659
- security_protocol=security_protocol[0]
660
- if len(security_protocol) > 0
661
- else None, # type: ignore
662
- sasl_mechanisms=sasl_mechanisms[0]
663
- if len(sasl_mechanisms) > 0
664
- else None, # type: ignore
705
+ security_protocol=(
706
+ security_protocol[0] if len(security_protocol) > 0 else None
707
+ ), # type: ignore
708
+ sasl_mechanisms=(
709
+ sasl_mechanisms[0] if len(sasl_mechanisms) > 0 else None
710
+ ), # type: ignore
665
711
  sasl_username=sasl_username[0] if len(sasl_username) > 0 else None, # type: ignore
666
712
  sasl_password=sasl_password[0] if len(sasl_password) > 0 else None, # type: ignore
667
713
  ),
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.9.0"
1
+ __version__ = "0.9.2"
@@ -1,11 +1,10 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.9.0
3
+ Version: 0.9.2
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
7
7
  Author-email: Burak Karakan <burak.karakan@getbruin.com>
8
- License-File: LICENSE.md
9
8
  Classifier: Development Status :: 4 - Beta
10
9
  Classifier: Environment :: Console
11
10
  Classifier: Intended Audience :: Developers
@@ -15,7 +14,6 @@ Classifier: Programming Language :: Python :: 3
15
14
  Classifier: Topic :: Database
16
15
  Requires-Python: >=3.9
17
16
  Requires-Dist: confluent-kafka>=2.3.0
18
- Requires-Dist: cx-oracle==8.3.0
19
17
  Requires-Dist: databricks-sql-connector==2.9.3
20
18
  Requires-Dist: dlt==0.5.1
21
19
  Requires-Dist: duckdb-engine==0.11.5
@@ -30,7 +28,6 @@ Requires-Dist: py-machineid==0.5.1
30
28
  Requires-Dist: pyairtable==2.3.3
31
29
  Requires-Dist: pymongo==4.6.3
32
30
  Requires-Dist: pymysql==1.1.0
33
- Requires-Dist: pyodbc==5.1.0
34
31
  Requires-Dist: pyrate-limiter==3.6.1
35
32
  Requires-Dist: redshift-connector==2.1.0
36
33
  Requires-Dist: rich==13.7.1
@@ -46,6 +43,10 @@ Requires-Dist: stripe==10.7.0
46
43
  Requires-Dist: tqdm==4.66.2
47
44
  Requires-Dist: typer==0.12.3
48
45
  Requires-Dist: types-requests==2.32.0.20240907
46
+ Provides-Extra: odbc
47
+ Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
48
+ Provides-Extra: oracle
49
+ Requires-Dist: cx-oracle==8.3.0; extra == 'oracle'
49
50
  Description-Content-Type: text/markdown
50
51
 
51
52
  <div align="center">
@@ -1,15 +1,16 @@
1
1
  ingestr/main.py,sha256=U66TM57ePv-RdoAftQ0pFZx8woZUQnLepKa50C-bA5I,17655
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
4
- ingestr/src/factory.py,sha256=NbSLbF2ClHRcptpmL0n9p7bGVz8Uj6xyG0Y93OZTfKM,4830
5
- ingestr/src/sources.py,sha256=Ny7qYulZUmwrCsG8UZ_SeR6FIqVR3E1FWdvj_yQD41M,30786
4
+ ingestr/src/factory.py,sha256=ft81B-YJgvEROkHAZjMjTIS7IYvle-uZQv45b7-Wfk0,4947
5
+ ingestr/src/sources.py,sha256=iZbCY-pzv6jbgdHOh0Vdsl3cBoC71eiFZgu_a5RoaDE,32188
6
6
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
7
- ingestr/src/version.py,sha256=H9NWRZb7NbeRRPLP_V1fARmLNXranorVM-OOY-8_2ug,22
7
+ ingestr/src/version.py,sha256=gqT-BGoeEItda9fICQDvLbxEjWRIBhFJxPxxKvmHLUo,22
8
8
  ingestr/src/adjust/_init_.py,sha256=_jJE3Ywvv-YyJ7ywICdht_X2Gnd1cKm6F1wAfnpXuWM,890
9
9
  ingestr/src/adjust/helpers.py,sha256=kkYC3MqMHLNucuQ50klZWrvd3o8VfUysNtZTQSsKZ_c,2588
10
10
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
11
11
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
12
12
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
13
+ ingestr/src/arrow/__init__.py,sha256=AgU7S9Ra3ZeeG00Mf32zxO5sgMFfRnTdOSirUJ1Pu10,2976
13
14
  ingestr/src/chess/__init__.py,sha256=PaxT2DObudOGlhyoENE5LjR6rTdsxiqKKpAZeyzVLCA,6791
14
15
  ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
15
16
  ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
@@ -35,8 +36,8 @@ ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,
35
36
  ingestr/src/klaviyo/_init_.py,sha256=nq2T1p3Xc7yiwGabsZBp2Jy2fa8_n5oxqxBnUGhKOgg,6592
36
37
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
37
38
  ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
38
- ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
39
- ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
39
+ ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
40
+ ingestr/src/mongodb/helpers.py,sha256=y9rYKR8eyIqam_eNsZmwSYevgi8mghh7Zp8qhTHl65s,5652
40
41
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
41
42
  ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
42
43
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -73,8 +74,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
73
74
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
74
75
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
75
76
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
76
- ingestr-0.9.0.dist-info/METADATA,sha256=yJZ6Tdum4TirrssuFG40UZ_zm4pzhzSSweJRIignFck,6949
77
- ingestr-0.9.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
78
- ingestr-0.9.0.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
79
- ingestr-0.9.0.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
80
- ingestr-0.9.0.dist-info/RECORD,,
77
+ ingestr-0.9.2.dist-info/METADATA,sha256=ZnA32SeV-3jKU3g13UsczdNiKtvu1lXvF-Gb2mZUlzw,7004
78
+ ingestr-0.9.2.dist-info/WHEEL,sha256=wukiCwsxxsuzcQTdnC_ZWHZECE4wwOh3xCCrap6i6Ts,87
79
+ ingestr-0.9.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
80
+ ingestr-0.9.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
81
+ ingestr-0.9.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.26.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any