ingestr 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -32,6 +32,18 @@ DATE_FORMATS = [
32
32
  "%Y-%m-%dT%H:%M:%S.%f%z",
33
33
  ]
34
34
 
35
+ # https://dlthub.com/docs/dlt-ecosystem/file-formats/parquet#supported-destinations
36
+ PARQUET_SUPPORTED_DESTINATIONS = [
37
+ "bigquery",
38
+ "duckdb",
39
+ "snowflake",
40
+ "databricks",
41
+ "synapse",
42
+ ]
43
+
44
+ # these sources would return a JSON for sure, which means they cannot be used with Parquet loader for BigQuery
45
+ JSON_RETURNING_SOURCES = ["notion"]
46
+
35
47
 
36
48
  class SpinnerCollector(Collector):
37
49
  status: Status
@@ -155,6 +167,20 @@ def ingest(
155
167
  envvar="PROGRESS",
156
168
  ),
157
169
  ] = "interactive", # type: ignore
170
+ sql_backend: Annotated[
171
+ Optional[str],
172
+ typer.Option(
173
+ help="The SQL backend to use, must be one of 'sqlalchemy', 'pyarrow'",
174
+ envvar="SQL_BACKEND",
175
+ ),
176
+ ] = "pyarrow", # type: ignore
177
+ loader_file_format: Annotated[
178
+ Optional[str],
179
+ typer.Option(
180
+ help="The file format to use when loading data, must be one of 'jsonl', 'parquet', 'default'",
181
+ envvar="LOADER_FILE_FORMAT",
182
+ ),
183
+ ] = "default", # type: ignore
158
184
  ):
159
185
  track(
160
186
  "command_triggered",
@@ -240,11 +266,25 @@ def ingest(
240
266
  merge_key=merge_key,
241
267
  interval_start=interval_start,
242
268
  interval_end=interval_end,
269
+ sql_backend=sql_backend,
243
270
  )
244
271
 
245
272
  if original_incremental_strategy == "delete+insert":
246
273
  dlt_source.incremental.primary_key = ()
247
274
 
275
+ if (
276
+ factory.destination_scheme in PARQUET_SUPPORTED_DESTINATIONS
277
+ and loader_file_format == "default"
278
+ ):
279
+ loader_file_format = "parquet"
280
+
281
+ # if the source is a JSON returning source, we cannot use Parquet loader for BigQuery
282
+ if factory.destination_scheme == 'bigquery' and factory.source_scheme in JSON_RETURNING_SOURCES:
283
+ loader_file_format = "jsonl"
284
+
285
+ elif loader_file_format == "default":
286
+ loader_file_format = "jsonl"
287
+
248
288
  run_info = pipeline.run(
249
289
  dlt_source,
250
290
  **destination.dlt_run_params(
@@ -253,6 +293,7 @@ def ingest(
253
293
  ),
254
294
  write_disposition=incremental_strategy, # type: ignore
255
295
  primary_key=(primary_key if primary_key and len(primary_key) > 0 else None), # type: ignore
296
+ loader_file_format=loader_file_format, # type: ignore
256
297
  )
257
298
 
258
299
  destination.post_load()
ingestr/main_test.py CHANGED
@@ -24,6 +24,8 @@ def invoke_ingest_command(
24
24
  merge_key=None,
25
25
  interval_start=None,
26
26
  interval_end=None,
27
+ sql_backend=None,
28
+ loader_file_format=None,
27
29
  ):
28
30
  args = [
29
31
  "ingest",
@@ -61,6 +63,14 @@ def invoke_ingest_command(
61
63
  args.append("--interval-end")
62
64
  args.append(interval_end)
63
65
 
66
+ if sql_backend:
67
+ args.append("--sql-backend")
68
+ args.append(sql_backend)
69
+
70
+ if loader_file_format:
71
+ args.append("--loader-file-format")
72
+ args.append(loader_file_format)
73
+
64
74
  result = runner.invoke(
65
75
  app,
66
76
  args,
@@ -93,7 +103,6 @@ def test_create_replace():
93
103
  "testschema.output",
94
104
  )
95
105
 
96
- print(result.stdout)
97
106
  assert result.exit_code == 0
98
107
 
99
108
  res = conn.sql(
@@ -138,6 +147,7 @@ def test_append():
138
147
  "testschema_append.output",
139
148
  "append",
140
149
  "updated_at",
150
+ sql_backend="sqlalchemy",
141
151
  )
142
152
  assert res.exit_code == 0
143
153
 
@@ -194,6 +204,7 @@ def test_merge_with_primary_key():
194
204
  "merge",
195
205
  "updated_at",
196
206
  "id",
207
+ sql_backend="sqlalchemy",
197
208
  )
198
209
  assert res.exit_code == 0
199
210
  return res
@@ -351,6 +362,8 @@ def test_delete_insert_without_primary_key():
351
362
  "testschema_delete_insert.output",
352
363
  inc_strategy="delete+insert",
353
364
  inc_key="updated_at",
365
+ sql_backend="sqlalchemy",
366
+ loader_file_format="jsonl",
354
367
  )
355
368
  assert res.exit_code == 0
356
369
  return res
@@ -465,6 +478,8 @@ def test_delete_insert_with_timerange():
465
478
  inc_key="updated_at",
466
479
  interval_start=start_date,
467
480
  interval_end=end_date,
481
+ sql_backend="sqlalchemy",
482
+ loader_file_format="jsonl",
468
483
  )
469
484
  assert res.exit_code == 0
470
485
  return res
ingestr/src/notion/__init__.py CHANGED
@@ -9,7 +9,7 @@ from .helpers.client import NotionClient
9
9
  from .helpers.database import NotionDatabase
10
10
 
11
11
 
12
- @dlt.source
12
+ @dlt.source(max_table_nesting=1)
13
13
  def notion_databases(
14
14
  database_ids: Optional[List[Dict[str, str]]] = None,
15
15
  api_key: str = dlt.secrets.value,
ingestr/src/sources.py CHANGED
@@ -41,6 +41,7 @@ class SqlSource:
41
41
  table=table_fields[-1],
42
42
  incremental=incremental,
43
43
  merge_key=kwargs.get("merge_key"),
44
+ backend=kwargs.get("sql_backend", "sqlalchemy"),
44
45
  )
45
46
 
46
47
  return table_instance
ingestr/src/sources_test.py CHANGED
@@ -22,10 +22,11 @@ class SqlSourceTest(unittest.TestCase):
22
22
  table = "schema.table"
23
23
 
24
24
  # monkey patch the sql_table function
25
- def sql_table(credentials, schema, table, incremental, merge_key):
25
+ def sql_table(credentials, schema, table, incremental, merge_key, backend):
26
26
  self.assertEqual(credentials, uri)
27
27
  self.assertEqual(schema, "schema")
28
28
  self.assertEqual(table, "table")
29
+ self.assertEqual(backend, "sqlalchemy")
29
30
  self.assertIsNone(incremental)
30
31
  self.assertIsNone(merge_key)
31
32
  return dlt.resource()
@@ -40,10 +41,11 @@ class SqlSourceTest(unittest.TestCase):
40
41
  incremental_key = "id"
41
42
 
42
43
  # monkey patch the sql_table function
43
- def sql_table(credentials, schema, table, incremental, merge_key):
44
+ def sql_table(credentials, schema, table, incremental, merge_key, backend):
44
45
  self.assertEqual(credentials, uri)
45
46
  self.assertEqual(schema, "schema")
46
47
  self.assertEqual(table, "table")
48
+ self.assertEqual(backend, "sqlalchemy")
47
49
  self.assertIsInstance(incremental, dlt.sources.incremental)
48
50
  self.assertEqual(incremental.cursor_path, incremental_key)
49
51
  self.assertIsNone(merge_key)
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.0"
1
+ __version__ = "0.3.2"
Binary file
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -1,15 +1,15 @@
1
- ingestr/main.py,sha256=OIxnk6VpzQaND9rHcCZGm96rTt0IYnuHv97HsuNV2lU,11867
2
- ingestr/main_test.py,sha256=c8C7CUC1dJ4BrMZImjGM2yhmNONeJX_duIaz8hhtU6Y,19485
1
+ ingestr/main.py,sha256=CLwBkFFltEaCWyZObtonQ662nIGb5S-1t4OKZ-iqgFo,13373
2
+ ingestr/main_test.py,sha256=fXZw1qZd5CElrFSRnsI5003813LcIMGpGCMnGNFVhNI,19946
3
3
  ingestr/src/destinations.py,sha256=_PIoAU-_tDEyX_-vDOgGB5eqXoGhPwtCRApHufj1ae4,6350
4
4
  ingestr/src/destinations_test.py,sha256=rgEk8EpAntFbSOwXovC4prv3RA22mwq8pIO6sZ_rYzg,4212
5
5
  ingestr/src/factory.py,sha256=DvzeYaE32pL9XpUUiO4Iaaj4ShRn9KABVeSo1E_QVg4,3114
6
6
  ingestr/src/factory_test.py,sha256=X9sFkvNByWChIcyeDt1QiIPMIzGNKb7M5A_GUE0-nnI,664
7
- ingestr/src/sources.py,sha256=SKWaC8mwNArg_JVQcSJsO8w8yZs3a97V8nM84YudRJA,3917
8
- ingestr/src/sources_test.py,sha256=gqqnJIqblxDbipsX3jZWl0He7aMsob0YyewceY4Z11M,3808
9
- ingestr/src/version.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
7
+ ingestr/src/sources.py,sha256=Bl8Q1gXGqYWAZtwyF2CoaPB5CgAwoctXHwW9OZYK__8,3978
8
+ ingestr/src/sources_test.py,sha256=t94u1lYAspxzfe-DkxVtq5vw6xrLWphipvwntrwrzqg,3930
9
+ ingestr/src/version.py,sha256=vNiWJ14r_cw5t_7UDqDQIVZvladKFGyHH2avsLpN7Vg,22
10
10
  ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
11
11
  ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
12
- ingestr/src/notion/__init__.py,sha256=K4TXZGysRNxeVnpdI3hx3joV-S_2Or2NA_v64cEjWnE,1813
12
+ ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
13
13
  ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
14
14
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
@@ -21,13 +21,13 @@ ingestr/src/sql_database/schema_types.py,sha256=foGHh4iGagGLfS7nF3uGYhBjqgX0jlrj
21
21
  ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
22
22
  ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
23
23
  ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
24
- ingestr/testdata/test_append.db,sha256=YAK31Am4R2UwesMDov0qiEVW9QrvbpuSO01JV1SVmoY,798720
25
- ingestr/testdata/test_create_replace.db,sha256=m-Mgp-8LR82r3dJu6-NY4yDOREHCJSBOvwESdypiyNE,798720
26
- ingestr/testdata/test_delete_insert_with_timerange.db,sha256=5qiW6X69rCawByXbnbk8bVLDdtNGFiKDjbsNtVP7CBI,1585152
27
- ingestr/testdata/test_delete_insert_without_primary_key.db,sha256=WCGT3sQAL4WfJrC01kEz-A3VJncSETGeDB1wFoL-Gqc,1847296
28
- ingestr/testdata/test_merge_with_primary_key.db,sha256=JGvUKv6_6ArgIA5v4wUQ74_6udlmRk7eN1G_n5Oi468,1847296
29
- ingestr-0.3.0.dist-info/METADATA,sha256=2oj-a494cJfiAtOgdM3IFSO7kb7FuJyopiMcO0_N5RM,5309
30
- ingestr-0.3.0.dist-info/WHEEL,sha256=osohxoshIHTFJFVPhsi1UkZuLRGMHRXZzwEBW2ezjrc,87
31
- ingestr-0.3.0.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
32
- ingestr-0.3.0.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
33
- ingestr-0.3.0.dist-info/RECORD,,
24
+ ingestr/testdata/test_append.db,sha256=3Jwyusio7tDVFtBAMZkPSdoYfCmAKms1O3QKBm9m85o,798720
25
+ ingestr/testdata/test_create_replace.db,sha256=EQfucKIJ5SpWK0DKpISwwI2YWpoPgcieyiI_e6xtb-M,798720
26
+ ingestr/testdata/test_delete_insert_with_timerange.db,sha256=xORZj07gvaDnQM8TgPsIx3Y1LHu9LyWP6QmkCIBBdIo,1585152
27
+ ingestr/testdata/test_delete_insert_without_primary_key.db,sha256=fxnI0HB3Ag1aRn80t0WgaUAGmcnJsaGe26lmBWZYhCA,1847296
28
+ ingestr/testdata/test_merge_with_primary_key.db,sha256=3p5_k-Qpt47atdAyiQ3JSk9pwqaVWq2U1LkCO-CrTCM,1847296
29
+ ingestr-0.3.2.dist-info/METADATA,sha256=vvm_R9Tt4cWKm1aqPr9szgwSIGcL8XfoNiZZz4ZFt04,5309
30
+ ingestr-0.3.2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
31
+ ingestr-0.3.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
32
+ ingestr-0.3.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
33
+ ingestr-0.3.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.1
2
+ Generator: hatchling 1.24.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any