ingestr 0.6.0.tar.gz → 0.6.2.tar.gz

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of ingestr might be problematic.
Files changed (94)
  1. ingestr-0.6.2/.python-version +1 -0
  2. {ingestr-0.6.0 → ingestr-0.6.2}/PKG-INFO +3 -2
  3. {ingestr-0.6.0 → ingestr-0.6.2}/docs/.vitepress/config.mjs +2 -0
  4. {ingestr-0.6.0 → ingestr-0.6.2}/docs/index.md +4 -1
  5. ingestr-0.6.2/docs/supported-sources/gorgias.md +53 -0
  6. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/main.py +16 -3
  7. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/main_test.py +287 -7
  8. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/factory.py +6 -0
  9. ingestr-0.6.2/ingestr/src/gorgias/__init__.py +587 -0
  10. ingestr-0.6.2/ingestr/src/gorgias/helpers.py +149 -0
  11. ingestr-0.6.2/ingestr/src/gorgias/helpers_test.py +45 -0
  12. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/sources.py +95 -3
  13. ingestr-0.6.2/ingestr/src/version.py +1 -0
  14. ingestr-0.6.2/ingestr/testdata/create_replace.csv +21 -0
  15. ingestr-0.6.2/ingestr/testdata/delete_insert_expected.csv +6 -0
  16. ingestr-0.6.2/ingestr/testdata/delete_insert_part1.csv +5 -0
  17. ingestr-0.6.2/ingestr/testdata/delete_insert_part2.csv +6 -0
  18. ingestr-0.6.2/ingestr/testdata/merge_expected.csv +5 -0
  19. ingestr-0.6.2/ingestr/testdata/merge_part1.csv +4 -0
  20. ingestr-0.6.2/ingestr/testdata/merge_part2.csv +5 -0
  21. {ingestr-0.6.0 → ingestr-0.6.2}/package-lock.json +492 -436
  22. {ingestr-0.6.0 → ingestr-0.6.2}/package.json +1 -1
  23. {ingestr-0.6.0 → ingestr-0.6.2}/pyproject.toml +2 -0
  24. {ingestr-0.6.0 → ingestr-0.6.2}/requirements.txt +2 -1
  25. ingestr-0.6.0/ingestr/src/version.py +0 -1
  26. ingestr-0.6.0/ingestr/testdata/test_append.db +0 -0
  27. ingestr-0.6.0/ingestr/testdata/test_create_replace.db +0 -0
  28. ingestr-0.6.0/ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
  29. ingestr-0.6.0/ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
  30. ingestr-0.6.0/ingestr/testdata/test_merge_with_primary_key.db +0 -0
  31. {ingestr-0.6.0 → ingestr-0.6.2}/.dockerignore +0 -0
  32. {ingestr-0.6.0 → ingestr-0.6.2}/.github/workflows/deploy-docs.yml +0 -0
  33. {ingestr-0.6.0 → ingestr-0.6.2}/.github/workflows/docker.yml +0 -0
  34. {ingestr-0.6.0 → ingestr-0.6.2}/.gitignore +0 -0
  35. {ingestr-0.6.0 → ingestr-0.6.2}/Dockerfile +0 -0
  36. {ingestr-0.6.0 → ingestr-0.6.2}/LICENSE.md +0 -0
  37. {ingestr-0.6.0 → ingestr-0.6.2}/Makefile +0 -0
  38. {ingestr-0.6.0 → ingestr-0.6.2}/README.md +0 -0
  39. {ingestr-0.6.0 → ingestr-0.6.2}/docs/.vitepress/theme/custom.css +0 -0
  40. {ingestr-0.6.0 → ingestr-0.6.2}/docs/.vitepress/theme/index.js +0 -0
  41. {ingestr-0.6.0 → ingestr-0.6.2}/docs/commands/example-uris.md +0 -0
  42. {ingestr-0.6.0 → ingestr-0.6.2}/docs/commands/ingest.md +0 -0
  43. {ingestr-0.6.0 → ingestr-0.6.2}/docs/getting-started/core-concepts.md +0 -0
  44. {ingestr-0.6.0 → ingestr-0.6.2}/docs/getting-started/incremental-loading.md +0 -0
  45. {ingestr-0.6.0 → ingestr-0.6.2}/docs/getting-started/quickstart.md +0 -0
  46. {ingestr-0.6.0 → ingestr-0.6.2}/docs/getting-started/telemetry.md +0 -0
  47. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/bigquery.md +0 -0
  48. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/csv.md +0 -0
  49. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/databricks.md +0 -0
  50. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/duckdb.md +0 -0
  51. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/gsheets.md +0 -0
  52. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/mongodb.md +0 -0
  53. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/mssql.md +0 -0
  54. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/mysql.md +0 -0
  55. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/notion.md +0 -0
  56. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/oracle.md +0 -0
  57. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/overview.md +0 -0
  58. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/postgres.md +0 -0
  59. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/redshift.md +0 -0
  60. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/sap-hana.md +0 -0
  61. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/shopify.md +0 -0
  62. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/snowflake.md +0 -0
  63. {ingestr-0.6.0 → ingestr-0.6.2}/docs/supported-sources/sqlite.md +0 -0
  64. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/destinations.py +0 -0
  65. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/destinations_test.py +0 -0
  66. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/factory_test.py +0 -0
  67. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/google_sheets/README.md +0 -0
  68. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/google_sheets/__init__.py +0 -0
  69. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  70. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  71. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  72. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/mongodb/__init__.py +0 -0
  73. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/mongodb/helpers.py +0 -0
  74. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/notion/__init__.py +0 -0
  75. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/notion/helpers/__init__.py +0 -0
  76. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/notion/helpers/client.py +0 -0
  77. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/notion/helpers/database.py +0 -0
  78. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/notion/settings.py +0 -0
  79. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/shopify/__init__.py +0 -0
  80. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/shopify/exceptions.py +0 -0
  81. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/shopify/helpers.py +0 -0
  82. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/shopify/settings.py +0 -0
  83. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/sources_test.py +0 -0
  84. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/sql_database/__init__.py +0 -0
  85. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/sql_database/helpers.py +0 -0
  86. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/sql_database/override.py +0 -0
  87. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/sql_database/schema_types.py +0 -0
  88. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/telemetry/event.py +0 -0
  89. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  90. {ingestr-0.6.0 → ingestr-0.6.2}/ingestr/testdata/.gitignore +0 -0
  91. {ingestr-0.6.0 → ingestr-0.6.2}/requirements-dev.txt +0 -0
  92. {ingestr-0.6.0 → ingestr-0.6.2}/resources/demo.gif +0 -0
  93. {ingestr-0.6.0 → ingestr-0.6.2}/resources/demo.tape +0 -0
  94. {ingestr-0.6.0 → ingestr-0.6.2}/resources/ingestr.svg +0 -0
ingestr-0.6.2/.python-version (new file)
@@ -0,0 +1 @@
+ 3.11

{ingestr-0.6.0 → ingestr-0.6.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: ingestr
- Version: 0.6.0
+ Version: 0.6.2
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -16,7 +16,7 @@ Classifier: Topic :: Database
  Requires-Python: >=3.9
  Requires-Dist: cx-oracle==8.3.0
  Requires-Dist: databricks-sql-connector==2.9.3
- Requires-Dist: dlt==0.4.8
+ Requires-Dist: dlt==0.4.12
  Requires-Dist: duckdb-engine==0.11.5
  Requires-Dist: duckdb==0.10.2
  Requires-Dist: google-api-python-client==2.130.0
@@ -27,6 +27,7 @@ Requires-Dist: py-machineid==0.5.1
  Requires-Dist: pymongo==4.6.3
  Requires-Dist: pymysql==1.1.0
  Requires-Dist: pyodbc==5.1.0
+ Requires-Dist: pyrate-limiter==3.6.1
  Requires-Dist: redshift-connector==2.1.0
  Requires-Dist: rich==13.7.1
  Requires-Dist: rudder-sdk-python==2.1.0
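
The new pyrate-limiter pin presumably backs client-side throttling for the Gorgias API client added in this release (ingestr/src/gorgias/helpers.py is not shown here). A minimal sketch of the 3.x API, with an illustrative rate rather than whatever limits ingestr actually configures:

```python
# Sketch only: the rate and bucket name are illustrative, not ingestr's settings.
from pyrate_limiter import Duration, Limiter, Rate

limiter = Limiter(Rate(2, Duration.SECOND))  # allow at most 2 acquisitions per second

for page in range(5):
    # raises (or delays, depending on limiter options) once the rate is exceeded
    limiter.try_acquire("gorgias-api")
    print(f"fetching page {page}")
```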

{ingestr-0.6.0 → ingestr-0.6.2}/docs/.vitepress/config.mjs
@@ -22,6 +22,7 @@ export default defineConfig({
  { text: "Home", link: "/" },
  { text: "Getting started", link: "/getting-started/quickstart.md" },
  ],
+ outline: 'deep',

  sidebar: [
  {
@@ -68,6 +69,7 @@ export default defineConfig({
  text: "Platforms",
  collapsed: false,
  items: [
+ { text: "Gorgias", link: "/supported-sources/gorgias.md" },
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
  { text: "Notion", link: "/supported-sources/notion.md" },
  { text: "Shopify", link: "/supported-sources/shopify.md" },

{ingestr-0.6.0 → ingestr-0.6.2}/docs/index.md
@@ -5,7 +5,7 @@ layout: home
  hero:
  name: "ingestr"
  text: Copy data between any source and any destination
- tagline: "ingestr is a command-line application that allows ingesting or copying data from any source into any destination database."
+ tagline: "ingestr is a command-line application that allows copying data from any source into any destination database."
  image:
  src: https://github.com/bruin-data/ingestr/blob/main/resources/demo.gif?raw=true
  alt: ingestr logo
@@ -25,4 +25,7 @@ features:
  - title: Incremental Loading
  details: ingestr supports both full-refresh as well as incremental loading modes.
  ---
+ <div style="margin-top: 12px; line-height: 2em; text-align: center;">

+ <Badge type="info" text="Postgres" /> <Badge type="danger" text="BigQuery" /> <Badge type="tip" text="Snowflake" /> <Badge type="warning" text="Redshift" /> <Badge type="info" text="Databricks" /> <Badge type="danger" text="DuckDB" /> <Badge type="tip" text="Microsoft SQL Server" /> <Badge type="warning" text="Local CSV file" /> <Badge type="info" text="MongoDB" /> <Badge type="danger" text="Oracle" /> <Badge type="tip" text="SAP Hana" /> <Badge type="warning" text="SQLite" /> <Badge type="info" text="MySQL" /> <Badge type="danger" text="Google Sheets" /> <Badge type="tip" text="Notion" /> <Badge type="warning" text="Shopify" />
+ </div>

ingestr-0.6.2/docs/supported-sources/gorgias.md (new file)
@@ -0,0 +1,53 @@
+ # Gorgias
+ [Gorgias](https://www.gorgias.com/) is a helpdesk for e-commerce merchants, providing customer service via email, social media, SMS, and live chat.
+
+ ingestr supports Gorgias as a source.
+
+ ## URI Format
+ The URI format for Gorgias is as follows:
+
+ ```plaintext
+ gorgias://<domain>?api_key=<api-key>&email=<email>
+ ```
+
+ URI parameters:
+ - `domain`: the domain of the Gorgias account without the full `gorgias.com`, e.g. `mycompany`
+ - `api_key`: the integration token used for authentication with the Gorgias API
+ - `email`: the email address of the user to connect to the Gorgias API
+
+ The URI is used to connect to the Gorgias API for extracting data.
+
+ ## Examples
+ ```bash
+ # get all the tickets that are created/updated since 2024-06-19 and write them to `gorgias.ticket_messages` table on BigQuery
+ ingestr ingest --source-table 'tickets' --source-uri $GORGIAS_URI --dest-uri $BIGQUERY_URI --interval-start 2024-06-19 --dest-table 'gorgias.ticket_messages' --loader-file-format jsonl
+
+ # get all the customers and write them to `gorgias.customers` table on DuckDB
+ ingestr ingest --source-table 'customers' --source-uri $GORGIAS_URI --dest-uri duckdb:///gorgias.duckdb --interval-start 2024-01-01 --dest-table 'gorgias.customers'
+ ```
+
+ ## Supported Entities
+ The Gorgias source supports a growing list of entities; feel free to create an issue if you need more entities to be supported.
+
+ ### Customers
+ Customers are the users who have interacted with the support team. Each customer has a unique ID and contains information such as the name and email.
+
+ You can retrieve customers by using `customers` as the source table.
+
+ ### Tickets
+ Tickets are the main entity in Gorgias, representing customer inquiries. Each ticket has a unique ID and contains information such as the customer, status, and messages.
+
+ You can retrieve tickets by using `tickets` as the source table.
+
+ ### Ticket Messages
+ Ticket messages are the messages exchanged between the customer and the support agent in a ticket. Each message has a unique ID and contains information such as the sender, content, and timestamp.
+
+ You can retrieve ticket messages by using `ticket_messages` as the source table.
+
+ ### Satisfaction Surveys
+ Satisfaction surveys are sent to customers after a ticket is resolved to gather feedback on their experience. Each survey has a unique ID and contains information such as the rating and comments.
+
+ You can retrieve satisfaction surveys by using `satisfaction_surveys` as the source table.
+
+
+
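
For illustration, the documented URI shape decomposes cleanly with the standard library; this sketch is not taken from ingestr's own source code, and the credential values are made up:

```python
from urllib.parse import parse_qs, urlparse

uri = "gorgias://mycompany?api_key=abc123&email=ops@mycompany.com"  # hypothetical values
parsed = urlparse(uri)
params = parse_qs(parsed.query)

domain = parsed.netloc          # account subdomain, without the gorgias.com suffix
api_key = params["api_key"][0]  # integration token
email = params["email"][0]      # user email paired with the token

assert (domain, api_key, email) == ("mycompany", "abc123", "ops@mycompany.com")
```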

{ingestr-0.6.0 → ingestr-0.6.2}/ingestr/main.py
@@ -90,6 +90,7 @@ class IncrementalStrategy(str, Enum):
  append = "append"
  delete_insert = "delete+insert"
  merge = "merge"
+ none = "none"


  class LoaderFileFormat(str, Enum):
@@ -136,7 +137,7 @@ def ingest(
  ),
  ] = None, # type: ignore
  incremental_key: Annotated[
- str,
+ Optional[str],
  typer.Option(
  help="The incremental key from the table to be used for incremental strategies",
  envvar="INCREMENTAL_KEY",
@@ -257,6 +258,16 @@ def ingest(
  full_refresh=full_refresh,
  )

+ if source.handles_incrementality():
+ incremental_strategy = IncrementalStrategy.none
+ incremental_key = None
+
+ incremental_strategy_text = (
+ incremental_strategy.value
+ if incremental_strategy.value != IncrementalStrategy.none
+ else "Platform-specific"
+ )
+
  print()
  print("[bold green]Initiated the pipeline with the following:[/bold green]")
  print(
@@ -266,7 +277,7 @@ def ingest(
  f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
  )
  print(
- f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy.value}"
+ f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy_text}"
  )
  print(
  f"[bold yellow] Incremental Key:[/bold yellow] {incremental_key if incremental_key else 'None'}"
@@ -317,7 +328,9 @@ def ingest(
  uri=dest_uri,
  table=dest_table,
  ),
- write_disposition=incremental_strategy.value, # type: ignore
+ write_disposition=incremental_strategy.value
+ if incremental_strategy.value != IncrementalStrategy.none
+ else None, # type: ignore
  primary_key=(primary_key if primary_key and len(primary_key) > 0 else None), # type: ignore
  loader_file_format=loader_file_format.value
  if loader_file_format is not None
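
The main.py hunks above work together: sources that manage their own incrementality force the strategy to the new none member, the summary prints it as "Platform-specific", and dlt receives write_disposition=None. A condensed restatement of that mapping (a sketch, not the verbatim ingestr code):

```python
from enum import Enum
from typing import Optional

class IncrementalStrategy(str, Enum):
    append = "append"
    delete_insert = "delete+insert"
    merge = "merge"
    none = "none"

def effective_write_disposition(strategy: IncrementalStrategy) -> Optional[str]:
    # "none" means the source handles incrementality itself, so no
    # write disposition is forwarded to dlt.
    return None if strategy is IncrementalStrategy.none else strategy.value

assert effective_write_disposition(IncrementalStrategy.merge) == "merge"
assert effective_write_disposition(IncrementalStrategy.none) is None
```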

{ingestr-0.6.0 → ingestr-0.6.2}/ingestr/main_test.py
@@ -1,5 +1,8 @@
+ import csv
  import os
+ import random
  import shutil
+ import string

  import duckdb
  from typer.testing import CliRunner
@@ -80,9 +83,17 @@ def invoke_ingest_command(
  return result


- def test_create_replace():
- abs_db_path = get_abs_path("./testdata/test_create_replace.db")
- rel_db_path_to_command = "ingestr/testdata/test_create_replace.db"
+ ### These are DuckDB-to-DuckDB tests
+ def test_create_replace_duckdb_to_duckdb():
+ try:
+ shutil.rmtree(get_abs_path("../pipeline_data"))
+ except Exception:
+ pass
+
+ dbname = f"test_create_replace_{get_random_string(5)}.db"
+
+ abs_db_path = get_abs_path(f"./testdata/{dbname}")
+ rel_db_path_to_command = f"ingestr/testdata/{dbname}"

  conn = duckdb.connect(abs_db_path)
  conn.execute("DROP SCHEMA IF EXISTS testschema CASCADE")
@@ -103,6 +114,8 @@ def test_create_replace():
  "testschema.output",
  )

+ print(result.stdout)
+
  assert result.exit_code == 0

  res = conn.sql(
@@ -112,8 +125,13 @@ def test_create_replace():
  assert res[0] == (1, "val1", "2022-01-01")
  assert res[1] == (2, "val2", "2022-02-01")

+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass
+

- def test_append():
+ def test_append_duckdb_to_duckdb():
  try:
  shutil.rmtree(get_abs_path("../pipeline_data"))
  except Exception:
@@ -172,8 +190,13 @@ def test_append():
  assert res[0] == (1, "val1", "2022-01-01")
  assert res[1] == (2, "val2", "2022-01-02")

+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass

- def test_merge_with_primary_key():
+
+ def test_merge_with_primary_key_duckdb_to_duckdb():
  try:
  shutil.rmtree(get_abs_path("../pipeline_data"))
  except Exception:
@@ -325,8 +348,13 @@ def test_merge_with_primary_key():
  assert count_by_run_id[2][1] == 1
  ##############################

+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass
+

- def test_delete_insert_without_primary_key():
+ def test_delete_insert_without_primary_key_duckdb_to_duckdb():
  try:
  shutil.rmtree(get_abs_path("../pipeline_data"))
  except Exception:
@@ -435,8 +463,13 @@ def test_delete_insert_without_primary_key():
  assert count_by_run_id[1][1] == 1
  ##############################

+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass

- def test_delete_insert_with_timerange():
+
+ def test_delete_insert_with_timerange_duckdb_to_duckdb():
  try:
  shutil.rmtree(get_abs_path("../pipeline_data"))
  except Exception:
@@ -593,3 +626,250 @@ def test_delete_insert_with_timerange():
  assert count_by_run_id[1][1] == 2
  assert count_by_run_id[2][1] == 2
  ##############################
+
+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass
+
+
+ ### These are CSV-to-DuckDB tests
+ def test_create_replace_csv_to_duckdb():
+ try:
+ shutil.rmtree(get_abs_path("../pipeline_data"))
+ except Exception:
+ pass
+
+ abs_db_path = get_abs_path("./testdata/test_create_replace_csv.db")
+ rel_db_path_to_command = "ingestr/testdata/test_create_replace_csv.db"
+ rel_source_path_to_command = "ingestr/testdata/create_replace.csv"
+
+ conn = duckdb.connect(abs_db_path)
+
+ result = invoke_ingest_command(
+ f"csv://{rel_source_path_to_command}",
+ "testschema.input",
+ f"duckdb:///{rel_db_path_to_command}",
+ "testschema.output",
+ )
+
+ assert result.exit_code == 0
+
+ res = conn.sql(
+ "select symbol, date, is_enabled, name from testschema.output"
+ ).fetchall()
+
+ # read CSV file
+ actual_rows = []
+ with open(get_abs_path("./testdata/create_replace.csv"), "r") as f:
+ reader = csv.reader(f, delimiter=",", quotechar='"')
+ next(reader, None)
+ for row in reader:
+ actual_rows.append(row)
+
+ # compare the CSV file with the DuckDB table
+ assert len(res) == len(actual_rows)
+ for i, row in enumerate(actual_rows):
+ assert res[i] == tuple(row)
+
+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass
+
+
+ def get_random_string(length):
+ letters = string.ascii_lowercase
+ result_str = "".join(random.choice(letters) for i in range(length))
+ return result_str
+
+
+ def test_merge_with_primary_key_csv_to_duckdb():
+ try:
+ shutil.rmtree(get_abs_path("../pipeline_data"))
+ except Exception:
+ pass
+
+ dbname = f"test_merge_with_primary_key_csv{get_random_string(5)}.db"
+ abs_db_path = get_abs_path(f"./testdata/{dbname}")
+ rel_db_path_to_command = f"ingestr/testdata/{dbname}"
+ uri = f"duckdb:///{rel_db_path_to_command}"
+
+ conn = duckdb.connect(abs_db_path)
+
+ def run(source: str):
+ res = invoke_ingest_command(
+ source,
+ "whatever", # table name doesnt matter for CSV
+ uri,
+ "testschema_merge.output",
+ "merge",
+ "date",
+ "symbol",
+ )
+ assert res.exit_code == 0
+ return res
+
+ def get_output_rows():
+ conn.execute("CHECKPOINT")
+ return conn.sql(
+ "select symbol, date, is_enabled, name from testschema_merge.output order by symbol asc"
+ ).fetchall()
+
+ def assert_output_equals_to_csv(path: str):
+ res = get_output_rows()
+ actual_rows = []
+ with open(get_abs_path(path), "r") as f:
+ reader = csv.reader(f, delimiter=",", quotechar='"')
+ next(reader, None)
+ for row in reader:
+ actual_rows.append(row)
+
+ assert len(res) == len(actual_rows)
+ for i, row in enumerate(actual_rows):
+ assert res[i] == tuple(row)
+
+ run("csv://ingestr/testdata/merge_part1.csv")
+ assert_output_equals_to_csv("./testdata/merge_part1.csv")
+
+ first_run_id = conn.sql(
+ "select _dlt_load_id from testschema_merge.output limit 1"
+ ).fetchall()[0][0]
+
+ ##############################
+ # we'll run again, we don't expect any changes since the data hasn't changed
+ run("csv://ingestr/testdata/merge_part1.csv")
+ assert_output_equals_to_csv("./testdata/merge_part1.csv")
+
+ # we also ensure that the other rows were not touched
+ count_by_run_id = conn.sql(
+ "select _dlt_load_id, count(*) from testschema_merge.output group by 1"
+ ).fetchall()
+ assert len(count_by_run_id) == 1
+ assert count_by_run_id[0][1] == 3
+ assert count_by_run_id[0][0] == first_run_id
+ ##############################
+
+ ##############################
+ # now we'll run the same ingestion but with a different file this time
+
+ run("csv://ingestr/testdata/merge_part2.csv")
+ assert_output_equals_to_csv("./testdata/merge_expected.csv")
+
+ # let's check the runs
+ count_by_run_id = conn.sql(
+ "select _dlt_load_id, count(*) from testschema_merge.output group by 1 order by 1 asc"
+ ).fetchall()
+
+ # we expect that there's a new load ID now
+ assert len(count_by_run_id) == 2
+
+ # there should be only one row with the first load ID
+ assert count_by_run_id[0][1] == 1
+ assert count_by_run_id[0][0] == first_run_id
+
+ # there should be a new run with the rest, 2 rows updated + 1 new row
+ assert count_by_run_id[1][1] == 3
+ ##############################
+
+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass
+
+
+ def test_delete_insert_without_primary_key_csv_to_duckdb():
+ try:
+ shutil.rmtree(get_abs_path("../pipeline_data"))
+ except Exception:
+ pass
+
+ dbname = f"test_merge_with_primary_key_csv{get_random_string(5)}.db"
+ abs_db_path = get_abs_path(f"./testdata/{dbname}")
+ rel_db_path_to_command = f"ingestr/testdata/{dbname}"
+ uri = f"duckdb:///{rel_db_path_to_command}"
+
+ conn = duckdb.connect(abs_db_path)
+
+ def run(source: str):
+ res = invoke_ingest_command(
+ source,
+ "whatever", # table name doesnt matter for CSV
+ uri,
+ "testschema.output",
+ "delete+insert",
+ "date",
+ )
+ assert res.exit_code == 0
+ return res
+
+ def get_output_rows():
+ conn.execute("CHECKPOINT")
+ return conn.sql(
+ "select symbol, date, is_enabled, name from testschema.output order by symbol asc"
+ ).fetchall()
+
+ def assert_output_equals_to_csv(path: str):
+ res = get_output_rows()
+ actual_rows = []
+ with open(get_abs_path(path), "r") as f:
+ reader = csv.reader(f, delimiter=",", quotechar='"')
+ next(reader, None)
+ for row in reader:
+ actual_rows.append(row)
+
+ assert len(res) == len(actual_rows)
+ for i, row in enumerate(actual_rows):
+ assert res[i] == tuple(row)
+
+ run("csv://ingestr/testdata/delete_insert_part1.csv")
+ assert_output_equals_to_csv("./testdata/delete_insert_part1.csv")
+
+ first_run_id = conn.sql(
+ "select _dlt_load_id from testschema.output limit 1"
+ ).fetchall()[0][0]
+
+ ##############################
+ # we'll run again, we expect the data to be the same, but a new load_id to exist
+ # this is due to the fact that the old data won't be touched, but the ones with the
+ # latest value will be rewritten
+ run("csv://ingestr/testdata/delete_insert_part1.csv")
+ assert_output_equals_to_csv("./testdata/delete_insert_part1.csv")
+
+ # we also ensure that the other rows were not touched
+ count_by_run_id = conn.sql(
+ "select _dlt_load_id, count(*) from testschema.output group by 1 order by 1 asc"
+ ).fetchall()
+
+ assert len(count_by_run_id) == 2
+ assert count_by_run_id[0][1] == 1
+ assert count_by_run_id[0][0] == first_run_id
+ assert count_by_run_id[1][1] == 3
+ ##############################
+
+ ##############################
+ # now we'll run the same ingestion but with a different file this time
+
+ run("csv://ingestr/testdata/delete_insert_part2.csv")
+ assert_output_equals_to_csv("./testdata/delete_insert_expected.csv")
+
+ # let's check the runs
+ count_by_run_id = conn.sql(
+ "select _dlt_load_id, count(*) from testschema.output group by 1 order by 1 asc"
+ ).fetchall()
+
+ # we expect that there's a new load ID now
+ assert len(count_by_run_id) == 2
+
+ # there should be only one row with the first load ID, oldest date
+ assert count_by_run_id[0][1] == 1
+ assert count_by_run_id[0][0] == first_run_id
+
+ # there should be a new run with the rest, 3 rows updated + 1 new row
+ assert count_by_run_id[1][1] == 4
+ ##############################
+
+ try:
+ os.remove(abs_db_path)
+ except Exception:
+ pass
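
The merge assertions boil down to simple set arithmetic over the primary key: part1 loads 3 rows; part2 updates 2 of them and adds 1 new one, so one row keeps the first load ID while 3 rows carry the second, for 4 rows total. A toy restatement (symbols and dates are illustrative; the actual testdata CSVs are not shown in this diff):

```python
# symbol -> date; illustrative values only
part1 = {"AAPL": "2024-01-01", "MSFT": "2024-01-01", "GOOG": "2024-01-01"}
part2 = {"MSFT": "2024-01-02", "GOOG": "2024-01-02", "AMZN": "2024-01-02"}

# merge on the primary key: later loads overwrite matching rows, keep the rest
table = {sym: ("load1", date) for sym, date in part1.items()}
table.update({sym: ("load2", date) for sym, date in part2.items()})

counts: dict[str, int] = {}
for load_id, _ in table.values():
    counts[load_id] = counts.get(load_id, 0) + 1

assert len(table) == 4                     # 3 original rows + 1 new
assert counts == {"load1": 1, "load2": 3}  # matches the test's count_by_run_id
```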

{ingestr-0.6.0 → ingestr-0.6.2}/ingestr/src/factory.py
@@ -16,6 +16,7 @@ from ingestr.src.destinations import (
  )
  from ingestr.src.sources import (
  GoogleSheetsSource,
+ GorgiasSource,
  LocalCsvSource,
  MongoDbSource,
  NotionSource,
@@ -45,6 +46,9 @@ class SourceProtocol(Protocol):
  def dlt_source(self, uri: str, table: str, **kwargs):
  pass

+ def handles_incrementality(self) -> bool:
+ pass
+

  class DestinationProtocol(Protocol):
  def dlt_dest(self, uri: str, **kwargs) -> Destination:
@@ -94,6 +98,8 @@ class SourceDestinationFactory:
  return GoogleSheetsSource()
  elif self.source_scheme == "shopify":
  return ShopifySource()
+ elif self.source_scheme == "gorgias":
+ return GorgiasSource()
  else:
  raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
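
The SourceProtocol addition explains the CLI change in main.py: any source answering True to handles_incrementality() gets its --incremental-strategy and --incremental-key nulled out. A sketch of what a conforming source might look like (a hypothetical class; GorgiasSource's real implementation lives in ingestr/src/sources.py and is not shown in this diff):

```python
import dlt

class ExampleSource:  # hypothetical, for illustration only
    def handles_incrementality(self) -> bool:
        # True tells the CLI this source tracks its own cursor,
        # so no user-supplied incremental strategy or key applies.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        @dlt.resource(name=table)
        def rows():
            yield {"id": 1, "updated_at": "2024-06-19"}

        return rows
```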