ingestr 0.7.4__tar.gz → 0.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (98) hide show
  1. {ingestr-0.7.4 → ingestr-0.7.6}/.github/workflows/tests.yml +25 -25
  2. {ingestr-0.7.4 → ingestr-0.7.6}/PKG-INFO +21 -5
  3. {ingestr-0.7.4 → ingestr-0.7.6}/README.md +19 -4
  4. {ingestr-0.7.4 → ingestr-0.7.6}/docs/.vitepress/config.mjs +27 -8
  5. {ingestr-0.7.4 → ingestr-0.7.6}/docs/commands/example-uris.md +1 -1
  6. {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/quickstart.md +1 -1
  7. ingestr-0.7.6/docs/supported-sources/chess.md +37 -0
  8. ingestr-0.7.6/docs/supported-sources/hubspot.md +45 -0
  9. ingestr-0.7.6/docs/supported-sources/stripe.md +45 -0
  10. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/main.py +23 -2
  11. ingestr-0.7.6/ingestr/src/chess/__init__.py +166 -0
  12. ingestr-0.7.6/ingestr/src/chess/helpers.py +21 -0
  13. ingestr-0.7.6/ingestr/src/chess/settings.py +4 -0
  14. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/factory.py +9 -0
  15. ingestr-0.7.6/ingestr/src/hubspot/__init__.py +281 -0
  16. ingestr-0.7.6/ingestr/src/hubspot/helpers.py +188 -0
  17. ingestr-0.7.6/ingestr/src/hubspot/settings.py +99 -0
  18. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sources.py +136 -0
  19. ingestr-0.7.6/ingestr/src/stripe_analytics/__init__.py +99 -0
  20. ingestr-0.7.6/ingestr/src/stripe_analytics/helpers.py +68 -0
  21. ingestr-0.7.6/ingestr/src/stripe_analytics/settings.py +14 -0
  22. ingestr-0.7.6/ingestr/src/version.py +1 -0
  23. {ingestr-0.7.4 → ingestr-0.7.6}/package-lock.json +233 -118
  24. {ingestr-0.7.4 → ingestr-0.7.6}/package.json +1 -1
  25. {ingestr-0.7.4 → ingestr-0.7.6}/pyproject.toml +6 -0
  26. {ingestr-0.7.4 → ingestr-0.7.6}/requirements.txt +1 -0
  27. ingestr-0.7.4/docs/supported-sources/overview.md +0 -104
  28. ingestr-0.7.4/ingestr/src/version.py +0 -1
  29. {ingestr-0.7.4 → ingestr-0.7.6}/.dockerignore +0 -0
  30. {ingestr-0.7.4 → ingestr-0.7.6}/.github/workflows/deploy-docs.yml +0 -0
  31. {ingestr-0.7.4 → ingestr-0.7.6}/.gitignore +0 -0
  32. {ingestr-0.7.4 → ingestr-0.7.6}/.python-version +0 -0
  33. {ingestr-0.7.4 → ingestr-0.7.6}/Dockerfile +0 -0
  34. {ingestr-0.7.4 → ingestr-0.7.6}/LICENSE.md +0 -0
  35. {ingestr-0.7.4 → ingestr-0.7.6}/Makefile +0 -0
  36. {ingestr-0.7.4 → ingestr-0.7.6}/docs/.vitepress/theme/custom.css +0 -0
  37. {ingestr-0.7.4 → ingestr-0.7.6}/docs/.vitepress/theme/index.js +0 -0
  38. {ingestr-0.7.4 → ingestr-0.7.6}/docs/commands/ingest.md +0 -0
  39. {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/core-concepts.md +0 -0
  40. {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/incremental-loading.md +0 -0
  41. {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/telemetry.md +0 -0
  42. {ingestr-0.7.4 → ingestr-0.7.6}/docs/index.md +0 -0
  43. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/bigquery.md +0 -0
  44. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/csv.md +0 -0
  45. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/databricks.md +0 -0
  46. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/duckdb.md +0 -0
  47. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/gorgias.md +0 -0
  48. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/gsheets.md +0 -0
  49. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/mongodb.md +0 -0
  50. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/mssql.md +0 -0
  51. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/mysql.md +0 -0
  52. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/notion.md +0 -0
  53. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/oracle.md +0 -0
  54. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/postgres.md +0 -0
  55. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/redshift.md +0 -0
  56. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/sap-hana.md +0 -0
  57. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/shopify.md +0 -0
  58. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/snowflake.md +0 -0
  59. {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/sqlite.md +0 -0
  60. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/destinations.py +0 -0
  61. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/README.md +0 -0
  62. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/__init__.py +0 -0
  63. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  64. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  65. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  66. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/gorgias/__init__.py +0 -0
  67. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/gorgias/helpers.py +0 -0
  68. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/mongodb/__init__.py +0 -0
  69. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/mongodb/helpers.py +0 -0
  70. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/__init__.py +0 -0
  71. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/helpers/__init__.py +0 -0
  72. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/helpers/client.py +0 -0
  73. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/helpers/database.py +0 -0
  74. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/settings.py +0 -0
  75. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/__init__.py +0 -0
  76. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/exceptions.py +0 -0
  77. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/helpers.py +0 -0
  78. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/settings.py +0 -0
  79. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/__init__.py +0 -0
  80. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/arrow_helpers.py +0 -0
  81. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/helpers.py +0 -0
  82. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/override.py +0 -0
  83. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/schema_types.py +0 -0
  84. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/table_definition.py +0 -0
  85. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/telemetry/event.py +0 -0
  86. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  87. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/.gitignore +0 -0
  88. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/create_replace.csv +0 -0
  89. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/delete_insert_expected.csv +0 -0
  90. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/delete_insert_part1.csv +0 -0
  91. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/delete_insert_part2.csv +0 -0
  92. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/merge_expected.csv +0 -0
  93. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/merge_part1.csv +0 -0
  94. {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/merge_part2.csv +0 -0
  95. {ingestr-0.7.4 → ingestr-0.7.6}/requirements-dev.txt +0 -0
  96. {ingestr-0.7.4 → ingestr-0.7.6}/resources/demo.gif +0 -0
  97. {ingestr-0.7.4 → ingestr-0.7.6}/resources/demo.tape +0 -0
  98. {ingestr-0.7.4 → ingestr-0.7.6}/resources/ingestr.svg +0 -0
@@ -19,33 +19,33 @@ env:
19
19
 
20
20
 
21
21
  jobs:
22
- # tests:
23
- # runs-on: ubuntu-latest
24
- # steps:
25
- # - uses: actions/checkout@v4
26
- # - name: install Microsoft ODBC
27
- # run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
28
- # - uses: actions/setup-python@v4
29
- # with:
30
- # python-version: '3.11'
31
- # cache: 'pip'
32
- # - name: Cache dependencies
33
- # uses: actions/cache@v3
34
- # id: cache
35
- # with:
36
- # path: ${{ env.pythonLocation }}
37
- # key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
38
- # - name: Install pip dependencies
39
- # if: steps.cache.outputs.cache-hit != 'true'
40
- # run: make deps-ci
41
- # - name: run tests
42
- # run: make test-ci
43
- # - name: check the formatting
44
- # run: make lint-ci
22
+ tests:
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - name: install Microsoft ODBC
27
+ run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
28
+ - uses: actions/setup-python@v4
29
+ with:
30
+ python-version: '3.11'
31
+ cache: 'pip'
32
+ - name: Cache dependencies
33
+ uses: actions/cache@v3
34
+ id: cache
35
+ with:
36
+ path: ${{ env.pythonLocation }}
37
+ key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
38
+ - name: Install pip dependencies
39
+ if: steps.cache.outputs.cache-hit != 'true'
40
+ run: make deps-ci
41
+ - name: run tests
42
+ run: make test-ci
43
+ - name: check the formatting
44
+ run: make lint-ci
45
45
 
46
46
  build-and-push-image:
47
- # needs: tests
48
- # if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')
47
+ needs: tests
48
+ if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')
49
49
  runs-on: ubuntu-latest
50
50
  permissions:
51
51
  contents: read
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.4
3
+ Version: 0.7.6
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -38,6 +38,7 @@ Requires-Dist: sqlalchemy-hana==2.0.0
38
38
  Requires-Dist: sqlalchemy-redshift==0.8.14
39
39
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
40
40
  Requires-Dist: sqlalchemy==1.4.52
41
+ Requires-Dist: stripe==10.7.0
41
42
  Requires-Dist: tqdm==4.66.2
42
43
  Requires-Dist: typer==0.12.3
43
44
  Description-Content-Type: text/markdown
@@ -172,25 +173,40 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
172
173
  <tr>
173
174
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
174
175
  </tr>
176
+ <tr>
177
+ <td>Chess.com</td>
178
+ <td>✅</td>
179
+ <td>-</td>
180
+ </tr>
175
181
  <tr>
176
182
  <td>Gorgias</td>
177
183
  <td>✅</td>
178
- <td>❌</td>
184
+ <td>-</td>
179
185
  </tr>
180
186
  <tr>
181
187
  <td>Google Sheets</td>
182
188
  <td>✅</td>
183
- <td>❌</td>
189
+ <td>-</td>
190
+ </tr>
191
+ <tr>
192
+ <td>HubSpot</td>
193
+ <td>✅</td>
194
+ <td>-</td>
184
195
  </tr>
185
196
  <tr>
186
197
  <td>Notion</td>
187
198
  <td>✅</td>
188
- <td>❌</td>
199
+ <td>-</td>
189
200
  </tr>
190
201
  <tr>
191
202
  <td>Shopify</td>
192
203
  <td>✅</td>
193
- <td>❌</td>
204
+ <td>-</td>
205
+ </tr>
206
+ <tr>
207
+ <td>Stripe</td>
208
+ <td>✅</td>
209
+ <td>-</td>
194
210
  </tr>
195
211
  </table>
196
212
 
@@ -128,25 +128,40 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
128
128
  <tr>
129
129
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
130
130
  </tr>
131
+ <tr>
132
+ <td>Chess.com</td>
133
+ <td>✅</td>
134
+ <td>-</td>
135
+ </tr>
131
136
  <tr>
132
137
  <td>Gorgias</td>
133
138
  <td>✅</td>
134
- <td>❌</td>
139
+ <td>-</td>
135
140
  </tr>
136
141
  <tr>
137
142
  <td>Google Sheets</td>
138
143
  <td>✅</td>
139
- <td>❌</td>
144
+ <td>-</td>
145
+ </tr>
146
+ <tr>
147
+ <td>HubSpot</td>
148
+ <td>✅</td>
149
+ <td>-</td>
140
150
  </tr>
141
151
  <tr>
142
152
  <td>Notion</td>
143
153
  <td>✅</td>
144
- <td>❌</td>
154
+ <td>-</td>
145
155
  </tr>
146
156
  <tr>
147
157
  <td>Shopify</td>
148
158
  <td>✅</td>
149
- <td>❌</td>
159
+ <td>-</td>
160
+ </tr>
161
+ <tr>
162
+ <td>Stripe</td>
163
+ <td>✅</td>
164
+ <td>-</td>
150
165
  </tr>
151
166
  </table>
152
167
 
@@ -6,7 +6,13 @@ export default defineConfig({
6
6
  description: "Ingest & copy data between any source and any destination",
7
7
  base: "/ingestr/",
8
8
  head: [
9
- ["script", { async: "", src: "https://www.googletagmanager.com/gtag/js?id=G-MZJ20PP4MJ" }],
9
+ [
10
+ "script",
11
+ {
12
+ async: "",
13
+ src: "https://www.googletagmanager.com/gtag/js?id=G-MZJ20PP4MJ",
14
+ },
15
+ ],
10
16
  [
11
17
  "script",
12
18
  {},
@@ -22,7 +28,7 @@ export default defineConfig({
22
28
  { text: "Home", link: "/" },
23
29
  { text: "Getting started", link: "/getting-started/quickstart.md" },
24
30
  ],
25
- outline: 'deep',
31
+ outline: "deep",
26
32
 
27
33
  sidebar: [
28
34
  {
@@ -30,7 +36,10 @@ export default defineConfig({
30
36
  items: [
31
37
  { text: "Quickstart", link: "/getting-started/quickstart.md" },
32
38
  { text: "Core Concepts", link: "/getting-started/core-concepts.md" },
33
- { text: "Incremental Loading", link: "/getting-started/incremental-loading.md" },
39
+ {
40
+ text: "Incremental Loading",
41
+ link: "/getting-started/incremental-loading.md",
42
+ },
34
43
  { text: "Telemetry", link: "/getting-started/telemetry.md" },
35
44
  ],
36
45
  },
@@ -44,7 +53,6 @@ export default defineConfig({
44
53
  {
45
54
  text: "Sources & Destinations",
46
55
  items: [
47
- { text: "Overview", link: "/supported-sources/overview.md" },
48
56
  {
49
57
  text: "Databases",
50
58
  collapsed: false,
@@ -52,9 +60,15 @@ export default defineConfig({
52
60
  { text: "AWS Redshift", link: "/supported-sources/redshift.md" },
53
61
  { text: "Databricks", link: "/supported-sources/databricks.md" },
54
62
  { text: "DuckDB", link: "/supported-sources/duckdb.md" },
55
- { text: "Google BigQuery", link: "/supported-sources/bigquery.md" },
63
+ {
64
+ text: "Google BigQuery",
65
+ link: "/supported-sources/bigquery.md",
66
+ },
56
67
  { text: "Local CSV Files", link: "/supported-sources/csv.md" },
57
- { text: "Microsoft SQL Server", link: "/supported-sources/mssql.md" },
68
+ {
69
+ text: "Microsoft SQL Server",
70
+ link: "/supported-sources/mssql.md",
71
+ },
58
72
  { text: "MongoDB", link: "/supported-sources/mongodb.md" },
59
73
  { text: "MySQL", link: "/supported-sources/mysql.md" },
60
74
  { text: "Oracle", link: "/supported-sources/oracle.md" },
@@ -69,16 +83,21 @@ export default defineConfig({
69
83
  text: "Platforms",
70
84
  collapsed: false,
71
85
  items: [
72
- { text: "Gorgias", link: "/supported-sources/gorgias.md" },
86
+ { text: "Chess.com", link: "/supported-sources/chess.md" },
73
87
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
88
+ { text: "Gorgias", link: "/supported-sources/gorgias.md" },
89
+ { text: "HubSpot", link: "/supported-sources/hubspot.md" },
74
90
  { text: "Notion", link: "/supported-sources/notion.md" },
75
91
  { text: "Shopify", link: "/supported-sources/shopify.md" },
92
+ { text: "Stripe", link: "/supported-sources/stripe.md" },
76
93
  ],
77
94
  },
78
95
  ],
79
96
  },
80
97
  ],
81
98
 
82
- socialLinks: [{ icon: "github", link: "https://github.com/bruin-data/ingestr" }],
99
+ socialLinks: [
100
+ { icon: "github", link: "https://github.com/bruin-data/ingestr" },
101
+ ],
83
102
  },
84
103
  });
@@ -2,4 +2,4 @@
2
2
 
3
3
  This command is supposed to serve as a guide for the user to understand the various URI formats that are supported by the `ingestr` tool. The command will provide a list of supported sources and destinations, along with the URI format for each of them.
4
4
 
5
- For the detailed documentation, please refer to the [Sources & Destinations](../supported-sources/overview.md) section.
5
+ For the detailed documentation, please refer to the Sources & Destinations section on the sidebar.
@@ -36,4 +36,4 @@ This command will:
36
36
 
37
37
  ## Supported Sources & Destinations
38
38
 
39
- See the [Supported Sources & Destinations](/supported-sources/overview.md) page for a list of all supported sources and destinations. More to come soon!
39
+ See the Supported Sources & Destinations page for a list of all supported sources and destinations. More to come soon!
@@ -0,0 +1,37 @@
1
+ # Chess.com
2
+
3
+ [Chess.com](https://www.chess.com/) is an online platform offering chess games, tournaments, lessons, and more.
4
+
5
+ ingestr supports Chess.com as a source, primarily to play around with the data of players, games, and more since it doesn't require any authentication.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Chess is as follows:
10
+
11
+ ```plaintext
12
+ --source-uri 'chess://?players=<List[str]>'
13
+ ```
14
+
15
+ URI parameter:
16
+
17
+ - `players`: A list of player usernames for which you want to fetch data. If no usernames are provided, then data of 4 different players will be fetched.
18
+
19
+ ## Setting up a Chess Integration
20
+
21
+ Let's say you have a list of player usernames: max2 and peter23. Here's a sample command that will copy the data from Chess into a DuckDB database:
22
+
23
+ ```sh
24
+ ingestr ingest --source-uri 'chess://?players=max2,peter23' --source-table 'profiles' --dest-uri 'duckdb:///chess.duckdb' --dest-table 'players.profiles'
25
+ ```
26
+
27
+ The result of this command will be a table in the `chess.duckdb` database.
28
+
29
+ ## Available Tables
30
+
31
+ Chess source allows ingesting the following sources into separate tables:
32
+
33
+ - `profiles`: Retrieves player profiles based on a list of player usernames.
34
+ - `games`: Retrieves players' games for specified players.
35
+ - `archives`: Retrieves URLs to game archives for specified players.
36
+
37
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -0,0 +1,45 @@
1
+ # HubSpot
2
+
3
+ [HubSpot](https://www.hubspot.com/) is a customer relationship management software that helps businesses attract visitors, connect with customers, and close deals.
4
+
5
+ ingestr supports HubSpot as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for HubSpot is as follows:
10
+
11
+ ```plaintext
12
+ hubspot://?api_key=<api-key-here>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `api_key`: The API key is used for authentication with the HubSpot API.
18
+
19
+ The URI is used to connect to the HubSpot API for extracting data.
20
+
21
+ ## Setting up a HubSpot Integration
22
+
23
+ HubSpot requires a few steps to set up an integration; please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/hubspot#setup-guide).
24
+
25
+ Once you complete the guide, you should have an API key. Let's say your API key is `pat_test_12345`, here's a sample command that will copy the data from HubSpot into a duckdb database:
26
+
27
+ ```sh
28
+ ingestr ingest --source-uri 'hubspot://?api_key=pat_test_12345' --source-table 'companies' --dest-uri duckdb:///hubspot.duckdb --dest-table 'companies.data'
29
+ ```
30
+
31
+ The result of this command will be a table in the `hubspot.duckdb` database.
32
+
33
+ ## Available Tables
34
+
35
+ HubSpot source allows ingesting the following sources into separate tables:
36
+
37
+ - `companies`: Retrieves information about organizations.
38
+ - `deals`: Retrieves deal records and tracks deal progress.
39
+ - `products`: Retrieves pricing information of products.
40
+ - `tickets`: Handles requests for help from customers or users.
41
+ - `quotes`: Retrieves price proposals that salespeople can create and send to their contacts.
42
+ - `hubspot_events_for_objects`: Retrieves web analytics events for a given object type and object IDs.
43
+ - `contacts`: Retrieves information about visitors, potential customers, and leads.
44
+
45
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -0,0 +1,45 @@
1
+ # Stripe
2
+ [Stripe](https://www.stripe.com/) is a technology company that builds economic infrastructure for the internet, providing payment processing software and APIs for e-commerce websites and mobile applications.
3
+
4
+ ingestr supports Stripe as a source.
5
+
6
+ ## URI Format
7
+ The URI format for Stripe is as follows:
8
+
9
+ ```plaintext
10
+ stripe://?api_key=<api-key-here>
11
+ ```
12
+
13
+ URI parameters:
14
+ - `api_key`: the API key used for authentication with the Stripe API
15
+
16
+ The URI is used to connect to the Stripe API for extracting data. More details on setting up Stripe integrations can be found [here](https://stripe.com/docs/api).
17
+
18
+ ## Setting up a Stripe Integration
19
+
20
+ Stripe requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/stripe#setup-guide).
21
+
22
+ Once you complete the guide, you should have an API key. Let's say your API key is `sk_test_12345`, here's a sample command that will copy the data from Stripe into a duckdb database:
23
+
24
+ ```sh
25
+ ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges' --dest-uri duckdb:///stripe.duckdb --dest-table 'stripe.charges'
26
+ ```
27
+
28
+ The result of this command will be a table in the `stripe.duckdb` database with JSON columns.
29
+
30
+ ## Available Tables
31
+ Stripe source allows ingesting the following sources into separate tables:
32
+ - `subscription`: Represents a customer's subscription to a recurring service, detailing billing cycles, plans, and status.
33
+ - `account`: Contains information about a Stripe account, including balances, payouts, and account settings.
34
+ - `coupon`: Stores data about discount codes or coupons that can be applied to invoices, subscriptions, or other charges.
35
+ - `customer`: Holds information about customers, such as billing details, payment methods, and associated transactions.
36
+ - `product`: Represents products that can be sold or subscribed to, including metadata and pricing information.
37
+ - `price`: Contains pricing information for products, including currency, amount, and billing intervals.
38
+ - `balancetransaction`: Records transactions that affect the Stripe account balance, such as charges, refunds, and payouts.
39
+ - `invoice`: Represents invoices sent to customers, detailing line items, amounts, and payment status.
40
+ - `event`: Logs all events in the Stripe account, including customer actions, account updates, and system-generated events.
41
+
42
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
43
+
44
+ > [!WARNING]
45
+ > Stripe does not support incremental loading for many endpoints in its APIs, which means ingestr will load endpoints incrementally if they support it, and do a full-refresh if not.
@@ -1,6 +1,7 @@
1
1
  import hashlib
2
2
  from datetime import datetime
3
3
  from enum import Enum
4
+ import tempfile
4
5
  from typing import Optional
5
6
 
6
7
  import dlt
@@ -236,6 +237,13 @@ def ingest(
236
237
  envvar="SCHEMA_NAMING",
237
238
  ),
238
239
  ] = SchemaNaming.default, # type: ignore
240
+ pipelines_dir: Annotated[
241
+ Optional[str],
242
+ typer.Option(
243
+ help="The path to store dlt-related pipeline metadata. By default, ingestr will create a temporary directory and delete it after the execution is done in order to make retries stateless.",
244
+ envvar="PIPELINES_DIR",
245
+ ),
246
+ ] = None, # type: ignore
239
247
  ):
240
248
  track(
241
249
  "command_triggered",
@@ -280,13 +288,18 @@ def ingest(
280
288
  if progress == Progress.log:
281
289
  progressInstance = LogCollector(dump_system_stats=False)
282
290
 
291
+ is_pipelines_dir_temp = False
292
+ if pipelines_dir is None:
293
+ pipelines_dir = tempfile.mkdtemp()
294
+ is_pipelines_dir_temp = True
295
+
283
296
  pipeline = dlt.pipeline(
284
297
  pipeline_name=m.hexdigest(),
285
298
  destination=destination.dlt_dest(
286
299
  uri=dest_uri,
287
300
  ),
288
301
  progress=progressInstance,
289
- pipelines_dir="pipeline_data",
302
+ pipelines_dir=pipelines_dir,
290
303
  refresh="drop_resources" if full_refresh else None,
291
304
  )
292
305
 
@@ -362,6 +375,8 @@ def ingest(
362
375
  if incremental_strategy != IncrementalStrategy.none:
363
376
  write_disposition = incremental_strategy.value
364
377
 
378
+ start_time = datetime.now()
379
+
365
380
  run_info: LoadInfo = pipeline.run(
366
381
  dlt_source,
367
382
  **destination.dlt_run_params(
@@ -389,11 +404,17 @@ def ingest(
389
404
 
390
405
  destination.post_load()
391
406
 
407
+ end_time = datetime.now()
392
408
  elapsedHuman = ""
393
409
  if run_info.started_at:
394
- elapsed = run_info.finished_at - run_info.started_at
410
+ elapsed = end_time - start_time
395
411
  elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
396
412
 
413
+ # remove the pipelines_dir folder if it was created by ingestr
414
+ if is_pipelines_dir_temp:
415
+ import shutil
416
+ shutil.rmtree(pipelines_dir)
417
+
397
418
  print(
398
419
  f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
399
420
  )
@@ -0,0 +1,166 @@
1
+ """A source loading player profiles and games from chess.com api"""
2
+
3
+ from typing import Any, Callable, Dict, Iterator, List, Sequence
4
+
5
+ import dlt
6
+ from dlt.common import pendulum
7
+ from dlt.common.typing import TDataItem
8
+ from dlt.sources import DltResource
9
+ from dlt.sources.helpers import requests
10
+
11
+ from .helpers import get_path_with_retry, get_url_with_retry, validate_month_string
12
+ from .settings import UNOFFICIAL_CHESS_API_URL
13
+
14
+
15
@dlt.source(name="chess")
def source(
    players: List[str], start_month: str = None, end_month: str = None
) -> Sequence[DltResource]:
    """
    Bundle the chess.com resources defined in this module into one dlt source.

    Args:
        players (List[str]): Usernames of the players whose data is requested.
        start_month (str, optional): Forwarded to `players_games`; archives before this
            "YYYY/MM" month are skipped. Defaults to None (no lower bound).
        end_month (str, optional): Forwarded to `players_games`; archives after this
            "YYYY/MM" month are skipped. Defaults to None (no upper bound).

    Returns:
        Sequence[DltResource]: The resources, in order: players_profiles,
        players_archives, players_games, players_online_status.
    """
    profiles = players_profiles(players)
    archives = players_archives(players)
    # only the games resource understands the month window
    games = players_games(players, start_month=start_month, end_month=end_month)
    online_status = players_online_status(players)
    return (profiles, archives, games, online_status)
36
+
37
+
38
@dlt.resource(
    write_disposition="replace",
    columns={
        "last_online": {"data_type": "timestamp"},
        "joined": {"data_type": "timestamp"},
    },
)
def players_profiles(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield one deferred profile lookup per requested player.

    Args:
        players (List[str]): Usernames whose profiles should be fetched.

    Yields:
        Iterator[TDataItem]: For each username, a `dlt.defer`-wrapped call that
        requests `player/{username}` from the official API.
    """

    # wrapping the request in dlt.defer hands its evaluation over to dlt,
    # so the individual profile requests do not have to run one after another
    @dlt.defer
    def _fetch_profile(player_name: str) -> TDataItem:
        profile_path = f"player/{player_name}"
        return get_path_with_retry(profile_path)

    for player_name in players:
        deferred_profile = _fetch_profile(player_name)
        yield deferred_profile
61
+
62
+
63
@dlt.resource(write_disposition="replace", selected=False)
def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
    """
    Yield, per player, the list of game-archive URLs reported by the API.

    Args:
        players (List[str]): Usernames whose archive listings should be fetched.

    Yields:
        Iterator[List[TDataItem]]: The value of the "archives" key of each
        `player/{username}/games/archives` response, or an empty list when
        the key is absent.
    """
    for player_name in players:
        listing = get_path_with_retry(f"player/{player_name}/games/archives")
        archive_urls = listing.get("archives", [])
        yield archive_urls
75
+
76
+
77
@dlt.resource(
    write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
)
def players_games(
    players: List[str], start_month: str = None, end_month: str = None
) -> Iterator[Callable[[], List[TDataItem]]]:
    """
    Yield deferred downloads of the monthly game archives of `players`.

    Archives outside the `start_month`..`end_month` window, and archives whose
    URL is already recorded in the resource state, are skipped.

    Args:
        players (List[str]): Usernames whose games should be fetched.
        start_month (str, optional): First month to include, as "YYYY/MM". Defaults to None.
        end_month (str, optional): Last month to include, as "YYYY/MM". Defaults to None.

    Yields:
        Iterator[Callable[[], List[TDataItem]]]: One `dlt.defer`-wrapped download per
        selected archive; each resolves to the list of games in that archive.

    Raises:
        ValueError: If a month string is rejected by `validate_month_string`.
    """
    # cheap sanity check of both bounds to catch common month-format mistakes
    for month in (start_month, end_month):
        validate_month_string(month)

    # URLs of archives already requested; kept in the dlt resource state so the
    # same list is available again on the next run of this resource
    seen_archives = dlt.current.resource_state().setdefault("archives", [])

    # defer the http request so dlt decides when (and how concurrently) to run it
    @dlt.defer
    def _download_games(archive_url: str) -> List[TDataItem]:
        try:
            return get_url_with_retry(archive_url).get("games", [])  # type: ignore
        except requests.HTTPError as http_err:
            # some archives are missing and the 404 appears to be permanent:
            # treat those as empty, re-raise everything else
            if http_err.response.status_code != 404:
                raise
            return []

    # a resource can be called like a function and iterated directly
    for archive_url in players_archives(players):
        # archive urls end in ".../games/{YYYY}/{MM}", so the last 7 characters
        # compare lexicographically against the "YYYY/MM" bounds
        month_suffix = archive_url[-7:]
        if start_month and month_suffix < start_month:
            continue
        if end_month and month_suffix > end_month:
            continue
        # never download the same archive twice
        if archive_url in seen_archives:
            continue
        seen_archives.append(archive_url)
        yield _download_games(archive_url)
126
+
127
+
128
@dlt.resource(write_disposition="append")
def players_online_status(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield the current online status of each requested player.

    Args:
        players (List[str]): Usernames whose online status should be checked.

    Yields:
        Iterator[TDataItem]: One dict per player with the keys "username",
        "onlineStatus", "lastLoginDate" and "check_time".
    """
    # the status comes from the unofficial endpoint; the official one seems to be removed
    for player_name in players:
        popup_url = f"{UNOFFICIAL_CHESS_API_URL}user/popup/{player_name}"
        popup = get_url_with_retry(popup_url)
        # keep only the relevant fields of the response
        record = {
            "username": player_name,
            "onlineStatus": popup["onlineStatus"],
            "lastLoginDate": popup["lastLoginDate"],
            "check_time": pendulum.now(),  # dlt can deal with native python dates
        }
        yield record
147
+
148
+
149
@dlt.source
def chess_dlt_config_example(
    secret_str: str = dlt.secrets.value,
    secret_dict: Dict[str, Any] = dlt.secrets.value,
    config_int: int = dlt.config.value,
) -> DltResource:
    """
    Example source whose arguments default to dlt-provided secrets and config values.

    Args:
        secret_str (str, optional): Secret string. Defaults to dlt.secrets.value.
        secret_dict (Dict[str, Any], optional): Secret dictionary. Defaults to dlt.secrets.value.
        config_int (int, optional): Config integer. Defaults to dlt.config.value.

    Returns:
        DltResource: A resource named "config_values" built from the three received values.
    """
    # test-only: the resource just hands back whatever values were passed in
    configured_values = [secret_str, secret_dict, config_int]
    return dlt.resource(configured_values, name="config_values")
@@ -0,0 +1,21 @@
1
+ """Chess source helpers"""
2
+
3
+ from dlt.common.typing import StrAny
4
+ from dlt.sources.helpers import requests
5
+
6
+ from .settings import OFFICIAL_CHESS_API_URL
7
+
8
+
9
def get_url_with_retry(url: str) -> StrAny:
    """Issue a GET request for `url` and return the decoded JSON body.

    The request goes through the `requests` helper imported from
    `dlt.sources.helpers`.
    """
    response = requests.get(url)
    payload = response.json()
    return payload  # type: ignore
12
+
13
+
14
def get_path_with_retry(path: str) -> StrAny:
    """Fetch `path` relative to the official chess.com API base URL and return its JSON body."""
    full_url = f"{OFFICIAL_CHESS_API_URL}{path}"
    return get_url_with_retry(full_url)
16
+
17
+
18
def validate_month_string(string: str) -> None:
    """Validate that `string` is a month in "YYYY/MM" format.

    Empty values (``None`` or ``""``) mean "no bound" and are accepted as-is.

    Args:
        string: The month string to check, e.g. "2023/01".

    Raises:
        ValueError: If `string` is non-empty and is not four digits, a slash
            and a two-digit month between 01 and 12. The offending value is
            the exception argument.
    """
    # local import: the module's top-level imports do not include `re`
    import re

    if not string:
        return
    # Match the whole string rather than probing string[4]: inputs shorter than
    # five characters previously raised IndexError, and values such as "2023/1"
    # or "abcd/ef" slipped through.
    if re.fullmatch(r"\d{4}/(0[1-9]|1[0-2])", string) is None:
        raise ValueError(string)
@@ -0,0 +1,4 @@
1
+ """Chess source settings and constants"""
2
+
3
+ OFFICIAL_CHESS_API_URL = "https://api.chess.com/pub/"
4
+ UNOFFICIAL_CHESS_API_URL = "https://www.chess.com/callback/"