ingestr 0.7.4__tar.gz → 0.7.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.7.4 → ingestr-0.7.6}/.github/workflows/tests.yml +25 -25
- {ingestr-0.7.4 → ingestr-0.7.6}/PKG-INFO +21 -5
- {ingestr-0.7.4 → ingestr-0.7.6}/README.md +19 -4
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/.vitepress/config.mjs +27 -8
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/commands/example-uris.md +1 -1
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/quickstart.md +1 -1
- ingestr-0.7.6/docs/supported-sources/chess.md +37 -0
- ingestr-0.7.6/docs/supported-sources/hubspot.md +45 -0
- ingestr-0.7.6/docs/supported-sources/stripe.md +45 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/main.py +23 -2
- ingestr-0.7.6/ingestr/src/chess/__init__.py +166 -0
- ingestr-0.7.6/ingestr/src/chess/helpers.py +21 -0
- ingestr-0.7.6/ingestr/src/chess/settings.py +4 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/factory.py +9 -0
- ingestr-0.7.6/ingestr/src/hubspot/__init__.py +281 -0
- ingestr-0.7.6/ingestr/src/hubspot/helpers.py +188 -0
- ingestr-0.7.6/ingestr/src/hubspot/settings.py +99 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sources.py +136 -0
- ingestr-0.7.6/ingestr/src/stripe_analytics/__init__.py +99 -0
- ingestr-0.7.6/ingestr/src/stripe_analytics/helpers.py +68 -0
- ingestr-0.7.6/ingestr/src/stripe_analytics/settings.py +14 -0
- ingestr-0.7.6/ingestr/src/version.py +1 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/package-lock.json +233 -118
- {ingestr-0.7.4 → ingestr-0.7.6}/package.json +1 -1
- {ingestr-0.7.4 → ingestr-0.7.6}/pyproject.toml +6 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/requirements.txt +1 -0
- ingestr-0.7.4/docs/supported-sources/overview.md +0 -104
- ingestr-0.7.4/ingestr/src/version.py +0 -1
- {ingestr-0.7.4 → ingestr-0.7.6}/.dockerignore +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/.gitignore +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/.python-version +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/Dockerfile +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/LICENSE.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/Makefile +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/commands/ingest.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/index.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/destinations.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/arrow_helpers.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/override.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/requirements-dev.txt +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/resources/demo.gif +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/resources/demo.tape +0 -0
- {ingestr-0.7.4 → ingestr-0.7.6}/resources/ingestr.svg +0 -0
|
@@ -19,33 +19,33 @@ env:
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
jobs:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
22
|
+
tests:
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
- name: install Microsoft ODBC
|
|
27
|
+
run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
|
|
28
|
+
- uses: actions/setup-python@v4
|
|
29
|
+
with:
|
|
30
|
+
python-version: '3.11'
|
|
31
|
+
cache: 'pip'
|
|
32
|
+
- name: Cache dependencies
|
|
33
|
+
uses: actions/cache@v3
|
|
34
|
+
id: cache
|
|
35
|
+
with:
|
|
36
|
+
path: ${{ env.pythonLocation }}
|
|
37
|
+
key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
|
|
38
|
+
- name: Install pip dependencies
|
|
39
|
+
if: steps.cache.outputs.cache-hit != 'true'
|
|
40
|
+
run: make deps-ci
|
|
41
|
+
- name: run tests
|
|
42
|
+
run: make test-ci
|
|
43
|
+
- name: check the formatting
|
|
44
|
+
run: make lint-ci
|
|
45
45
|
|
|
46
46
|
build-and-push-image:
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
needs: tests
|
|
48
|
+
if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')
|
|
49
49
|
runs-on: ubuntu-latest
|
|
50
50
|
permissions:
|
|
51
51
|
contents: read
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.7.4
|
|
3
|
+
Version: 0.7.6
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -38,6 +38,7 @@ Requires-Dist: sqlalchemy-hana==2.0.0
|
|
|
38
38
|
Requires-Dist: sqlalchemy-redshift==0.8.14
|
|
39
39
|
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
40
40
|
Requires-Dist: sqlalchemy==1.4.52
|
|
41
|
+
Requires-Dist: stripe==10.7.0
|
|
41
42
|
Requires-Dist: tqdm==4.66.2
|
|
42
43
|
Requires-Dist: typer==0.12.3
|
|
43
44
|
Description-Content-Type: text/markdown
|
|
@@ -172,25 +173,40 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
172
173
|
<tr>
|
|
173
174
|
<td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
|
|
174
175
|
</tr>
|
|
176
|
+
<tr>
|
|
177
|
+
<td>Chess.com</td>
|
|
178
|
+
<td>✅</td>
|
|
179
|
+
<td>-</td>
|
|
180
|
+
</tr>
|
|
175
181
|
<tr>
|
|
176
182
|
<td>Gorgias</td>
|
|
177
183
|
<td>✅</td>
|
|
178
|
-
<td
|
|
184
|
+
<td>-</td>
|
|
179
185
|
</tr>
|
|
180
186
|
<tr>
|
|
181
187
|
<td>Google Sheets</td>
|
|
182
188
|
<td>✅</td>
|
|
183
|
-
<td
|
|
189
|
+
<td>-</td>
|
|
190
|
+
</tr>
|
|
191
|
+
<tr>
|
|
192
|
+
<td>HubSpot</td>
|
|
193
|
+
<td>✅</td>
|
|
194
|
+
<td>-</td>
|
|
184
195
|
</tr>
|
|
185
196
|
<tr>
|
|
186
197
|
<td>Notion</td>
|
|
187
198
|
<td>✅</td>
|
|
188
|
-
<td
|
|
199
|
+
<td>-</td>
|
|
189
200
|
</tr>
|
|
190
201
|
<tr>
|
|
191
202
|
<td>Shopify</td>
|
|
192
203
|
<td>✅</td>
|
|
193
|
-
<td
|
|
204
|
+
<td>-</td>
|
|
205
|
+
</tr>
|
|
206
|
+
<tr>
|
|
207
|
+
<td>Stripe</td>
|
|
208
|
+
<td>✅</td>
|
|
209
|
+
<td>-</td>
|
|
194
210
|
</tr>
|
|
195
211
|
</table>
|
|
196
212
|
|
|
@@ -128,25 +128,40 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
128
128
|
<tr>
|
|
129
129
|
<td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
|
|
130
130
|
</tr>
|
|
131
|
+
<tr>
|
|
132
|
+
<td>Chess.com</td>
|
|
133
|
+
<td>✅</td>
|
|
134
|
+
<td>-</td>
|
|
135
|
+
</tr>
|
|
131
136
|
<tr>
|
|
132
137
|
<td>Gorgias</td>
|
|
133
138
|
<td>✅</td>
|
|
134
|
-
<td
|
|
139
|
+
<td>-</td>
|
|
135
140
|
</tr>
|
|
136
141
|
<tr>
|
|
137
142
|
<td>Google Sheets</td>
|
|
138
143
|
<td>✅</td>
|
|
139
|
-
<td
|
|
144
|
+
<td>-</td>
|
|
145
|
+
</tr>
|
|
146
|
+
<tr>
|
|
147
|
+
<td>HubSpot</td>
|
|
148
|
+
<td>✅</td>
|
|
149
|
+
<td>-</td>
|
|
140
150
|
</tr>
|
|
141
151
|
<tr>
|
|
142
152
|
<td>Notion</td>
|
|
143
153
|
<td>✅</td>
|
|
144
|
-
<td
|
|
154
|
+
<td>-</td>
|
|
145
155
|
</tr>
|
|
146
156
|
<tr>
|
|
147
157
|
<td>Shopify</td>
|
|
148
158
|
<td>✅</td>
|
|
149
|
-
<td
|
|
159
|
+
<td>-</td>
|
|
160
|
+
</tr>
|
|
161
|
+
<tr>
|
|
162
|
+
<td>Stripe</td>
|
|
163
|
+
<td>✅</td>
|
|
164
|
+
<td>-</td>
|
|
150
165
|
</tr>
|
|
151
166
|
</table>
|
|
152
167
|
|
|
@@ -6,7 +6,13 @@ export default defineConfig({
|
|
|
6
6
|
description: "Ingest & copy data between any source and any destination",
|
|
7
7
|
base: "/ingestr/",
|
|
8
8
|
head: [
|
|
9
|
-
[
|
|
9
|
+
[
|
|
10
|
+
"script",
|
|
11
|
+
{
|
|
12
|
+
async: "",
|
|
13
|
+
src: "https://www.googletagmanager.com/gtag/js?id=G-MZJ20PP4MJ",
|
|
14
|
+
},
|
|
15
|
+
],
|
|
10
16
|
[
|
|
11
17
|
"script",
|
|
12
18
|
{},
|
|
@@ -22,7 +28,7 @@ export default defineConfig({
|
|
|
22
28
|
{ text: "Home", link: "/" },
|
|
23
29
|
{ text: "Getting started", link: "/getting-started/quickstart.md" },
|
|
24
30
|
],
|
|
25
|
-
outline:
|
|
31
|
+
outline: "deep",
|
|
26
32
|
|
|
27
33
|
sidebar: [
|
|
28
34
|
{
|
|
@@ -30,7 +36,10 @@ export default defineConfig({
|
|
|
30
36
|
items: [
|
|
31
37
|
{ text: "Quickstart", link: "/getting-started/quickstart.md" },
|
|
32
38
|
{ text: "Core Concepts", link: "/getting-started/core-concepts.md" },
|
|
33
|
-
{
|
|
39
|
+
{
|
|
40
|
+
text: "Incremental Loading",
|
|
41
|
+
link: "/getting-started/incremental-loading.md",
|
|
42
|
+
},
|
|
34
43
|
{ text: "Telemetry", link: "/getting-started/telemetry.md" },
|
|
35
44
|
],
|
|
36
45
|
},
|
|
@@ -44,7 +53,6 @@ export default defineConfig({
|
|
|
44
53
|
{
|
|
45
54
|
text: "Sources & Destinations",
|
|
46
55
|
items: [
|
|
47
|
-
{ text: "Overview", link: "/supported-sources/overview.md" },
|
|
48
56
|
{
|
|
49
57
|
text: "Databases",
|
|
50
58
|
collapsed: false,
|
|
@@ -52,9 +60,15 @@ export default defineConfig({
|
|
|
52
60
|
{ text: "AWS Redshift", link: "/supported-sources/redshift.md" },
|
|
53
61
|
{ text: "Databricks", link: "/supported-sources/databricks.md" },
|
|
54
62
|
{ text: "DuckDB", link: "/supported-sources/duckdb.md" },
|
|
55
|
-
{
|
|
63
|
+
{
|
|
64
|
+
text: "Google BigQuery",
|
|
65
|
+
link: "/supported-sources/bigquery.md",
|
|
66
|
+
},
|
|
56
67
|
{ text: "Local CSV Files", link: "/supported-sources/csv.md" },
|
|
57
|
-
{
|
|
68
|
+
{
|
|
69
|
+
text: "Microsoft SQL Server",
|
|
70
|
+
link: "/supported-sources/mssql.md",
|
|
71
|
+
},
|
|
58
72
|
{ text: "MongoDB", link: "/supported-sources/mongodb.md" },
|
|
59
73
|
{ text: "MySQL", link: "/supported-sources/mysql.md" },
|
|
60
74
|
{ text: "Oracle", link: "/supported-sources/oracle.md" },
|
|
@@ -69,16 +83,21 @@ export default defineConfig({
|
|
|
69
83
|
text: "Platforms",
|
|
70
84
|
collapsed: false,
|
|
71
85
|
items: [
|
|
72
|
-
{ text: "
|
|
86
|
+
{ text: "Chess.com", link: "/supported-sources/chess.md" },
|
|
73
87
|
{ text: "Google Sheets", link: "/supported-sources/gsheets.md" },
|
|
88
|
+
{ text: "Gorgias", link: "/supported-sources/gorgias.md" },
|
|
89
|
+
{ text: "HubSpot", link: "/supported-sources/hubspot.md" },
|
|
74
90
|
{ text: "Notion", link: "/supported-sources/notion.md" },
|
|
75
91
|
{ text: "Shopify", link: "/supported-sources/shopify.md" },
|
|
92
|
+
{ text: "Stripe", link: "/supported-sources/stripe.md" },
|
|
76
93
|
],
|
|
77
94
|
},
|
|
78
95
|
],
|
|
79
96
|
},
|
|
80
97
|
],
|
|
81
98
|
|
|
82
|
-
socialLinks: [
|
|
99
|
+
socialLinks: [
|
|
100
|
+
{ icon: "github", link: "https://github.com/bruin-data/ingestr" },
|
|
101
|
+
],
|
|
83
102
|
},
|
|
84
103
|
});
|
|
@@ -2,4 +2,4 @@
|
|
|
2
2
|
|
|
3
3
|
This command is supposed to serve as a guide for the user to understand the various URI formats that are supported by the `ingestr` tool. The command will provide a list of supported sources and destinations, along with the URI format for each of them.
|
|
4
4
|
|
|
5
|
-
For the detailed documentation, please refer to the
|
|
5
|
+
For the detailed documentation, please refer to the Sources & Destinations section on the sidebar.
|
|
@@ -36,4 +36,4 @@ This command will:
|
|
|
36
36
|
|
|
37
37
|
## Supported Sources & Destinations
|
|
38
38
|
|
|
39
|
-
See the
|
|
39
|
+
See the Supported Sources & Destinations page for a list of all supported sources and destinations. More to come soon!
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Chess.com
|
|
2
|
+
|
|
3
|
+
[Chess.com](https://www.chess.com/) is an online platform offering chess games, tournaments, lessons, and more.
|
|
4
|
+
|
|
5
|
+
ingestr supports Chess.com as a source. Since it doesn't require any authentication, it is primarily a convenient way to play around with the data of players, games, and more.
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for Chess is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
--source-uri 'chess://?players=<List[str]>'
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
URI parameter:
|
|
16
|
+
|
|
17
|
+
- `players`: A comma-separated list of player usernames for which you want to fetch data. If no usernames are provided, data for 4 different players will be fetched.
|
|
18
|
+
|
|
19
|
+
## Setting up a Chess Integration
|
|
20
|
+
|
|
21
|
+
Let's say you have a list of player usernames: max2 and peter23. Here's a sample command that will copy the data from Chess into a DuckDB database:
|
|
22
|
+
|
|
23
|
+
```sh
|
|
24
|
+
ingestr ingest --source-uri 'chess://?players=max2,peter23' --source-table 'profiles' --dest-uri 'duckdb:///chess.duckdb' --dest-table 'players.profiles'
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The result of this command will be a table in the `chess.duckdb` database.
|
|
28
|
+
|
|
29
|
+
## Available Tables
|
|
30
|
+
|
|
31
|
+
Chess source allows ingesting the following sources into separate tables:
|
|
32
|
+
|
|
33
|
+
- `profiles`: Retrieves player profiles based on a list of player usernames.
|
|
34
|
+
- `games`: Retrieves the games of the specified players.
|
|
35
|
+
- `archives`: Retrieves the URLs of the game archives for the specified players.
|
|
36
|
+
|
|
37
|
+
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# HubSpot
|
|
2
|
+
|
|
3
|
+
[HubSpot](https://www.hubspot.com/) is a customer relationship management software that helps businesses attract visitors, connect with customers, and close deals.
|
|
4
|
+
|
|
5
|
+
ingestr supports HubSpot as a source.
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for HubSpot is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
hubspot://?api_key=<api-key-here>
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
URI parameters:
|
|
16
|
+
|
|
17
|
+
- `api_key`: The API key is used for authentication with the HubSpot API.
|
|
18
|
+
|
|
19
|
+
The URI is used to connect to the HubSpot API for extracting data.
|
|
20
|
+
|
|
21
|
+
## Setting up a HubSpot Integration
|
|
22
|
+
|
|
23
|
+
HubSpot requires a few steps to set up an integration; please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/hubspot#setup-guide).
|
|
24
|
+
|
|
25
|
+
Once you complete the guide, you should have an API key. Let's say your API key is `pat_test_12345`; here's a sample command that will copy the data from HubSpot into a DuckDB database:
|
|
26
|
+
|
|
27
|
+
```sh
|
|
28
|
+
ingestr ingest --source-uri 'hubspot://?api_key=pat_test_12345' --source-table 'companies' --dest-uri duckdb:///hubspot.duckdb --dest-table 'companies.data'
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
The result of this command will be a table in the `hubspot.duckdb` database.
|
|
32
|
+
|
|
33
|
+
## Available Tables
|
|
34
|
+
|
|
35
|
+
HubSpot source allows ingesting the following sources into separate tables:
|
|
36
|
+
|
|
37
|
+
- `companies`: Retrieves information about organizations.
|
|
38
|
+
- `deals`: Retrieves deal records and tracks deal progress.
|
|
39
|
+
- `products`: Retrieves pricing information of products.
|
|
40
|
+
- `tickets`: Handles requests for help from customers or users.
|
|
41
|
+
- `quotes`: Retrieves price proposals that salespeople can create and send to their contacts.
|
|
42
|
+
- `hubspot_events_for_objects`: Retrieves web analytics events for a given object type and object IDs.
|
|
43
|
+
- `contacts`: Retrieves information about visitors, potential customers, and leads.
|
|
44
|
+
|
|
45
|
+
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Stripe
|
|
2
|
+
[Stripe](https://www.stripe.com/) is a technology company that builds economic infrastructure for the internet, providing payment processing software and APIs for e-commerce websites and mobile applications.
|
|
3
|
+
|
|
4
|
+
ingestr supports Stripe as a source.
|
|
5
|
+
|
|
6
|
+
## URI Format
|
|
7
|
+
The URI format for Stripe is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
stripe://?api_key=<api-key-here>
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
URI parameters:
|
|
14
|
+
- `api_key`: the API key used for authentication with the Stripe API
|
|
15
|
+
|
|
16
|
+
The URI is used to connect to the Stripe API for extracting data. More details on setting up Stripe integrations can be found [here](https://stripe.com/docs/api).
|
|
17
|
+
|
|
18
|
+
## Setting up a Stripe Integration
|
|
19
|
+
|
|
20
|
+
Stripe requires a few steps to set up an integration; please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/stripe#setup-guide).
|
|
21
|
+
|
|
22
|
+
Once you complete the guide, you should have an API key. Let's say your API key is `sk_test_12345`; here's a sample command that will copy the data from Stripe into a DuckDB database:
|
|
23
|
+
|
|
24
|
+
```sh
|
|
25
|
+
ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges' --dest-uri duckdb:///stripe.duckdb --dest-table 'stripe.charges'
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
The result of this command will be a table in the `stripe.duckdb` database with JSON columns.
|
|
29
|
+
|
|
30
|
+
## Available Tables
|
|
31
|
+
Stripe source allows ingesting the following sources into separate tables:
|
|
32
|
+
- `subscription`: Represents a customer's subscription to a recurring service, detailing billing cycles, plans, and status.
|
|
33
|
+
- `account`: Contains information about a Stripe account, including balances, payouts, and account settings.
|
|
34
|
+
- `coupon`: Stores data about discount codes or coupons that can be applied to invoices, subscriptions, or other charges.
|
|
35
|
+
- `customer`: Holds information about customers, such as billing details, payment methods, and associated transactions.
|
|
36
|
+
- `product`: Represents products that can be sold or subscribed to, including metadata and pricing information.
|
|
37
|
+
- `price`: Contains pricing information for products, including currency, amount, and billing intervals.
|
|
38
|
+
- `balancetransaction`: Records transactions that affect the Stripe account balance, such as charges, refunds, and payouts.
|
|
39
|
+
- `invoice`: Represents invoices sent to customers, detailing line items, amounts, and payment status.
|
|
40
|
+
- `event`: Logs all events in the Stripe account, including customer actions, account updates, and system-generated events.
|
|
41
|
+
|
|
42
|
+
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
43
|
+
|
|
44
|
+
> [!WARNING]
|
|
45
|
+
> Stripe does not support incremental loading for many endpoints in its APIs. ingestr loads an endpoint incrementally when the endpoint supports it, and falls back to a full refresh when it does not.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from enum import Enum
|
|
4
|
+
import tempfile
|
|
4
5
|
from typing import Optional
|
|
5
6
|
|
|
6
7
|
import dlt
|
|
@@ -236,6 +237,13 @@ def ingest(
|
|
|
236
237
|
envvar="SCHEMA_NAMING",
|
|
237
238
|
),
|
|
238
239
|
] = SchemaNaming.default, # type: ignore
|
|
240
|
+
pipelines_dir: Annotated[
|
|
241
|
+
Optional[str],
|
|
242
|
+
typer.Option(
|
|
243
|
+
help="The path to store dlt-related pipeline metadata. By default, ingestr will create a temporary directory and delete it after the execution is done in order to make retries stateless.",
|
|
244
|
+
envvar="PIPELINES_DIR",
|
|
245
|
+
),
|
|
246
|
+
] = None, # type: ignore
|
|
239
247
|
):
|
|
240
248
|
track(
|
|
241
249
|
"command_triggered",
|
|
@@ -280,13 +288,18 @@ def ingest(
|
|
|
280
288
|
if progress == Progress.log:
|
|
281
289
|
progressInstance = LogCollector(dump_system_stats=False)
|
|
282
290
|
|
|
291
|
+
is_pipelines_dir_temp = False
|
|
292
|
+
if pipelines_dir is None:
|
|
293
|
+
pipelines_dir = tempfile.mkdtemp()
|
|
294
|
+
is_pipelines_dir_temp = True
|
|
295
|
+
|
|
283
296
|
pipeline = dlt.pipeline(
|
|
284
297
|
pipeline_name=m.hexdigest(),
|
|
285
298
|
destination=destination.dlt_dest(
|
|
286
299
|
uri=dest_uri,
|
|
287
300
|
),
|
|
288
301
|
progress=progressInstance,
|
|
289
|
-
pipelines_dir=
|
|
302
|
+
pipelines_dir=pipelines_dir,
|
|
290
303
|
refresh="drop_resources" if full_refresh else None,
|
|
291
304
|
)
|
|
292
305
|
|
|
@@ -362,6 +375,8 @@ def ingest(
|
|
|
362
375
|
if incremental_strategy != IncrementalStrategy.none:
|
|
363
376
|
write_disposition = incremental_strategy.value
|
|
364
377
|
|
|
378
|
+
start_time = datetime.now()
|
|
379
|
+
|
|
365
380
|
run_info: LoadInfo = pipeline.run(
|
|
366
381
|
dlt_source,
|
|
367
382
|
**destination.dlt_run_params(
|
|
@@ -389,11 +404,17 @@ def ingest(
|
|
|
389
404
|
|
|
390
405
|
destination.post_load()
|
|
391
406
|
|
|
407
|
+
end_time = datetime.now()
|
|
392
408
|
elapsedHuman = ""
|
|
393
409
|
if run_info.started_at:
|
|
394
|
-
elapsed =
|
|
410
|
+
elapsed = end_time - start_time
|
|
395
411
|
elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
|
|
396
412
|
|
|
413
|
+
# remove the pipelines_dir folder if it was created by ingestr
|
|
414
|
+
if is_pipelines_dir_temp:
|
|
415
|
+
import shutil
|
|
416
|
+
shutil.rmtree(pipelines_dir)
|
|
417
|
+
|
|
397
418
|
print(
|
|
398
419
|
f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
|
|
399
420
|
)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""A source loading player profiles and games from chess.com api"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Callable, Dict, Iterator, List, Sequence
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
from dlt.common import pendulum
|
|
7
|
+
from dlt.common.typing import TDataItem
|
|
8
|
+
from dlt.sources import DltResource
|
|
9
|
+
from dlt.sources.helpers import requests
|
|
10
|
+
|
|
11
|
+
from .helpers import get_path_with_retry, get_url_with_retry, validate_month_string
|
|
12
|
+
from .settings import UNOFFICIAL_CHESS_API_URL
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dlt.source(name="chess")
|
|
16
|
+
def source(
|
|
17
|
+
players: List[str], start_month: str = None, end_month: str = None
|
|
18
|
+
) -> Sequence[DltResource]:
|
|
19
|
+
"""
|
|
20
|
+
A dlt source for the chess.com api. It groups several resources (in this case chess.com API endpoints) containing
|
|
21
|
+
various types of data: user profiles or chess match results
|
|
22
|
+
Args:
|
|
23
|
+
players (List[str]): A list of the player usernames for which to get the data.
|
|
24
|
+
start_month (str, optional): Filters out all the matches happening before `start_month`. Defaults to None.
|
|
25
|
+
end_month (str, optional): Filters out all the matches happening after `end_month`. Defaults to None.
|
|
26
|
+
Returns:
|
|
27
|
+
Sequence[DltResource]: A sequence of resources that can be selected from including players_profiles,
|
|
28
|
+
players_archives, players_games, players_online_status
|
|
29
|
+
"""
|
|
30
|
+
return (
|
|
31
|
+
players_profiles(players),
|
|
32
|
+
players_archives(players),
|
|
33
|
+
players_games(players, start_month=start_month, end_month=end_month),
|
|
34
|
+
players_online_status(players),
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dlt.resource(
|
|
39
|
+
write_disposition="replace",
|
|
40
|
+
columns={
|
|
41
|
+
"last_online": {"data_type": "timestamp"},
|
|
42
|
+
"joined": {"data_type": "timestamp"},
|
|
43
|
+
},
|
|
44
|
+
)
|
|
45
|
+
def players_profiles(players: List[str]) -> Iterator[TDataItem]:
|
|
46
|
+
"""
|
|
47
|
+
Yields player profiles for a list of player usernames.
|
|
48
|
+
Args:
|
|
49
|
+
players (List[str]): List of player usernames to retrieve profiles for.
|
|
50
|
+
Yields:
|
|
51
|
+
Iterator[TDataItem]: An iterator over player profiles data.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
# get archives in parallel by decorating the http request with defer
|
|
55
|
+
@dlt.defer
|
|
56
|
+
def _get_profile(username: str) -> TDataItem:
|
|
57
|
+
return get_path_with_retry(f"player/{username}")
|
|
58
|
+
|
|
59
|
+
for username in players:
|
|
60
|
+
yield _get_profile(username)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dlt.resource(write_disposition="replace", selected=False)
|
|
64
|
+
def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
|
|
65
|
+
"""
|
|
66
|
+
Yields url to game archives for specified players.
|
|
67
|
+
Args:
|
|
68
|
+
players (List[str]): List of player usernames to retrieve archives for.
|
|
69
|
+
Yields:
|
|
70
|
+
Iterator[List[TDataItem]]: An iterator over list of player archive data.
|
|
71
|
+
"""
|
|
72
|
+
for username in players:
|
|
73
|
+
data = get_path_with_retry(f"player/{username}/games/archives")
|
|
74
|
+
yield data.get("archives", [])
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dlt.resource(
|
|
78
|
+
write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
|
|
79
|
+
)
|
|
80
|
+
def players_games(
|
|
81
|
+
players: List[str], start_month: str = None, end_month: str = None
|
|
82
|
+
) -> Iterator[Callable[[], List[TDataItem]]]:
|
|
83
|
+
"""
|
|
84
|
+
Yields `players` games that happened between `start_month` and `end_month`.
|
|
85
|
+
Args:
|
|
86
|
+
players (List[str]): List of player usernames to retrieve games for.
|
|
87
|
+
start_month (str, optional): The starting month in the format "YYYY/MM". Defaults to None.
|
|
88
|
+
end_month (str, optional): The ending month in the format "YYYY/MM". Defaults to None.
|
|
89
|
+
Yields:
|
|
90
|
+
Iterator[Callable[[], List[TDataItem]]]: An iterator over callables that return a list of games for each player.
|
|
91
|
+
""" # do a simple validation to prevent common mistakes in month format
|
|
92
|
+
validate_month_string(start_month)
|
|
93
|
+
validate_month_string(end_month)
|
|
94
|
+
|
|
95
|
+
# get a list of already checked archives
|
|
96
|
+
# from your point of view, the state is python dictionary that will have the same content the next time this function is called
|
|
97
|
+
checked_archives = dlt.current.resource_state().setdefault("archives", [])
|
|
98
|
+
# get player archives, note that you can call the resource like any other function and just iterate it like a list
|
|
99
|
+
archives = players_archives(players)
|
|
100
|
+
|
|
101
|
+
# get archives in parallel by decorating the http request with defer
|
|
102
|
+
@dlt.defer
|
|
103
|
+
def _get_archive(url: str) -> List[TDataItem]:
|
|
104
|
+
try:
|
|
105
|
+
games = get_url_with_retry(url).get("games", [])
|
|
106
|
+
return games # type: ignore
|
|
107
|
+
except requests.HTTPError as http_err:
|
|
108
|
+
# sometimes archives are not available and the error seems to be permanent
|
|
109
|
+
if http_err.response.status_code == 404:
|
|
110
|
+
return []
|
|
111
|
+
raise
|
|
112
|
+
|
|
113
|
+
# enumerate the archives
|
|
114
|
+
for url in archives:
|
|
115
|
+
# the `url` format is https://api.chess.com/pub/player/{username}/games/{YYYY}/{MM}
|
|
116
|
+
if start_month and url[-7:] < start_month:
|
|
117
|
+
continue
|
|
118
|
+
if end_month and url[-7:] > end_month:
|
|
119
|
+
continue
|
|
120
|
+
# do not download archive again
|
|
121
|
+
if url in checked_archives:
|
|
122
|
+
continue
|
|
123
|
+
checked_archives.append(url)
|
|
124
|
+
# get the filtered archive
|
|
125
|
+
yield _get_archive(url)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dlt.resource(write_disposition="append")
|
|
129
|
+
def players_online_status(players: List[str]) -> Iterator[TDataItem]:
|
|
130
|
+
"""
|
|
131
|
+
Returns current online status for a list of players.
|
|
132
|
+
Args:
|
|
133
|
+
players (List[str]): List of player usernames to check online status for.
|
|
134
|
+
Yields:
|
|
135
|
+
Iterator[TDataItem]: An iterator over the online status of each player.
|
|
136
|
+
"""
|
|
137
|
+
# we'll use unofficial endpoint to get online status, the official seems to be removed
|
|
138
|
+
for player in players:
|
|
139
|
+
status = get_url_with_retry(f"{UNOFFICIAL_CHESS_API_URL}user/popup/{player}")
|
|
140
|
+
# return just relevant selection
|
|
141
|
+
yield {
|
|
142
|
+
"username": player,
|
|
143
|
+
"onlineStatus": status["onlineStatus"],
|
|
144
|
+
"lastLoginDate": status["lastLoginDate"],
|
|
145
|
+
"check_time": pendulum.now(), # dlt can deal with native python dates
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dlt.source
|
|
150
|
+
def chess_dlt_config_example(
|
|
151
|
+
secret_str: str = dlt.secrets.value,
|
|
152
|
+
secret_dict: Dict[str, Any] = dlt.secrets.value,
|
|
153
|
+
config_int: int = dlt.config.value,
|
|
154
|
+
) -> DltResource:
|
|
155
|
+
"""
|
|
156
|
+
An example of a source that uses dlt to provide secrets and config values.
|
|
157
|
+
Args:
|
|
158
|
+
secret_str (str, optional): Secret string provided by dlt.secrets.value. Defaults to dlt.secrets.value.
|
|
159
|
+
secret_dict (Dict[str, Any], optional): Secret dictionary provided by dlt.secrets.value. Defaults to dlt.secrets.value.
|
|
160
|
+
config_int (int, optional): Config integer provided by dlt.config.value. Defaults to dlt.config.value.
|
|
161
|
+
Returns:
|
|
162
|
+
DltResource: Returns a resource yielding the configured values.
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
# returns a resource yielding the configured values - it is just a test
|
|
166
|
+
return dlt.resource([secret_str, secret_dict, config_int], name="config_values")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Chess source helpers"""
|
|
2
|
+
|
|
3
|
+
from dlt.common.typing import StrAny
|
|
4
|
+
from dlt.sources.helpers import requests
|
|
5
|
+
|
|
6
|
+
from .settings import OFFICIAL_CHESS_API_URL
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_url_with_retry(url: str) -> StrAny:
|
|
10
|
+
r = requests.get(url)
|
|
11
|
+
return r.json() # type: ignore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_path_with_retry(path: str) -> StrAny:
|
|
15
|
+
return get_url_with_retry(f"{OFFICIAL_CHESS_API_URL}{path}")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def validate_month_string(string: str) -> None:
|
|
19
|
+
"""Validates that the string is in YYYY/MM format"""
|
|
20
|
+
if string and string[4] != "/":
|
|
21
|
+
raise ValueError(string)
|