ingestr 0.7.3__tar.gz → 0.7.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.7.3 → ingestr-0.7.5}/.github/workflows/tests.yml +1 -1
- ingestr-0.7.5/Dockerfile +37 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/PKG-INFO +11 -6
- {ingestr-0.7.3 → ingestr-0.7.5}/README.md +9 -4
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/.vitepress/config.mjs +1 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/overview.md +9 -4
- ingestr-0.7.5/docs/supported-sources/stripe.md +45 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/factory.py +4 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/sources.py +55 -0
- ingestr-0.7.5/ingestr/src/stripe_analytics/__init__.py +99 -0
- ingestr-0.7.5/ingestr/src/stripe_analytics/helpers.py +68 -0
- ingestr-0.7.5/ingestr/src/stripe_analytics/settings.py +14 -0
- ingestr-0.7.5/ingestr/src/version.py +1 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/pyproject.toml +2 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/requirements.txt +1 -1
- ingestr-0.7.3/Dockerfile +0 -45
- ingestr-0.7.3/ingestr/src/version.py +0 -1
- {ingestr-0.7.3 → ingestr-0.7.5}/.dockerignore +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/.gitignore +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/.python-version +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/LICENSE.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/Makefile +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/commands/example-uris.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/commands/ingest.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/index.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/main.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/destinations.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/sql_database/arrow_helpers.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/sql_database/override.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/package-lock.json +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/package.json +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/requirements-dev.txt +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/resources/demo.gif +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/resources/demo.tape +0 -0
- {ingestr-0.7.3 → ingestr-0.7.5}/resources/ingestr.svg +0 -0
ingestr-0.7.5/Dockerfile
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
FROM python:3.11-slim
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
COPY ./requirements.txt /app/requirements.txt
|
|
6
|
+
|
|
7
|
+
# Setup dependencies for pyodbc
|
|
8
|
+
RUN \
|
|
9
|
+
export ACCEPT_EULA='Y' && \
|
|
10
|
+
# Install build dependencies
|
|
11
|
+
apt-get update && \
|
|
12
|
+
apt-get install -y curl gcc libpq-dev build-essential unixodbc-dev g++ apt-transport-https
|
|
13
|
+
|
|
14
|
+
RUN \
|
|
15
|
+
# Install pyodbc db drivers for MSSQL and PostgreSQL
|
|
16
|
+
curl -sSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /usr/share/keyrings/microsoft-prod.gpg && \
|
|
17
|
+
curl -sSL https://packages.microsoft.com/config/debian/12/prod.list | tee /etc/apt/sources.list.d/mssql-release.list
|
|
18
|
+
|
|
19
|
+
RUN \
|
|
20
|
+
# install the rest of them
|
|
21
|
+
apt-get update && \
|
|
22
|
+
ACCEPT_EULA=Y apt-get install -y msodbcsql18 odbc-postgresql && \
|
|
23
|
+
# Update odbcinst.ini to make sure full path to driver is listed, and set CommLog to 0. i.e disables any communication logs to be written to files
|
|
24
|
+
sed 's/Driver=psql/Driver=\/usr\/lib\/x86_64-linux-gnu\/odbc\/psql/;s/CommLog=1/CommLog=0/' /etc/odbcinst.ini > /tmp/temp.ini && \
|
|
25
|
+
mv -f /tmp/temp.ini /etc/odbcinst.ini
|
|
26
|
+
|
|
27
|
+
ENV VIRTUAL_ENV=/usr/local
|
|
28
|
+
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
|
|
29
|
+
RUN /install.sh && rm /install.sh
|
|
30
|
+
|
|
31
|
+
RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
|
|
32
|
+
|
|
33
|
+
COPY . /app
|
|
34
|
+
|
|
35
|
+
RUN pip3 install -e .
|
|
36
|
+
|
|
37
|
+
ENTRYPOINT ["ingestr"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.7.3
|
|
3
|
+
Version: 0.7.5
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -14,7 +14,6 @@ Classifier: Operating System :: OS Independent
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Topic :: Database
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
|
-
Requires-Dist: connectorx==0.3.3
|
|
18
17
|
Requires-Dist: cx-oracle==8.3.0
|
|
19
18
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
20
19
|
Requires-Dist: dlt==0.5.1
|
|
@@ -39,6 +38,7 @@ Requires-Dist: sqlalchemy-hana==2.0.0
|
|
|
39
38
|
Requires-Dist: sqlalchemy-redshift==0.8.14
|
|
40
39
|
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
41
40
|
Requires-Dist: sqlalchemy==1.4.52
|
|
41
|
+
Requires-Dist: stripe==10.7.0
|
|
42
42
|
Requires-Dist: tqdm==4.66.2
|
|
43
43
|
Requires-Dist: typer==0.12.3
|
|
44
44
|
Description-Content-Type: text/markdown
|
|
@@ -176,22 +176,27 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
176
176
|
<tr>
|
|
177
177
|
<td>Gorgias</td>
|
|
178
178
|
<td>✅</td>
|
|
179
|
-
<td
|
|
179
|
+
<td>-</td>
|
|
180
180
|
</tr>
|
|
181
181
|
<tr>
|
|
182
182
|
<td>Google Sheets</td>
|
|
183
183
|
<td>✅</td>
|
|
184
|
-
<td
|
|
184
|
+
<td>-</td>
|
|
185
185
|
</tr>
|
|
186
186
|
<tr>
|
|
187
187
|
<td>Notion</td>
|
|
188
188
|
<td>✅</td>
|
|
189
|
-
<td
|
|
189
|
+
<td>-</td>
|
|
190
190
|
</tr>
|
|
191
191
|
<tr>
|
|
192
192
|
<td>Shopify</td>
|
|
193
193
|
<td>✅</td>
|
|
194
|
-
<td
|
|
194
|
+
<td>-</td>
|
|
195
|
+
</tr>
|
|
196
|
+
<tr>
|
|
197
|
+
<td>Stripe</td>
|
|
198
|
+
<td>✅</td>
|
|
199
|
+
<td>-</td>
|
|
195
200
|
</tr>
|
|
196
201
|
</table>
|
|
197
202
|
|
|
@@ -131,22 +131,27 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
131
131
|
<tr>
|
|
132
132
|
<td>Gorgias</td>
|
|
133
133
|
<td>✅</td>
|
|
134
|
-
<td
|
|
134
|
+
<td>-</td>
|
|
135
135
|
</tr>
|
|
136
136
|
<tr>
|
|
137
137
|
<td>Google Sheets</td>
|
|
138
138
|
<td>✅</td>
|
|
139
|
-
<td
|
|
139
|
+
<td>-</td>
|
|
140
140
|
</tr>
|
|
141
141
|
<tr>
|
|
142
142
|
<td>Notion</td>
|
|
143
143
|
<td>✅</td>
|
|
144
|
-
<td
|
|
144
|
+
<td>-</td>
|
|
145
145
|
</tr>
|
|
146
146
|
<tr>
|
|
147
147
|
<td>Shopify</td>
|
|
148
148
|
<td>✅</td>
|
|
149
|
-
<td
|
|
149
|
+
<td>-</td>
|
|
150
|
+
</tr>
|
|
151
|
+
<tr>
|
|
152
|
+
<td>Stripe</td>
|
|
153
|
+
<td>✅</td>
|
|
154
|
+
<td>-</td>
|
|
150
155
|
</tr>
|
|
151
156
|
</table>
|
|
152
157
|
|
|
@@ -73,6 +73,7 @@ export default defineConfig({
|
|
|
73
73
|
{ text: "Google Sheets", link: "/supported-sources/gsheets.md" },
|
|
74
74
|
{ text: "Notion", link: "/supported-sources/notion.md" },
|
|
75
75
|
{ text: "Shopify", link: "/supported-sources/shopify.md" },
|
|
76
|
+
{ text: "Stripe", link: "/supported-sources/stripe.md" },
|
|
76
77
|
],
|
|
77
78
|
},
|
|
78
79
|
],
|
|
@@ -82,22 +82,27 @@ ingestr supports the following sources and destinations:
|
|
|
82
82
|
<tr>
|
|
83
83
|
<td>Gorgias</td>
|
|
84
84
|
<td>✅</td>
|
|
85
|
-
<td
|
|
85
|
+
<td>-</td>
|
|
86
86
|
</tr>
|
|
87
87
|
<tr>
|
|
88
88
|
<td>Google Sheets</td>
|
|
89
89
|
<td>✅</td>
|
|
90
|
-
<td
|
|
90
|
+
<td>-</td>
|
|
91
91
|
</tr>
|
|
92
92
|
<tr>
|
|
93
93
|
<td>Notion</td>
|
|
94
94
|
<td>✅</td>
|
|
95
|
-
<td
|
|
95
|
+
<td>-</td>
|
|
96
96
|
</tr>
|
|
97
97
|
<tr>
|
|
98
98
|
<td>Shopify</td>
|
|
99
99
|
<td>✅</td>
|
|
100
|
-
<td
|
|
100
|
+
<td>-</td>
|
|
101
|
+
</tr>
|
|
102
|
+
<tr>
|
|
103
|
+
<td>Stripe</td>
|
|
104
|
+
<td>✅</td>
|
|
105
|
+
<td>-</td>
|
|
101
106
|
</tr>
|
|
102
107
|
</table>
|
|
103
108
|
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Stripe
|
|
2
|
+
[Stripe](https://www.stripe.com/) is a technology company that builds economic infrastructure for the internet, providing payment processing software and APIs for e-commerce websites and mobile applications.
|
|
3
|
+
|
|
4
|
+
ingestr supports Stripe as a source.
|
|
5
|
+
|
|
6
|
+
## URI Format
|
|
7
|
+
The URI format for Stripe is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
stripe://?api_key=<api-key-here>
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
URI parameters:
|
|
14
|
+
- `api_key`: the API key used for authentication with the Stripe API
|
|
15
|
+
|
|
16
|
+
The URI is used to connect to the Stripe API for extracting data. More details on setting up Stripe integrations can be found [here](https://stripe.com/docs/api).
|
|
17
|
+
|
|
18
|
+
## Setting up a Stripe Integration
|
|
19
|
+
|
|
20
|
+
Stripe requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/stripe#setup-guide).
|
|
21
|
+
|
|
22
|
+
Once you complete the guide, you should have an API key. Let's say your API key is `sk_test_12345`, here's a sample command that will copy the data from Stripe into a duckdb database:
|
|
23
|
+
|
|
24
|
+
```sh
|
|
25
|
+
ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges' --dest-uri duckdb:///stripe.duckdb --dest-table 'stripe.charges'
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
The result of this command will be a table in the `stripe.duckdb` database with JSON columns.
|
|
29
|
+
|
|
30
|
+
## Available Tables
|
|
31
|
+
Stripe source allows ingesting the following sources into separate tables:
|
|
32
|
+
- `subscription`: Represents a customer's subscription to a recurring service, detailing billing cycles, plans, and status.
|
|
33
|
+
- `account`: Contains information about a Stripe account, including balances, payouts, and account settings.
|
|
34
|
+
- `coupon`: Stores data about discount codes or coupons that can be applied to invoices, subscriptions, or other charges.
|
|
35
|
+
- `customer`: Holds information about customers, such as billing details, payment methods, and associated transactions.
|
|
36
|
+
- `product`: Represents products that can be sold or subscribed to, including metadata and pricing information.
|
|
37
|
+
- `price`: Contains pricing information for products, including currency, amount, and billing intervals.
|
|
38
|
+
- `balancetransaction`: Records transactions that affect the Stripe account balance, such as charges, refunds, and payouts.
|
|
39
|
+
- `invoice`: Represents invoices sent to customers, detailing line items, amounts, and payment status.
|
|
40
|
+
- `event`: Logs all events in the Stripe account, including customer actions, account updates, and system-generated events.
|
|
41
|
+
|
|
42
|
+
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
43
|
+
|
|
44
|
+
> [!WARNING]
|
|
45
|
+
> Stripe does not support incremental loading for many endpoints in its APIs, which means ingestr will load endpoints incrementally if they support it, and do a full-refresh if not.
|
|
@@ -22,6 +22,7 @@ from ingestr.src.sources import (
|
|
|
22
22
|
NotionSource,
|
|
23
23
|
ShopifySource,
|
|
24
24
|
SqlSource,
|
|
25
|
+
StripeAnalyticsSource,
|
|
25
26
|
)
|
|
26
27
|
|
|
27
28
|
SQL_SOURCE_SCHEMES = [
|
|
@@ -102,6 +103,9 @@ class SourceDestinationFactory:
|
|
|
102
103
|
return ShopifySource()
|
|
103
104
|
elif self.source_scheme == "gorgias":
|
|
104
105
|
return GorgiasSource()
|
|
106
|
+
elif self.source_scheme == "stripe":
|
|
107
|
+
return StripeAnalyticsSource()
|
|
108
|
+
|
|
105
109
|
else:
|
|
106
110
|
raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
|
|
107
111
|
|
|
@@ -12,6 +12,7 @@ from ingestr.src.mongodb import mongodb_collection
|
|
|
12
12
|
from ingestr.src.notion import notion_databases
|
|
13
13
|
from ingestr.src.shopify import shopify_source
|
|
14
14
|
from ingestr.src.sql_database import sql_table
|
|
15
|
+
from ingestr.src.stripe_analytics import stripe_source
|
|
15
16
|
from ingestr.src.table_definition import table_string_to_dataclass
|
|
16
17
|
|
|
17
18
|
|
|
@@ -295,3 +296,57 @@ class GoogleSheetsSource:
|
|
|
295
296
|
range_names=[table_fields.dataset],
|
|
296
297
|
get_named_ranges=False,
|
|
297
298
|
)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class StripeAnalyticsSource:
|
|
302
|
+
def handles_incrementality(self) -> bool:
|
|
303
|
+
return True
|
|
304
|
+
|
|
305
|
+
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
306
|
+
if kwargs.get("incremental_key"):
|
|
307
|
+
raise ValueError(
|
|
308
|
+
"Stripe takes care of incrementality on its own, you should not provide incremental_key"
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
api_key = None
|
|
312
|
+
source_field = urlparse(uri)
|
|
313
|
+
source_params = parse_qs(source_field.query)
|
|
314
|
+
api_key = source_params.get("api_key")
|
|
315
|
+
|
|
316
|
+
if not api_key:
|
|
317
|
+
raise ValueError("api_key in the URI is required to connect to Stripe")
|
|
318
|
+
|
|
319
|
+
endpoint = None
|
|
320
|
+
table = str.capitalize(table)
|
|
321
|
+
|
|
322
|
+
if table in [
|
|
323
|
+
"Subscription",
|
|
324
|
+
"Account",
|
|
325
|
+
"Coupon",
|
|
326
|
+
"Customer",
|
|
327
|
+
"Product",
|
|
328
|
+
"Price",
|
|
329
|
+
"BalanceTransaction",
|
|
330
|
+
"Invoice",
|
|
331
|
+
"Event",
|
|
332
|
+
]:
|
|
333
|
+
endpoint = table
|
|
334
|
+
else:
|
|
335
|
+
raise ValueError(
|
|
336
|
+
f"Resource '{table}' is not supported for stripe source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
date_args = {}
|
|
340
|
+
if kwargs.get("interval_start"):
|
|
341
|
+
date_args["start_date"] = kwargs.get("interval_start")
|
|
342
|
+
|
|
343
|
+
if kwargs.get("interval_end"):
|
|
344
|
+
date_args["end_date"] = kwargs.get("interval_end")
|
|
345
|
+
|
|
346
|
+
return stripe_source(
|
|
347
|
+
endpoints=[
|
|
348
|
+
endpoint,
|
|
349
|
+
],
|
|
350
|
+
stripe_secret_key=api_key[0],
|
|
351
|
+
**date_args,
|
|
352
|
+
).with_resources(endpoint)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""This source uses Stripe API and dlt to load data such as Customer, Subscription, Event etc. to the database and to calculate the MRR and churn rate."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Generator, Iterable, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
import stripe
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
from pendulum import DateTime
|
|
9
|
+
|
|
10
|
+
from .helpers import pagination, transform_date
|
|
11
|
+
from .settings import ENDPOINTS, INCREMENTAL_ENDPOINTS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dlt.source
|
|
15
|
+
def stripe_source(
|
|
16
|
+
endpoints: Tuple[str, ...] = ENDPOINTS,
|
|
17
|
+
stripe_secret_key: str = dlt.secrets.value,
|
|
18
|
+
start_date: Optional[DateTime] = None,
|
|
19
|
+
end_date: Optional[DateTime] = None,
|
|
20
|
+
) -> Iterable[DltResource]:
|
|
21
|
+
"""
|
|
22
|
+
Retrieves data from the Stripe API for the specified endpoints.
|
|
23
|
+
|
|
24
|
+
For all endpoints, Stripe API responses do not provide the key "updated",
|
|
25
|
+
so in most cases, we are forced to load the data in 'replace' mode.
|
|
26
|
+
This source is suitable for all types of endpoints, including 'Events', 'Invoice', etc.
|
|
27
|
+
but these endpoints can also be loaded in incremental mode (see source incremental_stripe_source).
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
endpoints (Tuple[str, ...]): A tuple of endpoint names to retrieve data from. Defaults to most popular Stripe API endpoints.
|
|
31
|
+
stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
|
|
32
|
+
start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to None.
|
|
33
|
+
end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to None.
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
Iterable[DltResource]: Resources with data that was created during the period greater than or equal to 'start_date' and less than 'end_date'.
|
|
37
|
+
"""
|
|
38
|
+
stripe.api_key = stripe_secret_key
|
|
39
|
+
stripe.api_version = "2022-11-15"
|
|
40
|
+
|
|
41
|
+
def stripe_resource(
|
|
42
|
+
endpoint: str,
|
|
43
|
+
) -> Generator[Dict[Any, Any], Any, None]:
|
|
44
|
+
yield from pagination(endpoint, start_date, end_date)
|
|
45
|
+
|
|
46
|
+
for endpoint in endpoints:
|
|
47
|
+
yield dlt.resource(
|
|
48
|
+
stripe_resource,
|
|
49
|
+
name=endpoint,
|
|
50
|
+
write_disposition="replace",
|
|
51
|
+
)(endpoint)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dlt.source
|
|
55
|
+
def incremental_stripe_source(
|
|
56
|
+
endpoints: Tuple[str, ...] = INCREMENTAL_ENDPOINTS,
|
|
57
|
+
stripe_secret_key: str = dlt.secrets.value,
|
|
58
|
+
initial_start_date: Optional[DateTime] = None,
|
|
59
|
+
end_date: Optional[DateTime] = None,
|
|
60
|
+
) -> Iterable[DltResource]:
|
|
61
|
+
"""
|
|
62
|
+
As Stripe API does not include the "updated" key in its responses,
|
|
63
|
+
we are only able to perform incremental downloads from endpoints where all objects are uneditable.
|
|
64
|
+
This source yields the resources with incremental loading based on "append" mode.
|
|
65
|
+
You will load only the newest data without duplicating and without downloading a huge amount of data each time.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
endpoints (tuple): A tuple of endpoint names to retrieve data from. Defaults to Stripe API endpoints with uneditable data.
|
|
69
|
+
stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
|
|
70
|
+
initial_start_date (Optional[DateTime]): An optional parameter that specifies the initial value for dlt.sources.incremental.
|
|
71
|
+
If parameter is not None, then load only data that were created after initial_start_date on the first run.
|
|
72
|
+
Defaults to None. Format: datetime(YYYY, MM, DD).
|
|
73
|
+
end_date (Optional[DateTime]): An optional end date to limit the data retrieved.
|
|
74
|
+
Defaults to None. Format: datetime(YYYY, MM, DD).
|
|
75
|
+
Returns:
|
|
76
|
+
Iterable[DltResource]: Resources with only that data has not yet been loaded.
|
|
77
|
+
"""
|
|
78
|
+
stripe.api_key = stripe_secret_key
|
|
79
|
+
stripe.api_version = "2022-11-15"
|
|
80
|
+
start_date_unix = (
|
|
81
|
+
transform_date(initial_start_date) if initial_start_date is not None else -1
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
def incremental_resource(
|
|
85
|
+
endpoint: str,
|
|
86
|
+
created: Optional[Any] = dlt.sources.incremental(
|
|
87
|
+
"created", initial_value=start_date_unix
|
|
88
|
+
),
|
|
89
|
+
) -> Generator[Dict[Any, Any], Any, None]:
|
|
90
|
+
start_value = created.last_value
|
|
91
|
+
yield from pagination(endpoint, start_date=start_value, end_date=end_date)
|
|
92
|
+
|
|
93
|
+
for endpoint in endpoints:
|
|
94
|
+
yield dlt.resource(
|
|
95
|
+
incremental_resource,
|
|
96
|
+
name=endpoint,
|
|
97
|
+
write_disposition="append",
|
|
98
|
+
primary_key="id",
|
|
99
|
+
)(endpoint)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Stripe analytics source helpers"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Iterable, Optional, Union
|
|
4
|
+
|
|
5
|
+
import stripe
|
|
6
|
+
from dlt.common import pendulum
|
|
7
|
+
from dlt.common.typing import TDataItem
|
|
8
|
+
from pendulum import DateTime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def pagination(
|
|
12
|
+
endpoint: str, start_date: Optional[Any] = None, end_date: Optional[Any] = None
|
|
13
|
+
) -> Iterable[TDataItem]:
|
|
14
|
+
"""
|
|
15
|
+
Retrieves data from an endpoint with pagination.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
endpoint (str): The endpoint to retrieve data from.
|
|
19
|
+
start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to None.
|
|
20
|
+
end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to None.
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
Iterable[TDataItem]: Data items retrieved from the endpoint.
|
|
24
|
+
"""
|
|
25
|
+
starting_after = None
|
|
26
|
+
while True:
|
|
27
|
+
response = stripe_get_data(
|
|
28
|
+
endpoint,
|
|
29
|
+
start_date=start_date,
|
|
30
|
+
end_date=end_date,
|
|
31
|
+
starting_after=starting_after,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
if len(response["data"]) > 0:
|
|
35
|
+
starting_after = response["data"][-1]["id"]
|
|
36
|
+
yield response["data"]
|
|
37
|
+
|
|
38
|
+
if not response["has_more"]:
|
|
39
|
+
break
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def transform_date(date: Union[str, DateTime, int]) -> int:
|
|
43
|
+
if isinstance(date, str):
|
|
44
|
+
date = pendulum.from_format(date, "%Y-%m-%dT%H:%M:%SZ")
|
|
45
|
+
if isinstance(date, DateTime):
|
|
46
|
+
# convert to unix timestamp
|
|
47
|
+
date = int(date.timestamp())
|
|
48
|
+
return date
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def stripe_get_data(
|
|
52
|
+
resource: str,
|
|
53
|
+
start_date: Optional[Any] = None,
|
|
54
|
+
end_date: Optional[Any] = None,
|
|
55
|
+
**kwargs: Any,
|
|
56
|
+
) -> Dict[Any, Any]:
|
|
57
|
+
if start_date:
|
|
58
|
+
start_date = transform_date(start_date)
|
|
59
|
+
if end_date:
|
|
60
|
+
end_date = transform_date(end_date)
|
|
61
|
+
|
|
62
|
+
if resource == "Subscription":
|
|
63
|
+
kwargs.update({"status": "all"})
|
|
64
|
+
|
|
65
|
+
resource_dict = getattr(stripe, resource).list(
|
|
66
|
+
created={"gte": start_date, "lt": end_date}, limit=100, **kwargs
|
|
67
|
+
)
|
|
68
|
+
return dict(resource_dict)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Stripe analytics source settings and constants"""
|
|
2
|
+
|
|
3
|
+
# the most popular endpoints
|
|
4
|
+
# Full list of the Stripe API endpoints you can find here: https://stripe.com/docs/api.
|
|
5
|
+
ENDPOINTS = (
|
|
6
|
+
"Subscription",
|
|
7
|
+
"Account",
|
|
8
|
+
"Coupon",
|
|
9
|
+
"Customer",
|
|
10
|
+
"Product",
|
|
11
|
+
"Price",
|
|
12
|
+
)
|
|
13
|
+
# possible incremental endpoints
|
|
14
|
+
INCREMENTAL_ENDPOINTS = ("Event", "Invoice", "BalanceTransaction")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.7.5"
|
|
@@ -68,6 +68,7 @@ exclude = [
|
|
|
68
68
|
'src/google_sheets/.*',
|
|
69
69
|
'src/shopify/.*',
|
|
70
70
|
'src/gorgias/.*',
|
|
71
|
+
'src/stripe_analytics/.*'
|
|
71
72
|
]
|
|
72
73
|
|
|
73
74
|
[[tool.mypy.overrides]]
|
|
@@ -77,6 +78,7 @@ module = [
|
|
|
77
78
|
"ingestr.src.google_sheets.*",
|
|
78
79
|
"ingestr.src.shopify.*",
|
|
79
80
|
"ingestr.src.gorgias.*",
|
|
81
|
+
"ingestr.src.stripe_analytics.*",
|
|
80
82
|
]
|
|
81
83
|
follow_imports = "skip"
|
|
82
84
|
|
ingestr-0.7.3/Dockerfile
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
FROM python:3.11-slim
|
|
2
|
-
|
|
3
|
-
WORKDIR /app
|
|
4
|
-
|
|
5
|
-
COPY ./requirements.txt /app/requirements.txt
|
|
6
|
-
|
|
7
|
-
# Setup dependencies for pyodbc
|
|
8
|
-
RUN \
|
|
9
|
-
export ACCEPT_EULA='Y' && \
|
|
10
|
-
export MYSQL_CONNECTOR='mysql-connector-odbc-8.0.33-linux-glibc2.28-x86-64bit' && \
|
|
11
|
-
export MYSQL_CONNECTOR_CHECKSUM='41d03d5df0c631f8071cc697f7714620' && \
|
|
12
|
-
# Install build dependencies
|
|
13
|
-
apt-get update && \
|
|
14
|
-
apt-get install -y curl gcc libpq-dev build-essential unixodbc-dev g++ apt-transport-https && \
|
|
15
|
-
# Install pyodbc db drivers for MSSQL, PG and MySQL
|
|
16
|
-
curl -sSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /usr/share/keyrings/microsoft-prod.gpg && \
|
|
17
|
-
curl -sSL https://packages.microsoft.com/config/debian/12/prod.list | tee /etc/apt/sources.list.d/mssql-release.list && \
|
|
18
|
-
# install the mysql connector
|
|
19
|
-
curl -L -o ${MYSQL_CONNECTOR}.tar.gz https://dev.mysql.com/get/Downloads/Connector-ODBC/8.0/${MYSQL_CONNECTOR}.tar.gz && \
|
|
20
|
-
echo "${MYSQL_CONNECTOR_CHECKSUM} ${MYSQL_CONNECTOR}.tar.gz" | md5sum -c - && \
|
|
21
|
-
gunzip ${MYSQL_CONNECTOR}.tar.gz && tar xvf ${MYSQL_CONNECTOR}.tar && \
|
|
22
|
-
cp -r ${MYSQL_CONNECTOR}/bin/* /usr/local/bin && cp -r ${MYSQL_CONNECTOR}/lib/* /usr/local/lib && \
|
|
23
|
-
myodbc-installer -a -d -n "MySQL ODBC 8.0.33 Driver" -t "Driver=/usr/local/lib/libmyodbc8w.so" && \
|
|
24
|
-
myodbc-installer -a -d -n "MySQL ODBC 8.0.33" -t "Driver=/usr/local/lib/libmyodbc8a.so" && \
|
|
25
|
-
# install the rest of them
|
|
26
|
-
apt-get update && \
|
|
27
|
-
ACCEPT_EULA=Y apt-get install -y msodbcsql17 msodbcsql18 odbc-postgresql && \
|
|
28
|
-
# Update odbcinst.ini to make sure full path to driver is listed, and set CommLog to 0. i.e disables any communication logs to be written to files
|
|
29
|
-
sed 's/Driver=psql/Driver=\/usr\/lib\/x86_64-linux-gnu\/odbc\/psql/;s/CommLog=1/CommLog=0/' /etc/odbcinst.ini > /tmp/temp.ini && \
|
|
30
|
-
mv -f /tmp/temp.ini /etc/odbcinst.ini && \
|
|
31
|
-
# Cleanup build dependencies
|
|
32
|
-
rm -rf ${MYSQL_CONNECTOR}*
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
ENV VIRTUAL_ENV=/usr/local
|
|
36
|
-
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
|
|
37
|
-
RUN /install.sh && rm /install.sh
|
|
38
|
-
|
|
39
|
-
RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
|
|
40
|
-
|
|
41
|
-
COPY . /app
|
|
42
|
-
|
|
43
|
-
RUN pip3 install -e .
|
|
44
|
-
|
|
45
|
-
ENTRYPOINT ["ingestr"]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.7.3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|