ingestr 0.7.5__tar.gz → 0.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (98) hide show
  1. {ingestr-0.7.5 → ingestr-0.7.6}/PKG-INFO +11 -1
  2. {ingestr-0.7.5 → ingestr-0.7.6}/README.md +10 -0
  3. {ingestr-0.7.5 → ingestr-0.7.6}/docs/.vitepress/config.mjs +26 -8
  4. {ingestr-0.7.5 → ingestr-0.7.6}/docs/commands/example-uris.md +1 -1
  5. {ingestr-0.7.5 → ingestr-0.7.6}/docs/getting-started/quickstart.md +1 -1
  6. ingestr-0.7.6/docs/supported-sources/chess.md +37 -0
  7. ingestr-0.7.6/docs/supported-sources/hubspot.md +45 -0
  8. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/main.py +23 -2
  9. ingestr-0.7.6/ingestr/src/chess/__init__.py +166 -0
  10. ingestr-0.7.6/ingestr/src/chess/helpers.py +21 -0
  11. ingestr-0.7.6/ingestr/src/chess/settings.py +4 -0
  12. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/factory.py +6 -1
  13. ingestr-0.7.6/ingestr/src/hubspot/__init__.py +281 -0
  14. ingestr-0.7.6/ingestr/src/hubspot/helpers.py +188 -0
  15. ingestr-0.7.6/ingestr/src/hubspot/settings.py +99 -0
  16. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/sources.py +81 -0
  17. ingestr-0.7.6/ingestr/src/version.py +1 -0
  18. {ingestr-0.7.5 → ingestr-0.7.6}/package-lock.json +233 -118
  19. {ingestr-0.7.5 → ingestr-0.7.6}/package.json +1 -1
  20. {ingestr-0.7.5 → ingestr-0.7.6}/pyproject.toml +5 -1
  21. ingestr-0.7.5/docs/supported-sources/overview.md +0 -109
  22. ingestr-0.7.5/ingestr/src/version.py +0 -1
  23. {ingestr-0.7.5 → ingestr-0.7.6}/.dockerignore +0 -0
  24. {ingestr-0.7.5 → ingestr-0.7.6}/.github/workflows/deploy-docs.yml +0 -0
  25. {ingestr-0.7.5 → ingestr-0.7.6}/.github/workflows/tests.yml +0 -0
  26. {ingestr-0.7.5 → ingestr-0.7.6}/.gitignore +0 -0
  27. {ingestr-0.7.5 → ingestr-0.7.6}/.python-version +0 -0
  28. {ingestr-0.7.5 → ingestr-0.7.6}/Dockerfile +0 -0
  29. {ingestr-0.7.5 → ingestr-0.7.6}/LICENSE.md +0 -0
  30. {ingestr-0.7.5 → ingestr-0.7.6}/Makefile +0 -0
  31. {ingestr-0.7.5 → ingestr-0.7.6}/docs/.vitepress/theme/custom.css +0 -0
  32. {ingestr-0.7.5 → ingestr-0.7.6}/docs/.vitepress/theme/index.js +0 -0
  33. {ingestr-0.7.5 → ingestr-0.7.6}/docs/commands/ingest.md +0 -0
  34. {ingestr-0.7.5 → ingestr-0.7.6}/docs/getting-started/core-concepts.md +0 -0
  35. {ingestr-0.7.5 → ingestr-0.7.6}/docs/getting-started/incremental-loading.md +0 -0
  36. {ingestr-0.7.5 → ingestr-0.7.6}/docs/getting-started/telemetry.md +0 -0
  37. {ingestr-0.7.5 → ingestr-0.7.6}/docs/index.md +0 -0
  38. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/bigquery.md +0 -0
  39. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/csv.md +0 -0
  40. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/databricks.md +0 -0
  41. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/duckdb.md +0 -0
  42. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/gorgias.md +0 -0
  43. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/gsheets.md +0 -0
  44. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/mongodb.md +0 -0
  45. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/mssql.md +0 -0
  46. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/mysql.md +0 -0
  47. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/notion.md +0 -0
  48. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/oracle.md +0 -0
  49. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/postgres.md +0 -0
  50. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/redshift.md +0 -0
  51. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/sap-hana.md +0 -0
  52. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/shopify.md +0 -0
  53. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/snowflake.md +0 -0
  54. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/sqlite.md +0 -0
  55. {ingestr-0.7.5 → ingestr-0.7.6}/docs/supported-sources/stripe.md +0 -0
  56. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/destinations.py +0 -0
  57. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/google_sheets/README.md +0 -0
  58. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/google_sheets/__init__.py +0 -0
  59. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  60. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  61. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  62. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/gorgias/__init__.py +0 -0
  63. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/gorgias/helpers.py +0 -0
  64. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/mongodb/__init__.py +0 -0
  65. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/mongodb/helpers.py +0 -0
  66. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/notion/__init__.py +0 -0
  67. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/notion/helpers/__init__.py +0 -0
  68. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/notion/helpers/client.py +0 -0
  69. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/notion/helpers/database.py +0 -0
  70. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/notion/settings.py +0 -0
  71. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/shopify/__init__.py +0 -0
  72. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/shopify/exceptions.py +0 -0
  73. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/shopify/helpers.py +0 -0
  74. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/shopify/settings.py +0 -0
  75. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/sql_database/__init__.py +0 -0
  76. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/sql_database/arrow_helpers.py +0 -0
  77. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/sql_database/helpers.py +0 -0
  78. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/sql_database/override.py +0 -0
  79. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/sql_database/schema_types.py +0 -0
  80. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/stripe_analytics/__init__.py +0 -0
  81. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/stripe_analytics/helpers.py +0 -0
  82. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/stripe_analytics/settings.py +0 -0
  83. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/table_definition.py +0 -0
  84. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/telemetry/event.py +0 -0
  85. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  86. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/.gitignore +0 -0
  87. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/create_replace.csv +0 -0
  88. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/delete_insert_expected.csv +0 -0
  89. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/delete_insert_part1.csv +0 -0
  90. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/delete_insert_part2.csv +0 -0
  91. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/merge_expected.csv +0 -0
  92. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/merge_part1.csv +0 -0
  93. {ingestr-0.7.5 → ingestr-0.7.6}/ingestr/testdata/merge_part2.csv +0 -0
  94. {ingestr-0.7.5 → ingestr-0.7.6}/requirements-dev.txt +0 -0
  95. {ingestr-0.7.5 → ingestr-0.7.6}/requirements.txt +0 -0
  96. {ingestr-0.7.5 → ingestr-0.7.6}/resources/demo.gif +0 -0
  97. {ingestr-0.7.5 → ingestr-0.7.6}/resources/demo.tape +0 -0
  98. {ingestr-0.7.5 → ingestr-0.7.6}/resources/ingestr.svg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.5
3
+ Version: 0.7.6
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -173,6 +173,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
173
173
  <tr>
174
174
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
175
175
  </tr>
176
+ <tr>
177
+ <td>Chess.com</td>
178
+ <td>✅</td>
179
+ <td>-</td>
180
+ </tr>
176
181
  <tr>
177
182
  <td>Gorgias</td>
178
183
  <td>✅</td>
@@ -183,6 +188,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
183
188
  <td>✅</td>
184
189
  <td>-</td>
185
190
  </tr>
191
+ <tr>
192
+ <td>HubSpot</td>
193
+ <td>✅</td>
194
+ <td>-</td>
195
+ </tr>
186
196
  <tr>
187
197
  <td>Notion</td>
188
198
  <td>✅</td>
@@ -128,6 +128,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
128
128
  <tr>
129
129
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
130
130
  </tr>
131
+ <tr>
132
+ <td>Chess.com</td>
133
+ <td>✅</td>
134
+ <td>-</td>
135
+ </tr>
131
136
  <tr>
132
137
  <td>Gorgias</td>
133
138
  <td>✅</td>
@@ -138,6 +143,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
138
143
  <td>✅</td>
139
144
  <td>-</td>
140
145
  </tr>
146
+ <tr>
147
+ <td>HubSpot</td>
148
+ <td>✅</td>
149
+ <td>-</td>
150
+ </tr>
141
151
  <tr>
142
152
  <td>Notion</td>
143
153
  <td>✅</td>
@@ -6,7 +6,13 @@ export default defineConfig({
6
6
  description: "Ingest & copy data between any source and any destination",
7
7
  base: "/ingestr/",
8
8
  head: [
9
- ["script", { async: "", src: "https://www.googletagmanager.com/gtag/js?id=G-MZJ20PP4MJ" }],
9
+ [
10
+ "script",
11
+ {
12
+ async: "",
13
+ src: "https://www.googletagmanager.com/gtag/js?id=G-MZJ20PP4MJ",
14
+ },
15
+ ],
10
16
  [
11
17
  "script",
12
18
  {},
@@ -22,7 +28,7 @@ export default defineConfig({
22
28
  { text: "Home", link: "/" },
23
29
  { text: "Getting started", link: "/getting-started/quickstart.md" },
24
30
  ],
25
- outline: 'deep',
31
+ outline: "deep",
26
32
 
27
33
  sidebar: [
28
34
  {
@@ -30,7 +36,10 @@ export default defineConfig({
30
36
  items: [
31
37
  { text: "Quickstart", link: "/getting-started/quickstart.md" },
32
38
  { text: "Core Concepts", link: "/getting-started/core-concepts.md" },
33
- { text: "Incremental Loading", link: "/getting-started/incremental-loading.md" },
39
+ {
40
+ text: "Incremental Loading",
41
+ link: "/getting-started/incremental-loading.md",
42
+ },
34
43
  { text: "Telemetry", link: "/getting-started/telemetry.md" },
35
44
  ],
36
45
  },
@@ -44,7 +53,6 @@ export default defineConfig({
44
53
  {
45
54
  text: "Sources & Destinations",
46
55
  items: [
47
- { text: "Overview", link: "/supported-sources/overview.md" },
48
56
  {
49
57
  text: "Databases",
50
58
  collapsed: false,
@@ -52,9 +60,15 @@ export default defineConfig({
52
60
  { text: "AWS Redshift", link: "/supported-sources/redshift.md" },
53
61
  { text: "Databricks", link: "/supported-sources/databricks.md" },
54
62
  { text: "DuckDB", link: "/supported-sources/duckdb.md" },
55
- { text: "Google BigQuery", link: "/supported-sources/bigquery.md" },
63
+ {
64
+ text: "Google BigQuery",
65
+ link: "/supported-sources/bigquery.md",
66
+ },
56
67
  { text: "Local CSV Files", link: "/supported-sources/csv.md" },
57
- { text: "Microsoft SQL Server", link: "/supported-sources/mssql.md" },
68
+ {
69
+ text: "Microsoft SQL Server",
70
+ link: "/supported-sources/mssql.md",
71
+ },
58
72
  { text: "MongoDB", link: "/supported-sources/mongodb.md" },
59
73
  { text: "MySQL", link: "/supported-sources/mysql.md" },
60
74
  { text: "Oracle", link: "/supported-sources/oracle.md" },
@@ -69,8 +83,10 @@ export default defineConfig({
69
83
  text: "Platforms",
70
84
  collapsed: false,
71
85
  items: [
72
- { text: "Gorgias", link: "/supported-sources/gorgias.md" },
86
+ { text: "Chess.com", link: "/supported-sources/chess.md" },
73
87
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
88
+ { text: "Gorgias", link: "/supported-sources/gorgias.md" },
89
+ { text: "HubSpot", link: "/supported-sources/hubspot.md" },
74
90
  { text: "Notion", link: "/supported-sources/notion.md" },
75
91
  { text: "Shopify", link: "/supported-sources/shopify.md" },
76
92
  { text: "Stripe", link: "/supported-sources/stripe.md" },
@@ -80,6 +96,8 @@ export default defineConfig({
80
96
  },
81
97
  ],
82
98
 
83
- socialLinks: [{ icon: "github", link: "https://github.com/bruin-data/ingestr" }],
99
+ socialLinks: [
100
+ { icon: "github", link: "https://github.com/bruin-data/ingestr" },
101
+ ],
84
102
  },
85
103
  });
@@ -2,4 +2,4 @@
2
2
 
3
3
  This command is supposed to serve as a guide for the user to understand the various URI formats that are supported by the `ingestr` tool. The command will provide a list of supported sources and destinations, along with the URI format for each of them.
4
4
 
5
- For the detailed documentation, please refer to the [Sources & Destinations](../supported-sources/overview.md) section.
5
+ For the detailed documentation, please refer to the Sources & Destinations section on the sidebar.
@@ -36,4 +36,4 @@ This command will:
36
36
 
37
37
  ## Supported Sources & Destinations
38
38
 
39
- See the [Supported Sources & Destinations](/supported-sources/overview.md) page for a list of all supported sources and destinations. More to come soon!
39
+ See the Supported Sources & Destinations page for a list of all supported sources and destinations. More to come soon!
@@ -0,0 +1,37 @@
1
+ # Chess.com
2
+
3
+ [Chess.com](https://www.chess.com/) is an online platform offering chess games, tournaments, lessons, and more.
4
+
5
+ ingestr supports Chess.com as a source, primarily to play around with the data of players, games, and more since it doesn't require any authentication.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Chess is as follows:
10
+
11
+ ```plaintext
12
+ --source-uri 'chess://?players=<List[str]>'
13
+ ```
14
+
15
+ URI parameter:
16
+
17
+ - `players`: A list of player usernames for which you want to fetch data. If no usernames are provided, then data of 4 different players will be fetched.
18
+
19
+ ## Setting up a Chess Integration
20
+
21
+ Let's say you have a list of player usernames: max2 and peter23. Here's a sample command that will copy the data from Chess into a DuckDB database:
22
+
23
+ ```sh
24
+ ingestr ingest --source-uri 'chess://?players=max2,peter23' --source-table 'profiles' --dest-uri 'duckdb:///chess.duckdb' --dest-table 'players.profiles'
25
+ ```
26
+
27
+ The result of this command will be a table in the `chess.duckdb` database.
28
+
29
+ ## Available Tables
30
+
31
+ Chess source allows ingesting the following sources into separate tables:
32
+
33
+ - `profiles`: Retrieves player profiles based on a list of player usernames.
34
+ - `games`: Retrieves players' games for specified players.
35
+ - `archives`: Retrieves URLs to game archives for specified players.
36
+
37
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -0,0 +1,45 @@
1
+ # HubSpot
2
+
3
+ [HubSpot](https://www.hubspot.com/) is a customer relationship management software that helps businesses attract visitors, connect with customers, and close deals.
4
+
5
+ ingestr supports HubSpot as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for HubSpot is as follows:
10
+
11
+ ```plaintext
12
+ hubspot://?api_key=<api-key-here>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `api_key`: The API key is used for authentication with the HubSpot API.
18
+
19
+ The URI is used to connect to the HubSpot API for extracting data.
20
+
21
+ ## Setting up a HubSpot Integration
22
+
23
+ HubSpot requires a few steps to set up an integration; please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/hubspot#setup-guide).
24
+
25
+ Once you complete the guide, you should have an API key. Let's say your API key is `pat_test_12345`; here's a sample command that will copy the data from HubSpot into a DuckDB database:
26
+
27
+ ```sh
28
+ ingestr ingest --source-uri 'hubspot://?api_key=pat_test_12345' --source-table 'companies' --dest-uri duckdb:///hubspot.duckdb --dest-table 'companies.data'
29
+ ```
30
+
31
+ The result of this command will be a table in the `hubspot.duckdb` database.
32
+
33
+ ## Available Tables
34
+
35
+ HubSpot source allows ingesting the following sources into separate tables:
36
+
37
+ - `companies`: Retrieves information about organizations.
38
+ - `deals`: Retrieves deal records and tracks deal progress.
39
+ - `products`: Retrieves pricing information of products.
40
+ - `tickets`: Handles requests for help from customers or users.
41
+ - `quotes`: Retrieves price proposals that salespeople can create and send to their contacts.
42
+ - `hubspot_events_for_objects`: Retrieves web analytics events for a given object type and object IDs.
43
+ - `contacts`: Retrieves information about visitors, potential customers, and leads.
44
+
45
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -1,6 +1,7 @@
1
1
  import hashlib
2
2
  from datetime import datetime
3
3
  from enum import Enum
4
+ import tempfile
4
5
  from typing import Optional
5
6
 
6
7
  import dlt
@@ -236,6 +237,13 @@ def ingest(
236
237
  envvar="SCHEMA_NAMING",
237
238
  ),
238
239
  ] = SchemaNaming.default, # type: ignore
240
+ pipelines_dir: Annotated[
241
+ Optional[str],
242
+ typer.Option(
243
+ help="The path to store dlt-related pipeline metadata. By default, ingestr will create a temporary directory and delete it after the execution is done in order to make retries stateless.",
244
+ envvar="PIPELINES_DIR",
245
+ ),
246
+ ] = None, # type: ignore
239
247
  ):
240
248
  track(
241
249
  "command_triggered",
@@ -280,13 +288,18 @@ def ingest(
280
288
  if progress == Progress.log:
281
289
  progressInstance = LogCollector(dump_system_stats=False)
282
290
 
291
+ is_pipelines_dir_temp = False
292
+ if pipelines_dir is None:
293
+ pipelines_dir = tempfile.mkdtemp()
294
+ is_pipelines_dir_temp = True
295
+
283
296
  pipeline = dlt.pipeline(
284
297
  pipeline_name=m.hexdigest(),
285
298
  destination=destination.dlt_dest(
286
299
  uri=dest_uri,
287
300
  ),
288
301
  progress=progressInstance,
289
- pipelines_dir="pipeline_data",
302
+ pipelines_dir=pipelines_dir,
290
303
  refresh="drop_resources" if full_refresh else None,
291
304
  )
292
305
 
@@ -362,6 +375,8 @@ def ingest(
362
375
  if incremental_strategy != IncrementalStrategy.none:
363
376
  write_disposition = incremental_strategy.value
364
377
 
378
+ start_time = datetime.now()
379
+
365
380
  run_info: LoadInfo = pipeline.run(
366
381
  dlt_source,
367
382
  **destination.dlt_run_params(
@@ -389,11 +404,17 @@ def ingest(
389
404
 
390
405
  destination.post_load()
391
406
 
407
+ end_time = datetime.now()
392
408
  elapsedHuman = ""
393
409
  if run_info.started_at:
394
- elapsed = run_info.finished_at - run_info.started_at
410
+ elapsed = end_time - start_time
395
411
  elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
396
412
 
413
+ # remove the pipelines_dir folder if it was created by ingestr
414
+ if is_pipelines_dir_temp:
415
+ import shutil
416
+ shutil.rmtree(pipelines_dir)
417
+
397
418
  print(
398
419
  f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
399
420
  )
@@ -0,0 +1,166 @@
1
+ """A source loading player profiles and games from chess.com api"""
2
+
3
+ from typing import Any, Callable, Dict, Iterator, List, Sequence
4
+
5
+ import dlt
6
+ from dlt.common import pendulum
7
+ from dlt.common.typing import TDataItem
8
+ from dlt.sources import DltResource
9
+ from dlt.sources.helpers import requests
10
+
11
+ from .helpers import get_path_with_retry, get_url_with_retry, validate_month_string
12
+ from .settings import UNOFFICIAL_CHESS_API_URL
13
+
14
+
15
@dlt.source(name="chess")
def source(
    players: List[str], start_month: str = None, end_month: str = None
) -> Sequence[DltResource]:
    """
    Bundle the chess.com resources defined in this module into one dlt source.

    Args:
        players (List[str]): Usernames of the players to load data for.
        start_month (str, optional): Forwarded to `players_games` as its lower month bound. Defaults to None.
        end_month (str, optional): Forwarded to `players_games` as its upper month bound. Defaults to None.
    Returns:
        Sequence[DltResource]: The resources, in this order: players_profiles,
        players_archives, players_games, players_online_status.
    """
    profiles = players_profiles(players)
    archives = players_archives(players)
    # only the games resource takes the month window
    games = players_games(players, start_month=start_month, end_month=end_month)
    online_status = players_online_status(players)
    return (profiles, archives, games, online_status)
36
+
37
+
38
@dlt.resource(
    write_disposition="replace",
    columns={
        "last_online": {"data_type": "timestamp"},
        "joined": {"data_type": "timestamp"},
    },
)
def players_profiles(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield one deferred profile lookup per player username.

    Args:
        players (List[str]): Usernames of the players whose profiles are requested.
    Yields:
        Iterator[TDataItem]: One deferred item per username, each resolving to the
        response of the `player/{username}` endpoint.
    """

    # the request is wrapped in dlt.defer so the profiles can be fetched in parallel
    @dlt.defer
    def _fetch_profile(player: str) -> TDataItem:
        profile_path = f"player/{player}"
        return get_path_with_retry(profile_path)

    yield from map(_fetch_profile, players)
61
+
62
+
63
@dlt.resource(write_disposition="replace", selected=False)
def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
    """
    Yield the game-archive URLs of each requested player.

    Args:
        players (List[str]): Usernames of the players whose archives are requested.
    Yields:
        Iterator[List[TDataItem]]: For every player, the value of the "archives" key
        of the response (an empty list when the key is missing).
    """
    for player in players:
        archives_path = f"player/{player}/games/archives"
        response = get_path_with_retry(archives_path)
        yield response.get("archives", [])
75
+
76
+
77
@dlt.resource(
    write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
)
def players_games(
    players: List[str], start_month: str = None, end_month: str = None
) -> Iterator[Callable[[], List[TDataItem]]]:
    """
    Yield the games of `players` played between `start_month` and `end_month`.

    Args:
        players (List[str]): Usernames of the players whose games are requested.
        start_month (str, optional): First month to include, in "YYYY/MM" format. Defaults to None.
        end_month (str, optional): Last month to include, in "YYYY/MM" format. Defaults to None.
    Yields:
        Iterator[Callable[[], List[TDataItem]]]: One deferred download per archive that
        is inside the month window and has not been recorded in the resource state yet.
    """
    # cheap sanity check of both bounds before any request is made
    for month in (start_month, end_month):
        validate_month_string(month)

    # archive urls already handled are remembered in the dlt resource state
    seen_archives = dlt.current.resource_state().setdefault("archives", [])
    # a resource can be called like a function and iterated like a list
    archive_urls = players_archives(players)

    # the download is wrapped in dlt.defer so the archives can be fetched in parallel
    @dlt.defer
    def _fetch_games(archive_url: str) -> List[TDataItem]:
        try:
            return get_url_with_retry(archive_url).get("games", [])  # type: ignore
        except requests.HTTPError as error:
            # some archives are permanently unavailable; treat a 404 as "no games"
            if error.response.status_code != 404:
                raise
            return []

    for archive_url in archive_urls:
        # archive urls end with {YYYY}/{MM}: https://api.chess.com/pub/player/{username}/games/{YYYY}/{MM}
        month_key = archive_url[-7:]
        skip = (
            (start_month and month_key < start_month)
            or (end_month and month_key > end_month)
            # never download the same archive twice
            or archive_url in seen_archives
        )
        if skip:
            continue
        seen_archives.append(archive_url)
        yield _fetch_games(archive_url)
126
+
127
+
128
@dlt.resource(write_disposition="append")
def players_online_status(players: List[str]) -> Iterator[TDataItem]:
    """
    Yield the current online status of each requested player.

    Args:
        players (List[str]): Usernames of the players to check.
    Yields:
        Iterator[TDataItem]: One record per player with the keys "username",
        "onlineStatus", "lastLoginDate" and "check_time".
    """
    # the status comes from the unofficial endpoint; the official one seems to be removed
    for username in players:
        popup_url = f"{UNOFFICIAL_CHESS_API_URL}user/popup/{username}"
        popup = get_url_with_retry(popup_url)
        # keep only the relevant fields of the response
        record = {
            "username": username,
            "onlineStatus": popup["onlineStatus"],
            "lastLoginDate": popup["lastLoginDate"],
            "check_time": pendulum.now(),  # dlt can deal with native python dates
        }
        yield record
147
+
148
+
149
@dlt.source
def chess_dlt_config_example(
    secret_str: str = dlt.secrets.value,
    secret_dict: Dict[str, Any] = dlt.secrets.value,
    config_int: int = dlt.config.value,
) -> DltResource:
    """
    Example source whose arguments are supplied by dlt secrets and config.

    Args:
        secret_str (str, optional): Secret string. Defaults to dlt.secrets.value.
        secret_dict (Dict[str, Any], optional): Secret dictionary. Defaults to dlt.secrets.value.
        config_int (int, optional): Config integer. Defaults to dlt.config.value.
    Returns:
        DltResource: A resource named "config_values" over the three received values.
    """
    # this is only a test: the resource simply hands back the configured values
    configured_values = [secret_str, secret_dict, config_int]
    return dlt.resource(configured_values, name="config_values")
@@ -0,0 +1,21 @@
1
+ """Chess source helpers"""
2
+
3
+ from dlt.common.typing import StrAny
4
+ from dlt.sources.helpers import requests
5
+
6
+ from .settings import OFFICIAL_CHESS_API_URL
7
+
8
+
9
def get_url_with_retry(url: str) -> StrAny:
    """Send a GET request to `url` and return the decoded JSON body."""
    return requests.get(url).json()  # type: ignore
12
+
13
+
14
def get_path_with_retry(path: str) -> StrAny:
    """Fetch `path` relative to the official chess.com API base URL."""
    full_url = f"{OFFICIAL_CHESS_API_URL}{path}"
    return get_url_with_retry(full_url)
16
+
17
+
18
def validate_month_string(string: str) -> None:
    """Validate that `string` is a month in "YYYY/MM" format.

    Falsy values (``None`` or ``""``) are accepted without any check.

    Args:
        string (str): The month string to check, e.g. "2023/01".
    Raises:
        ValueError: If a non-empty `string` is not four digits, a slash and a
            two-digit month between 01 and 12.
    """
    if not string:
        return

    year, separator, month = string.partition("/")
    digits = year + month
    is_valid = (
        separator == "/"
        and len(year) == 4
        and len(month) == 2
        # isascii() rejects non-ASCII digit characters that isdigit() would accept
        and digits.isascii()
        and digits.isdigit()
        and 1 <= int(month) <= 12
    )
    if not is_valid:
        # too-short inputs such as "2023" now raise ValueError, not IndexError
        raise ValueError(f"Invalid month {string!r}: expected 'YYYY/MM' format")
@@ -0,0 +1,4 @@
1
+ """Chess source settings and constants"""
2
+
3
+ OFFICIAL_CHESS_API_URL = "https://api.chess.com/pub/"
4
+ UNOFFICIAL_CHESS_API_URL = "https://www.chess.com/callback/"
@@ -15,8 +15,10 @@ from ingestr.src.destinations import (
15
15
  SynapseDestination,
16
16
  )
17
17
  from ingestr.src.sources import (
18
+ ChessSource,
18
19
  GoogleSheetsSource,
19
20
  GorgiasSource,
21
+ HubspotSource,
20
22
  LocalCsvSource,
21
23
  MongoDbSource,
22
24
  NotionSource,
@@ -103,9 +105,12 @@ class SourceDestinationFactory:
103
105
  return ShopifySource()
104
106
  elif self.source_scheme == "gorgias":
105
107
  return GorgiasSource()
108
+ elif self.source_scheme == "chess":
109
+ return ChessSource()
106
110
  elif self.source_scheme == "stripe":
107
111
  return StripeAnalyticsSource()
108
-
112
+ elif self.source_scheme == "hubspot":
113
+ return HubspotSource()
109
114
  else:
110
115
  raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
111
116