ingestr 0.13.93__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -1,157 +1,157 @@
1
- from typing import Any, Iterator, Optional
2
-
3
- import dlt
4
- from dlt.common.pendulum import pendulum
5
- from dlt.common.time import ensure_pendulum_datetime
6
- from dlt.common.typing import TAnyDateTime
7
-
8
- from ingestr.src.frankfurter.helpers import get_path_with_retry
9
-
10
-
11
- @dlt.source(
12
- name="frankfurter",
13
- max_table_nesting=0,
14
- )
15
- def frankfurter_source(
16
- start_date: TAnyDateTime,
17
- end_date: TAnyDateTime | None,
18
- base_currency: str,
19
- ) -> Any:
20
- """
21
- A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
22
- various types of data: currencies, latest rates, historical rates.
23
- """
24
-
25
- @dlt.resource(
26
- write_disposition="replace",
27
- )
28
- def currencies() -> Iterator[dict]:
29
- """
30
- Yields each currency as a separate row with two columns: currency_code and currency_name.
31
- """
32
- # Retrieve the list of currencies from the API
33
- currencies_data = get_path_with_retry("currencies")
34
-
35
- for currency_code, currency_name in currencies_data.items():
36
- yield {"currency_code": currency_code, "currency_name": currency_name}
37
-
38
- @dlt.resource(
39
- write_disposition="merge",
40
- columns={
41
- "date": {"data_type": "text"},
42
- "currency_code": {"data_type": "text"},
43
- "rate": {"data_type": "double"},
44
- "base_currency": {"data_type": "text"},
45
- },
46
- primary_key=["date", "currency_code", "base_currency"],
47
- )
48
- def latest(base_currency: Optional[str] = "") -> Iterator[dict]:
49
- """
50
- Fetches the latest exchange rates and yields them as rows.
51
- """
52
- # Base URL
53
- url = "latest?"
54
-
55
- if base_currency:
56
- url += f"base={base_currency}"
57
-
58
- # Fetch data
59
- data = get_path_with_retry(url)
60
-
61
- # Extract rates and base currency
62
- rates = data["rates"]
63
- date = pendulum.parse(data["date"])
64
-
65
- # Add the base currency with a rate of 1.0
66
- yield {
67
- "date": date,
68
- "currency_code": base_currency,
69
- "rate": 1.0,
70
- "base_currency": base_currency,
71
- }
72
-
73
- # Add all currencies and their rates
74
- for currency_code, rate in rates.items():
75
- yield {
76
- "date": date,
77
- "currency_code": currency_code,
78
- "rate": rate,
79
- "base_currency": base_currency,
80
- }
81
-
82
- @dlt.resource(
83
- write_disposition="merge",
84
- columns={
85
- "date": {"data_type": "text"},
86
- "currency_code": {"data_type": "text"},
87
- "rate": {"data_type": "double"},
88
- "base_currency": {"data_type": "text"},
89
- },
90
- primary_key=("date", "currency_code", "base_currency"),
91
- )
92
- def exchange_rates(
93
- date_time=dlt.sources.incremental(
94
- "date",
95
- initial_value=start_date,
96
- end_value=end_date,
97
- range_start="closed",
98
- range_end="closed",
99
- ),
100
- ) -> Iterator[dict]:
101
- """
102
- Fetches exchange rates for a specified date range.
103
- If only start_date is provided, fetches data until now.
104
- If both start_date and end_date are provided, fetches data for each day in the range.
105
- """
106
- if date_time.last_value is not None:
107
- start_date = date_time.last_value
108
- else:
109
- start_date = start_date
110
-
111
- if date_time.end_value is not None:
112
- end_date = date_time.end_value
113
- else:
114
- end_date = pendulum.now()
115
-
116
- # Ensure start_date.last_value is a pendulum.DateTime object
117
- start_date_obj = ensure_pendulum_datetime(start_date) # type: ignore
118
- start_date_str = start_date_obj.format("YYYY-MM-DD")
119
-
120
- # Ensure end_date is a pendulum.DateTime object
121
- end_date_obj = ensure_pendulum_datetime(end_date)
122
- end_date_str = end_date_obj.format("YYYY-MM-DD")
123
-
124
- # Compose the URL
125
- url = f"{start_date_str}..{end_date_str}?"
126
-
127
- if base_currency:
128
- url += f"base={base_currency}"
129
-
130
- # Fetch data from the API
131
- data = get_path_with_retry(url)
132
-
133
- # Extract base currency and rates from the API response
134
- rates = data["rates"]
135
-
136
- # Iterate over the rates dictionary (one entry per date)
137
- for date, daily_rates in rates.items():
138
- formatted_date = pendulum.parse(date)
139
-
140
- # Add the base currency with a rate of 1.0
141
- yield {
142
- "date": formatted_date,
143
- "currency_code": base_currency,
144
- "rate": 1.0,
145
- "base_currency": base_currency,
146
- }
147
-
148
- # Add all other currencies and their rates
149
- for currency_code, rate in daily_rates.items():
150
- yield {
151
- "date": formatted_date,
152
- "currency_code": currency_code,
153
- "rate": rate,
154
- "base_currency": base_currency,
155
- }
156
-
157
- return currencies, latest, exchange_rates
1
+ from typing import Any, Iterator, Optional
2
+
3
+ import dlt
4
+ from dlt.common.pendulum import pendulum
5
+ from dlt.common.time import ensure_pendulum_datetime
6
+ from dlt.common.typing import TAnyDateTime
7
+
8
+ from ingestr.src.frankfurter.helpers import get_path_with_retry
9
+
10
+
11
@dlt.source(
    name="frankfurter",
    max_table_nesting=0,
)
def frankfurter_source(
    start_date: TAnyDateTime,
    end_date: TAnyDateTime | None,
    base_currency: str,
) -> Any:
    """
    A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
    various types of data: currencies, latest rates, historical rates.

    Args:
        start_date: Initial value of the incremental cursor used by ``exchange_rates``.
        end_date: End value of the incremental cursor; when the cursor has no end value,
            ``exchange_rates`` fetches up to the current time.
        base_currency: Currency code appended to the request as ``base=...`` (when non-empty)
            and written to the ``base_currency`` column of the historical rows.

    Returns:
        The three resources: ``currencies``, ``latest`` and ``exchange_rates``.
    """

    @dlt.resource(
        write_disposition="replace",
    )
    def currencies() -> Iterator[dict]:
        """
        Yields each currency as a separate row with two columns: currency_code and currency_name.
        """
        # Retrieve the list of currencies from the API
        currencies_data = get_path_with_retry("currencies")

        for currency_code, currency_name in currencies_data.items():
            yield {"currency_code": currency_code, "currency_name": currency_name}

    @dlt.resource(
        write_disposition="merge",
        columns={
            "date": {"data_type": "text"},
            "currency_code": {"data_type": "text"},
            "rate": {"data_type": "double"},
            "base_currency": {"data_type": "text"},
        },
        primary_key=["date", "currency_code", "base_currency"],
    )
    def latest(base_currency: Optional[str] = "") -> Iterator[dict]:
        """
        Fetches the latest exchange rates and yields them as rows.
        """
        # NOTE(review): this parameter shadows the source-level ``base_currency``; with the
        # default "" the rows below are labelled with an empty currency code - confirm intended.
        # Base URL
        url = "latest?"

        if base_currency:
            url += f"base={base_currency}"

        # Fetch data
        data = get_path_with_retry(url)

        # Extract rates and the date they apply to
        rates = data["rates"]
        date = pendulum.parse(data["date"])

        # Add the base currency with a rate of 1.0
        yield {
            "date": date,
            "currency_code": base_currency,
            "rate": 1.0,
            "base_currency": base_currency,
        }

        # Add all currencies and their rates
        for currency_code, rate in rates.items():
            yield {
                "date": date,
                "currency_code": currency_code,
                "rate": rate,
                "base_currency": base_currency,
            }

    @dlt.resource(
        write_disposition="merge",
        columns={
            "date": {"data_type": "text"},
            "currency_code": {"data_type": "text"},
            "rate": {"data_type": "double"},
            "base_currency": {"data_type": "text"},
        },
        primary_key=("date", "currency_code", "base_currency"),
    )
    def exchange_rates(
        date_time=dlt.sources.incremental(
            "date",
            initial_value=start_date,
            end_value=end_date,
            range_start="closed",
            range_end="closed",
        ),
    ) -> Iterator[dict]:
        """
        Fetches exchange rates for a specified date range.
        If only start_date is provided, fetches data until now.
        If both start_date and end_date are provided, fetches data for each day in the range.
        """
        # Use fresh local names here. Assigning to ``start_date`` / ``end_date`` inside this
        # function would turn them into locals, and the fallback read of the source-level
        # ``start_date`` would then raise UnboundLocalError instead of using the closure value.
        if date_time.last_value is not None:
            range_start = date_time.last_value
        else:
            range_start = start_date

        if date_time.end_value is not None:
            range_end = date_time.end_value
        else:
            range_end = pendulum.now()

        # Normalize both bounds to pendulum.DateTime and render them as YYYY-MM-DD
        start_date_obj = ensure_pendulum_datetime(range_start)  # type: ignore
        start_date_str = start_date_obj.format("YYYY-MM-DD")

        end_date_obj = ensure_pendulum_datetime(range_end)
        end_date_str = end_date_obj.format("YYYY-MM-DD")

        # Compose the URL
        url = f"{start_date_str}..{end_date_str}?"

        if base_currency:
            url += f"base={base_currency}"

        # Fetch data from the API
        data = get_path_with_retry(url)

        # Extract the per-date rates from the API response
        rates = data["rates"]

        # Iterate over the rates dictionary (one entry per date)
        for date, daily_rates in rates.items():
            formatted_date = pendulum.parse(date)

            # Add the base currency with a rate of 1.0
            yield {
                "date": formatted_date,
                "currency_code": base_currency,
                "rate": 1.0,
                "base_currency": base_currency,
            }

            # Add all other currencies and their rates
            for currency_code, rate in daily_rates.items():
                yield {
                    "date": formatted_date,
                    "currency_code": currency_code,
                    "rate": rate,
                    "base_currency": base_currency,
                }

    return currencies, latest, exchange_rates
@@ -0,0 +1,49 @@
1
+ """Fundraiseup source for ingesting donations, events, fundraisers, recurring plans, and supporters."""
2
+
3
+ from typing import Any, Dict, Generator, Iterable
4
+
5
+ import dlt
6
+ from dlt.sources import DltResource
7
+
8
+ from .client import FundraiseupClient
9
+
10
+
11
@dlt.source(name="fundraiseup", max_table_nesting=0)
def fundraiseup_source(api_key: str) -> Iterable[DltResource]:
    """
    Build one dlt resource per Fundraiseup endpoint.

    Args:
        api_key: API key passed to the Fundraiseup client

    Returns:
        A list holding one resource for each configured endpoint
    """
    api = FundraiseupClient(api_key=api_key)

    # Endpoint name -> settings forwarded to dlt.resource
    endpoint_settings = {
        "donations": {"write_disposition": "replace", "primary_key": "id"},
        "events": {"write_disposition": "replace", "primary_key": "id"},
        "fundraisers": {"write_disposition": "replace", "primary_key": "id"},
        "recurring_plans": {"write_disposition": "replace", "primary_key": "id"},
        "supporters": {"write_disposition": "replace", "primary_key": "id"},
    }

    def create_resource(resource_name: str, config: Dict[str, Any]) -> DltResource:
        """Wrap a single endpoint in a dlt resource named after it."""
        disposition = config["write_disposition"]
        key = config["primary_key"]

        @dlt.resource(
            name=resource_name,
            write_disposition=disposition,
            primary_key=key,
        )
        def generic_resource() -> Generator[Dict[str, Any], None, None]:
            """Yield each page returned by the client as one batch."""
            for page in api.get_paginated_data(resource_name):
                yield page  # type: ignore[misc]

        return generic_resource()

    # One resource per endpoint, in the order the settings are declared
    built = []
    for endpoint, settings in endpoint_settings.items():
        built.append(create_resource(endpoint, settings))
    return built
@@ -0,0 +1,81 @@
1
+ """Fundraiseup API Client for handling authentication and paginated requests."""
2
+
3
+ from typing import Any, Dict, Iterator, Optional
4
+
5
+ from ingestr.src.http_client import create_client
6
+
7
+
8
class FundraiseupClient:
    """Client for interacting with Fundraiseup API v1."""

    def __init__(self, api_key: str):
        """
        Initialize Fundraiseup API client.

        Args:
            api_key: API key for authentication
        """
        self.api_key = api_key
        self.base_url = "https://api.fundraiseup.com/v1"
        # Use shared HTTP client with retry logic for rate limiting
        self.client = create_client(retry_status_codes=[429, 500, 502, 503, 504])

    def get_paginated_data(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        page_size: int = 100,
    ) -> Iterator[list[Dict[str, Any]]]:
        """
        Fetch paginated data from a Fundraiseup API endpoint using cursor-based pagination.

        The caller's ``params`` mapping is never modified; pagination keys
        (``limit`` and ``starting_after``) are written to a private copy.

        Args:
            endpoint: API endpoint path (e.g., "donations")
            params: Additional query parameters
            page_size: Number of items per page (default 100)

        Yields:
            Batches of items from the API

        Raises:
            Whatever ``response.raise_for_status()`` raises for an error response.
        """
        url = f"{self.base_url}/{endpoint}"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        # Copy so that adding pagination keys does not leak into the caller's dict
        # (which may be reused for another endpoint or another call).
        query: Dict[str, Any] = dict(params) if params else {}
        query["limit"] = page_size

        while True:
            response = self.client.get(url=url, headers=headers, params=query)
            response.raise_for_status()

            data = response.json()

            # Handle both list response and object with data array
            if isinstance(data, list):
                items = data
                # A bare list carries no "has_more" flag; a full page implies there may be more
                has_more = len(items) == page_size
            else:
                items = data.get("data", [])
                has_more = data.get("has_more", False)

            if not items:
                break

            yield items

            if not has_more:
                break

            # The id of the last item on this page is the cursor for the next page;
            # without an id there is no way to advance, so stop.
            cursor = items[-1].get("id")
            if not cursor:
                break
            query["starting_after"] = cursor
@@ -7,7 +7,7 @@ from typing import Iterator, List, Optional, Union
7
7
  import dlt
8
8
  from dlt.common import pendulum
9
9
  from dlt.common.typing import DictStrAny, TDataItem
10
- from dlt.extract import DltResource
10
+ from dlt.sources import DltResource
11
11
  from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
12
12
  from google.analytics.data_v1beta import BetaAnalyticsDataClient
13
13
  from google.analytics.data_v1beta.types import (
@@ -107,7 +107,7 @@ def mongodb_collection(
107
107
  projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
108
108
  pymongoarrow_schema: Optional[Any] = None,
109
109
  custom_query: Optional[List[Dict[str, Any]]] = None,
110
- ) -> Any:
110
+ ) -> DltResource:
111
111
  """
112
112
  A DLT source which loads a collection from a mongo database using PyMongo.
113
113