ingestr 0.13.34__py3-none-any.whl → 0.13.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +14 -6
- ingestr/src/factory.py +2 -0
- ingestr/src/frankfurter/__init__.py +44 -36
- ingestr/src/frankfurter/helpers.py +2 -2
- ingestr/src/freshdesk/__init__.py +72 -0
- ingestr/src/freshdesk/freshdesk_client.py +102 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/google_analytics/__init__.py +21 -3
- ingestr/src/google_analytics/helpers.py +121 -6
- ingestr/src/sources.py +95 -60
- {ingestr-0.13.34.dist-info → ingestr-0.13.36.dist-info}/METADATA +2 -2
- {ingestr-0.13.34.dist-info → ingestr-0.13.36.dist-info}/RECORD +17 -14
- {ingestr-0.13.34.dist-info → ingestr-0.13.36.dist-info}/WHEEL +0 -0
- {ingestr-0.13.34.dist-info → ingestr-0.13.36.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.34.dist-info → ingestr-0.13.36.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/airtable/__init__.py
CHANGED
@@ -9,7 +9,7 @@ import pyairtable
 from dlt.sources import DltResource
 
 
-@dlt.source
+@dlt.source(max_table_nesting=1)
 def airtable_source(
     base_id: str = dlt.config.value,
     table_names: Optional[List[str]] = dlt.config.value,
@@ -50,12 +50,13 @@ def airtable_resource(
         It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
         table (Dict[str, Any]): Metadata about an airtable, does not contain the actual records
     """
+
     primary_key_id = table["primaryFieldId"]
     primary_key_field = [
         field for field in table["fields"] if field["id"] == primary_key_id
     ][0]
     table_name: str = table["name"]
-    primary_key: List[str] = [f"fields__{primary_key_field['name']}"]
+    primary_key: List[str] = [f"fields__{primary_key_field['name']}".lower()]
    air_table = api.table(base_id, table["id"])
 
    # Table.iterate() supports rich customization options, such as chunk size, fields, cell format, timezone, locale, and view
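The two Airtable changes above cap nesting of the record payload at one level and lowercase the derived primary-key column before it is handed to dlt. A minimal sketch of the key derivation, assuming a hypothetical table whose primary field is named "Order ID":

table = {
    "name": "orders",
    "primaryFieldId": "fld1",
    "fields": [{"id": "fld1", "name": "Order ID"}],
}  # hypothetical table metadata
primary_key_field = [
    f for f in table["fields"] if f["id"] == table["primaryFieldId"]
][0]
primary_key = [f"fields__{primary_key_field['name']}".lower()]
print(primary_key)  # ['fields__order id'] -- lowercased form of the primary field name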
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.34"
+version = "v0.13.36"
ingestr/src/destinations.py
CHANGED
@@ -235,12 +235,19 @@ class AthenaDestination:
         if not bucket.startswith("s3://"):
             bucket = f"s3://{bucket}"
 
-
-
-
-
-
-
+        bucket = bucket.rstrip("/")
+
+        dest_table = kwargs.get("dest_table", None)
+        if not dest_table:
+            raise ValueError("A destination table is required to connect to Athena.")
+
+        dest_table_fields = dest_table.split(".")
+        if len(dest_table_fields) != 2:
+            raise ValueError(
+                f"Table name must be in the format <schema>.<table>, given: {dest_table}"
+            )
+
+        query_result_path = f"{bucket}/{dest_table_fields[0]}_staging/metadata"
 
         access_key_id = source_params.get("access_key_id", [None])[0]
         secret_access_key = source_params.get("secret_access_key", [None])[0]
@@ -285,6 +292,7 @@ class AthenaDestination:
                 region_name=region_name,
             ),
             destination_name=bucket,
+            force_iceberg=True,
         )
 
     def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
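The new Athena validation requires a <schema>.<table> destination table and derives the staging path from the schema part. A minimal sketch of that logic, using a hypothetical bucket and table name:

bucket = "s3://my-athena-bucket/".rstrip("/")  # hypothetical bucket; trailing slash stripped
dest_table = "analytics.daily_rates"           # hypothetical <schema>.<table> value

schema, table_name = dest_table.split(".")     # anything other than two parts is rejected
query_result_path = f"{bucket}/{schema}_staging/metadata"
print(query_result_path)  # s3://my-athena-bucket/analytics_staging/metadata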
ingestr/src/factory.py
CHANGED
@@ -53,6 +53,7 @@ from ingestr.src.sources import (
     StripeAnalyticsSource,
     TikTokSource,
     ZendeskSource,
+    FreshdeskSource,
 )
 
 SQL_SOURCE_SCHEMES = [
@@ -148,6 +149,7 @@ class SourceDestinationFactory:
         "kinesis": KinesisSource,
         "pipedrive": PipedriveSource,
         "frankfurter": FrankfurterSource,
+        "freshdesk": FreshdeskSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
ingestr/src/frankfurter/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Iterator
+from typing import Any, Iterator
 
 import dlt
 from dlt.common.pendulum import pendulum
@@ -13,25 +13,28 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
     max_table_nesting=0,
 )
 def frankfurter_source(
-
-
-    end_date: Optional[TAnyDateTime] = None,
+    start_date: TAnyDateTime,
+    end_date: TAnyDateTime,
 ) -> Any:
     """
     A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
     various types of data: currencies, latest rates, historical rates.
-
-    Returns the appropriate resource based on the provided parameters.
     """
-
-
-
+    date_time = dlt.sources.incremental(
+
+        "date",
+        initial_value=start_date,
+        end_value=end_date,
+        range_start="closed",
+        range_end="closed",
+    )
 
-
-
+    return (
+        currencies(),
+        latest(),
+        exchange_rates(start_date=date_time, end_date=end_date),
 
-
-    return exchange_rates(start_date=start_date, end_date=end_date)
+    )
 
 
 @dlt.resource(
@@ -53,13 +56,13 @@ def currencies() -> Iterator[dict]:
 
 
 @dlt.resource(
-    write_disposition="
+    write_disposition="merge",
     columns={
         "date": {"data_type": "text"},
-        "
+        "currency_code": {"data_type": "text"},
         "rate": {"data_type": "double"},
     },
-    primary_key=["date", "
+    primary_key=["date", "currency_code"],  # Composite primary key
 )
 def latest() -> Iterator[dict]:
     """
@@ -69,50 +72,54 @@ def latest() -> Iterator[dict]:
     url = "latest?"
 
     # Fetch data
-
+    data = get_path_with_retry(url)
 
     # Extract rates and base currency
-    rates =
+    rates = data["rates"]
 
-
-    date = pendulum.now().to_date_string()
+    date = pendulum.parse(data["date"])
 
     # Add the base currency (EUR) with a rate of 1.0
     yield {
         "date": date,
-        "
+        "currency_code": "EUR",
        "rate": 1.0,
    }
 
    # Add all currencies and their rates
-    for
+    for currency_code, rate in rates.items():
        yield {
            "date": date,
-            "
+            "currency_code": currency_code,
            "rate": rate,
        }
 
 
 @dlt.resource(
-    write_disposition="
+    write_disposition="merge",
     columns={
         "date": {"data_type": "text"},
-        "
+        "currency_code": {"data_type": "text"},
         "rate": {"data_type": "double"},
     },
-    primary_key=
+    primary_key=("date", "currency_code"),  # Composite primary key
 )
 def exchange_rates(
-    start_date: TAnyDateTime,
     end_date: TAnyDateTime,
+    start_date: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental("date"),
 ) -> Iterator[dict]:
     """
     Fetches exchange rates for a specified date range.
-    If only start_date is provided, fetches data
+    If only start_date is provided, fetches data until now.
     If both start_date and end_date are provided, fetches data for each day in the range.
     """
-
-
+    # Ensure start_date.last_value is a pendulum.DateTime object
+    start_date_obj = ensure_pendulum_datetime(start_date.last_value)  # type: ignore
+    start_date_str = start_date_obj.format("YYYY-MM-DD")
+
+    # Ensure end_date is a pendulum.DateTime object
+    end_date_obj = ensure_pendulum_datetime(end_date)
+    end_date_str = end_date_obj.format("YYYY-MM-DD")
 
     # Compose the URL
     url = f"{start_date_str}..{end_date_str}?"
@@ -121,22 +128,23 @@ def exchange_rates(
     data = get_path_with_retry(url)
 
     # Extract base currency and rates from the API response
-    base_currency = data["base"]
     rates = data["rates"]
 
     # Iterate over the rates dictionary (one entry per date)
     for date, daily_rates in rates.items():
+        formatted_date = pendulum.parse(date)
+
         # Add the base currency with a rate of 1.0
         yield {
-            "date":
-            "
+            "date": formatted_date,
+            "currency_code": "EUR",
             "rate": 1.0,
         }
 
         # Add all other currencies and their rates
-        for
+        for currency_code, rate in daily_rates.items():
             yield {
-                "date":
-                "
+                "date": formatted_date,
+                "currency_code": currency_code,
                 "rate": rate,
             }
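The rewritten source now drives exchange_rates through a dlt incremental on the date field with closed range bounds and returns all three resources. A minimal usage sketch, assuming a local DuckDB destination and a hypothetical pipeline name:

import dlt
from dlt.common.pendulum import pendulum
from ingestr.src.frankfurter import frankfurter_source

# start/end must not be in the future (see validate_dates in helpers.py)
source = frankfurter_source(
    start_date=pendulum.datetime(2024, 1, 1),
    end_date=pendulum.datetime(2024, 1, 31),
)

pipeline = dlt.pipeline(pipeline_name="fx", destination="duckdb", dataset_name="frankfurter")
pipeline.run(source.with_resources("exchange_rates"))  # or "currencies" / "latest"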
ingestr/src/frankfurter/helpers.py
CHANGED
@@ -8,7 +8,7 @@ FRANKFURTER_API_URL = "https://api.frankfurter.dev/v1/"
 
 
 def get_url_with_retry(url: str) -> StrAny:
-    r = requests.get(url)
+    r = requests.get(url, timeout=5)
     return r.json()  # type: ignore
 
 
@@ -19,7 +19,7 @@ def get_path_with_retry(path: str) -> StrAny:
 def validate_dates(start_date: datetime, end_date: datetime) -> None:
     current_date = pendulum.now()
 
-    # Check if start_date is in the
+    # Check if start_date is in the futurep
     if start_date > current_date:
         raise ValueError("Interval-start cannot be in the future.")
 
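validate_dates rejects future start dates before any request is made, and the added timeout=5 keeps a hung frankfurter.dev call from blocking the pipeline. A small sketch of the validation behaviour, limited to the check shown above:

from dlt.common.pendulum import pendulum
from ingestr.src.frankfurter.helpers import validate_dates

validate_dates(start_date=pendulum.datetime(2024, 1, 1), end_date=pendulum.now())  # passes
validate_dates(start_date=pendulum.now().add(days=1), end_date=pendulum.now())
# raises ValueError: Interval-start cannot be in the future.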
ingestr/src/freshdesk/__init__.py
ADDED
@@ -0,0 +1,72 @@
+"""This source uses Freshdesk API and dlt to load data such as Agents, Companies, Tickets
+etc. to the database"""
+
+from typing import Any, Dict, Generator, Iterable, List, Optional
+
+import dlt
+from dlt.sources import DltResource
+
+from .freshdesk_client import FreshdeskClient
+from .settings import DEFAULT_ENDPOINTS
+
+
+@dlt.source()
+def freshdesk_source(
+    endpoints: Optional[List[str]] = None,
+    per_page: int = 100,
+    domain: str = dlt.secrets.value,
+    api_secret_key: str = dlt.secrets.value,
+) -> Iterable[DltResource]:
+    """
+    Retrieves data from specified Freshdesk API endpoints.
+
+    This source supports pagination and incremental data loading. It fetches data from a list of
+    specified endpoints, or defaults to predefined endpoints in 'settings.py'.
+
+    Args:
+        endpoints: A list of Freshdesk API endpoints to fetch. Deafults to 'settings.py'.
+        per_page: The number of items to fetch per page, with a maximum of 100.
+        domain: The Freshdesk domain from which to fetch the data. Defaults to 'config.toml'.
+        api_secret_key: Freshdesk API key. Defaults to 'secrets.toml'.
+
+    Yields:
+        Iterable[DltResource]: Resources with data updated after the last 'updated_at'
+        timestamp for each endpoint.
+    """
+    # Instantiate FreshdeskClient with the provided domain and API key
+    freshdesk = FreshdeskClient(api_key=api_secret_key, domain=domain)
+
+    def incremental_resource(
+        endpoint: str,
+        updated_at: Optional[Any] = dlt.sources.incremental(
+            "updated_at", initial_value="2022-01-01T00:00:00Z"
+        ),
+    ) -> Generator[Dict[Any, Any], Any, None]:
+        """
+        Fetches and yields paginated data from a specified API endpoint.
+        Each page of data is fetched based on the `updated_at` timestamp
+        to ensure incremental loading.
+        """
+
+        # Retrieve the last updated timestamp to fetch only new or updated records.
+        if updated_at is not None:
+            updated_at = updated_at.last_value
+
+        # Use the FreshdeskClient instance to fetch paginated responses
+        yield from freshdesk.paginated_response(
+            endpoint=endpoint,
+            per_page=per_page,
+            updated_at=updated_at,
+        )
+
+    # Set default endpoints if not provided
+    endpoints = endpoints or DEFAULT_ENDPOINTS
+
+    # For each endpoint, create and yield a DLT resource
+    for endpoint in endpoints:
+        yield dlt.resource(
+            incremental_resource,
+            name=endpoint,
+            write_disposition="merge",
+            primary_key="id",
+        )(endpoint=endpoint)
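A minimal way to run the new source directly with dlt, assuming a DuckDB destination and a hypothetical Freshdesk subdomain; within ingestr the same values arrive through the freshdesk:// URI handled in sources.py:

import dlt
from ingestr.src.freshdesk import freshdesk_source

source = freshdesk_source(
    endpoints=["tickets", "agents"],  # falls back to DEFAULT_ENDPOINTS when omitted
    domain="mycompany",               # hypothetical <domain>.freshdesk.com subdomain
    api_secret_key="YOUR_API_KEY",    # hypothetical key; normally read from secrets.toml
)

pipeline = dlt.pipeline(pipeline_name="freshdesk", destination="duckdb", dataset_name="freshdesk")
print(pipeline.run(source))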
ingestr/src/freshdesk/freshdesk_client.py
ADDED
@@ -0,0 +1,102 @@
+"""Freshdesk Client for making authenticated requests"""
+
+import logging
+import time
+from typing import Any, Dict, Iterable, Optional
+
+from dlt.common.typing import TDataItem
+from dlt.sources.helpers import requests
+
+
+class FreshdeskClient:
+    """
+    Client for making authenticated requests to the Freshdesk API. It incorporates API requests with
+    rate limit and pagination.
+
+    Attributes:
+        api_key (str): The API key used for authenticating requests to the Freshdesk API.
+        domain (str): The Freshdesk domain specific to the user, used in constructing the base URL.
+        base_url (str): The base URL constructed from the domain, targeting the Freshdesk API v2.
+    """
+
+    def __init__(self, api_key: str, domain: str):
+        # Initialize the FreshdeskClient instance with API key and domain.
+        # The API key is used for authentication with the Freshdesk API.
+        # The domain specifies the unique Freshdesk domain of the user.
+
+        # Store the API key provided during initialization.
+        self.api_key = api_key
+        # Store the Freshdesk domain provided during initialization.
+        self.domain = domain
+
+        # Construct the base URL for the API requests.
+        # This URL is formed by appending the domain to the standard Freshdesk API base URL format.
+        # All API requests will use this base URL as their starting point.
+        self.base_url = f"https://{domain}.freshdesk.com/api/v2"
+
+    def _request_with_rate_limit(self, url: str, **kwargs: Any) -> requests.Response:
+        """
+        Handles rate limits in HTTP requests and ensures
+        that the client doesn't exceed the limit set by the server.
+        """
+
+        while True:
+            try:
+                response = requests.get(url, **kwargs, auth=(self.api_key, "X"))
+                response.raise_for_status()
+
+                return response
+            except requests.HTTPError as e:
+                if e.response.status_code == 429:
+                    # Get the 'Retry-After' header to know how long to wait
+                    # Fallback to 60 seconds if header is missing
+                    seconds_to_wait = int(e.response.headers.get("Retry-After", 60))
+                    # Log a warning message
+                    logging.warning(
+                        "Rate limited. Waiting to retry after: %s secs", seconds_to_wait
+                    )
+
+                    # Wait for the specified number of seconds before retrying
+                    time.sleep(seconds_to_wait)
+                else:
+                    # If the error is not a rate limit (429), raise the exception to be
+                    # handled elsewhere or stop execution
+                    raise
+
+    def paginated_response(
+        self,
+        endpoint: str,
+        per_page: int,
+        updated_at: Optional[str] = None,
+    ) -> Iterable[TDataItem]:
+        """
+        Fetches a paginated response from a specified endpoint.
+
+        This method will continuously fetch data from the given endpoint,
+        page by page, until no more data is available or until it reaches data
+        updated at the specified timestamp.
+        """
+        page = 1
+        while True:
+            # Construct the URL for the specific endpoint
+            url = f"{self.base_url}/{endpoint}"
+
+            params: Dict[str, Any] = {"per_page": per_page, "page": page}
+
+            # Implement date range splitting logic here, if applicable
+            if endpoint in ["tickets", "contacts"]:
+                param_key = (
+                    "updated_since" if endpoint == "tickets" else "_updated_since"
+                )
+                if updated_at:
+                    params[param_key] = updated_at
+
+            # Handle requests with rate-limiting
+            # A maximum of 300 pages (30000 tickets) will be returned.
+            response = self._request_with_rate_limit(url, params=params)
+            data = response.json()
+
+            if not data:
+                break  # Stop if no data or max page limit reached
+            yield data
+            page += 1
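The client pages through an endpoint until an empty page comes back, sleeping on HTTP 429 according to the Retry-After header. A short usage sketch with a hypothetical subdomain and key:

from ingestr.src.freshdesk.freshdesk_client import FreshdeskClient

client = FreshdeskClient(api_key="YOUR_API_KEY", domain="mycompany")  # hypothetical credentials

# Each iteration yields one page of records; tickets filter on `updated_since`,
# contacts on `_updated_since`, as shown in paginated_response above.
for page in client.paginated_response(
    endpoint="tickets", per_page=100, updated_at="2024-01-01T00:00:00Z"
):
    print(len(page))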
ingestr/src/freshdesk/settings.py
ADDED
@@ -0,0 +1,9 @@
+"""
+This module defines default settings for the Freshdesk integration.
+
+It specifies a list of default endpoints to be used when interacting with the Freshdesk API,
+covering common entities such as agents, companies, contacts, groups, roles, and tickets.
+"""
+
+# Define default endpoints for the Freshdesk API integration.
+DEFAULT_ENDPOINTS = ["agents", "companies", "contacts", "groups", "roles", "tickets"]
ingestr/src/google_analytics/__init__.py
CHANGED
@@ -13,9 +13,10 @@ from google.analytics.data_v1beta import BetaAnalyticsDataClient
 from google.analytics.data_v1beta.types import (
     Dimension,
     Metric,
+    MinuteRange,
 )
 
-from .helpers import get_report
+from .helpers import get_realtime_report, get_report
 
 
 @dlt.source(max_table_nesting=0)
@@ -29,6 +30,7 @@ def google_analytics(
     start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
     end_date: Optional[pendulum.DateTime] = None,
     rows_per_page: int = 10000,
+    minute_range_objects: List[MinuteRange] | None = None,
 ) -> List[DltResource]:
     try:
         property_id = int(property_id)
@@ -58,7 +60,7 @@ def google_analytics(
     dimensions = query["dimensions"]
 
     @dlt.resource(
-        name="
+        name="custom",
         merge_key=datetime_dimension,
         write_disposition="merge",
     )
@@ -87,6 +89,22 @@
             end_date=end_date,
         )
 
+    # real time report
+    @dlt.resource(
+        name="realtime",
+        merge_key="ingested_at",
+        write_disposition="merge",
+    )
+    def real_time_report() -> Iterator[TDataItem]:
+        yield from get_realtime_report(
+            client=client,
+            property_id=property_id,
+            dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+            metric_list=[Metric(name=metric) for metric in query["metrics"]],
+            per_page=rows_per_page,
+            minute_range_objects=minute_range_objects,
+        )
+
     # res = dlt.resource(
     #     basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
     # )(
@@ -103,4 +121,4 @@ def google_analytics(
     #     ),
     #     )
 
-    return [basic_report]
+    return [basic_report, real_time_report]
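The realtime resource mirrors the existing report resource but is keyed on ingested_at and can be narrowed to minute windows. The windows are plain MinuteRange protos; a small sketch of what gets passed as minute_range_objects:

from google.analytics.data_v1beta.types import MinuteRange

# Two windows expressed in "minutes ago", matching the "1-2,5-6" string format parsed in helpers.py
minute_range_objects = [
    MinuteRange(name="1-2 minutes ago", start_minutes_ago=2, end_minutes_ago=1),
    MinuteRange(name="5-6 minutes ago", start_minutes_ago=6, end_minutes_ago=5),
]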
ingestr/src/google_analytics/helpers.py
CHANGED
@@ -2,8 +2,10 @@
 This module contains helpers that process data and make it ready for loading into the database
 """
 
+import base64
 import json
 from typing import Any, Iterator, List, Union
+from urllib.parse import parse_qs, urlparse
 
 import proto
 from dlt.common.exceptions import MissingDependencyException
@@ -22,6 +24,8 @@ try:
     Metric,
     MetricMetadata,  # noqa: F401
     MetricType,
+    MinuteRange,
+    RunRealtimeReportRequest,
     RunReportRequest,
     RunReportResponse,
 )
@@ -52,6 +56,53 @@ def to_dict(item: Any) -> Iterator[TDataItem]:
     yield item
 
 
+def get_realtime_report(
+    client: Resource,
+    property_id: int,
+    dimension_list: List[Dimension],
+    metric_list: List[Metric],
+    per_page: int,
+    minute_range_objects: List[MinuteRange] | None = None,
+) -> Iterator[TDataItem]:
+    """
+    Gets all the possible pages of reports with the given query parameters.
+    Processes every page and yields a dictionary for every row of the report.
+
+    Args:
+        client: The Google Analytics client used to make requests.
+        property_id: A reference to the Google Analytics project.
+            More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
+        dimension_list: A list of all the dimensions requested in the query.
+        metric_list: A list of all the metrics requested in the query.
+        limit: Describes how many rows there should be per page.
+
+    Yields:
+        Generator of all rows of data in the report.
+    """
+    offset = 0
+    ingest_at = pendulum.now().to_date_string()
+
+    while True:
+        request = RunRealtimeReportRequest(
+            property=f"properties/{property_id}",
+            dimensions=dimension_list,
+            metrics=metric_list,
+            limit=per_page,
+            minute_ranges=minute_range_objects if minute_range_objects else None,
+        )
+        response = client.run_realtime_report(request)
+
+        # process request
+        processed_response_generator = process_report(
+            response=response, ingest_at=ingest_at
+        )
+        # import pdb; pdb.set_trace()
+        yield from processed_response_generator
+        offset += per_page
+        if len(response.rows) < per_page or offset > 1000000:
+            break
+
+
 def get_report(
     client: Resource,
     property_id: int,
@@ -79,10 +130,6 @@ def get_report(
         Generator of all rows of data in the report.
     """
 
-    print(
-        "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
-    )
-
     offset = 0
     while True:
         request = RunReportRequest(
@@ -98,9 +145,11 @@ def get_report(
                 )
             ],
         )
-        # process request
         response = client.run_report(request)
+
+        # process request
         processed_response_generator = process_report(response=response)
+
         # import pdb; pdb.set_trace()
         yield from processed_response_generator
         offset += per_page
@@ -108,7 +157,9 @@ def get_report(
             break
 
 
-def process_report(
+def process_report(
+    response: RunReportResponse, ingest_at: str | None = None
+) -> Iterator[TDataItems]:
     metrics_headers = [header.name for header in response.metric_headers]
     dimensions_headers = [header.name for header in response.dimension_headers]
 
@@ -131,6 +182,8 @@ def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
             metric_type=metric_type, value=row.metric_values[i].value
         )
         response_dict[metrics_headers[i]] = metric_value
+        if ingest_at is not None:
+            response_dict["ingested_at"] = ingest_at
 
     unique_key = "-".join(list(response_dict.keys()))
     if unique_key not in distinct_key_combinations:
@@ -170,3 +223,65 @@ def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
         return pendulum.from_format(dimension_value, "YYYYMMDDHHmm", tz="UTC")
     else:
         return dimension_value
+
+
+def convert_minutes_ranges_to_minute_range_objects(minutes_ranges: str) -> List[MinuteRange]:
+    minutes_ranges = minutes_ranges.strip()
+    minutes = minutes_ranges.replace(" ", "").split(",")
+    if minutes == "":
+        raise ValueError(
+            "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+        )
+
+
+    minute_range_objects = []
+    for min_range in minutes:
+        if "-" not in min_range:
+            raise ValueError(
+                "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+            )
+        parts = min_range.split("-")
+
+        if not parts[0].isdigit() or not parts[1].isdigit():
+            raise ValueError(
+                f"Invalid input '{min_range}'. Both start and end minutes must be digits. For example: 1-2,5-6"
+            )
+
+        end_minutes_ago = int(parts[0])
+        start_minutes_ago = int(parts[1])
+        minute_range_objects.append(MinuteRange(
+            name=f"{end_minutes_ago}-{start_minutes_ago} minutes ago",
+            start_minutes_ago= start_minutes_ago,
+            end_minutes_ago=end_minutes_ago
+        ))
+
+    return minute_range_objects
+
+
+def parse_google_analytics_uri(uri: str):
+    parse_uri = urlparse(uri)
+    source_fields = parse_qs(parse_uri.query)
+    cred_path = source_fields.get("credentials_path")
+    cred_base64 = source_fields.get("credentials_base64")
+
+    if not cred_path and not cred_base64:
+        raise ValueError(
+            "credentials_path or credentials_base64 is required to connect Google Analytics"
+        )
+    credentials = {}
+    if cred_path:
+        with open(cred_path[0], "r") as f:
+            credentials = json.load(f)
+    elif cred_base64:
+        credentials = json.loads(base64.b64decode(cred_base64[0]).decode("utf-8"))
+
+    property_id = source_fields.get("property_id")
+    if not property_id:
+        raise ValueError("property_id is required to connect to Google Analytics")
+
+    if (not cred_path and not cred_base64) or (not property_id):
+        raise ValueError(
+            "credentials_path or credentials_base64 and property_id are required to connect Google Analytics"
+        )
+
+    return {"credentials": credentials, "property_id": property_id[0]}
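The two new helpers isolate URI parsing and minute-range parsing so sources.py can stay thin. A sketch of both, with a hypothetical URI scheme and credentials path:

from ingestr.src.google_analytics.helpers import (
    convert_minutes_ranges_to_minute_range_objects,
    parse_google_analytics_uri,
)

ranges = convert_minutes_ranges_to_minute_range_objects("1-2, 5-6")
# -> [MinuteRange("1-2 minutes ago", ...), MinuteRange("5-6 minutes ago", ...)]

cfg = parse_google_analytics_uri(
    "googleanalytics://?credentials_path=/path/to/service_account.json&property_id=123456"
)
print(cfg["property_id"])  # "123456"; cfg["credentials"] holds the parsed service-account JSON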
ingestr/src/sources.py
CHANGED
@@ -852,22 +852,31 @@ class AirtableSource:
         if not table:
             raise ValueError("Source table is required to connect to Airtable")
 
-        tables = table.split(",")
-
         source_parts = urlparse(uri)
         source_fields = parse_qs(source_parts.query)
-        base_id = source_fields.get("base_id")
         access_token = source_fields.get("access_token")
 
-        if not
+        if not access_token:
             raise ValueError(
-                "
+                "access_token in the URI is required to connect to Airtable"
             )
 
+        base_id = source_fields.get("base_id", [None])[0]
+        clean_table = table
+
+        table_fields = table.split("/")
+        if len(table_fields) == 2:
+            clean_table = table_fields[1]
+            if not base_id:
+                base_id = table_fields[0]
+
+        if not base_id:
+            raise ValueError("base_id in the URI is required to connect to Airtable")
+
         from ingestr.src.airtable import airtable_source
 
         return airtable_source(
-            base_id=base_id
+            base_id=base_id, table_names=[clean_table], access_token=access_token[0]
         )
 
 
@@ -1460,48 +1469,49 @@ class GoogleAnalyticsSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
-
-
-
-
+        import ingestr.src.google_analytics.helpers as helpers
+
+        result = helpers.parse_google_analytics_uri(uri)
+        credentials = result["credentials"]
+        property_id = result["property_id"]
 
-
+        fields = table.split(":")
+        if len(fields) != 3 and len(fields) != 4:
             raise ValueError(
-                "
+                "Invalid table format. Expected format: <report_type>:<dimensions>:<metrics> or <report_type>:<dimensions>:<metrics>:<minute_ranges>"
             )
 
-
-        if
-            with open(cred_path[0], "r") as f:
-                credentials = json.load(f)
-        elif cred_base64:
-            credentials = json.loads(base64.b64decode(cred_base64[0]).decode("utf-8"))
-
-        property_id = source_fields.get("property_id")
-        if not property_id:
-            raise ValueError("property_id is required to connect to Google Analytics")
-
-        fields = table.split(":")
-        if len(fields) != 3:
+        report_type = fields[0]
+        if report_type not in ["custom", "realtime"]:
             raise ValueError(
-                "Invalid
+                "Invalid report type. Expected format: <report_type>:<dimensions>:<metrics>. Available report types: custom, realtime"
             )
 
         dimensions = fields[1].replace(" ", "").split(",")
+        metrics = fields[2].replace(" ", "").split(",")
+
+        minute_range_objects = []
+        if len(fields) == 4:
+            minute_range_objects = helpers.convert_minutes_ranges_to_minute_range_objects(fields[3])
 
         datetime = ""
-
-
-
-
-
-
-
-
+        resource_name = fields[0].lower()
+        if resource_name == "custom":
+            for dimension_datetime in ["date", "dateHourMinute", "dateHour"]:
+                if dimension_datetime in dimensions:
+                    datetime = dimension_datetime
+                    break
+            else:
+                raise ValueError(
+                    "You must provide at least one dimension: [dateHour, dateHourMinute, date]"
+                )
 
-        metrics = fields[2].replace(" ", "").split(",")
         queries = [
-            {
+            {
+                "resource_name": resource_name,
+                "dimensions": dimensions,
+                "metrics": metrics,
+            }
         ]
 
         start_date = pendulum.now().subtract(days=30).start_of("day")
@@ -1515,13 +1525,14 @@ class GoogleAnalyticsSource:
         from ingestr.src.google_analytics import google_analytics
 
         return google_analytics(
-            property_id=property_id
+            property_id=property_id,
             start_date=start_date,
             end_date=end_date,
             datetime_dimension=datetime,
             queries=queries,
             credentials=credentials,
-
+            minute_range_objects=minute_range_objects if minute_range_objects else None,
+        ).with_resources(resource_name)
 
 
 class GitHubSource:
@@ -2164,36 +2175,60 @@ class FrankfurterSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
-
-
-
-
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Frankfurter takes care of incrementality on its own, you should not provide incremental_key"
+            )
 
-        if
-
-
-
-            end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
-        else:
-            end_date = start_date
+        if kwargs.get("interval_start"):
+            start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
+            if kwargs.get("interval_end"):
+                end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
            else:
-                start_date = pendulum.now()
                end_date = pendulum.now()
-        validate_dates(start_date=start_date, end_date=end_date)
-
-        # For currencies and latest tables, set start and end dates to current date
        else:
            start_date = pendulum.now()
            end_date = pendulum.now()
 
-
-
-
-
-        )
+        from ingestr.src.frankfurter import frankfurter_source
+        from ingestr.src.frankfurter.helpers import validate_dates
+
+        validate_dates(start_date=start_date, end_date=end_date)
 
-
-            table=table,
+        src = frankfurter_source(
            start_date=start_date,
            end_date=end_date,
        )
+
+        if table not in src.resources:
+            raise UnsupportedResourceError(table, "Frankfurter")
+
+        return src.with_resources(table)
+
+class FreshdeskSource:
+    # freshdesk://domain?api_key=<api_key>
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        domain = parsed_uri.netloc
+        query = parsed_uri.query
+        params = parse_qs(query)
+
+        if not domain:
+            raise MissingValueError("domain", "Freshdesk")
+
+        if '.' in domain:
+            domain = domain.split('.')[0]
+
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "Freshdesk")
+
+        if table not in ["agents", "companies", "contacts", "groups", "roles", "tickets"]:
+            raise UnsupportedResourceError(table, "Freshdesk")
+
+        from ingestr.src.freshdesk import freshdesk_source
+        return freshdesk_source(api_secret_key=api_key[0], domain=domain).with_resources(table)
+
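Putting the new parsing rules together, the source classes above accept table strings such as realtime:country:activeUsers:1-2,5-6 for Google Analytics and plain endpoint names for Freshdesk. A sketch of the calls, with hypothetical URIs and credentials:

from ingestr.src.sources import FreshdeskSource, GoogleAnalyticsSource

freshdesk = FreshdeskSource().dlt_source(
    "freshdesk://mycompany?api_key=YOUR_API_KEY",  # hypothetical subdomain and key
    "tickets",
)

analytics = GoogleAnalyticsSource().dlt_source(
    "googleanalytics://?credentials_path=/path/to/sa.json&property_id=123456",  # hypothetical
    "realtime:country:activeUsers:1-2,5-6",  # <report_type>:<dimensions>:<metrics>:<minute_ranges>
)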
{ingestr-0.13.34.dist-info → ingestr-0.13.36.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.34
+Version: 0.13.36
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -46,7 +46,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
 Requires-Dist: dataclasses-json==0.6.7
 Requires-Dist: decorator==5.2.1
 Requires-Dist: deprecation==2.1.0
-Requires-Dist: dlt==1.
+Requires-Dist: dlt==1.10.0
 Requires-Dist: dnspython==2.7.0
 Requires-Dist: duckdb-engine==0.17.0
 Requires-Dist: duckdb==1.2.1
{ingestr-0.13.34.dist-info → ingestr-0.13.36.dist-info}/RECORD
CHANGED
@@ -2,21 +2,21 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=mRlGSqi2sHcZ2AKlwn5MqoMvFxXlSjcZxmPJr76rmRk,25187
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=
-ingestr/src/destinations.py,sha256=
+ingestr/src/buildinfo.py,sha256=abX1HXd_dkzG2hkJg7JdFGSvgjGi72VrEucHcTxIziA,21
+ingestr/src/destinations.py,sha256=Z79f01BSmEaXnQno2IQVt4Th4dmD-BiOQXlibZJ5sTw,13180
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=M0FAes6KsvqCzuTnUBcxc6DF7UVO51IlrFxy2VDpbkQ,5392
 ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=YpaUS5Ui-YXeZYLETPAj60WhU5fWI_lP6jVA0w6J6qo,77250
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
 ingestr/src/adjust/adjust_helpers.py,sha256=IHSS94A7enOWkZ8cP5iW3RdYt0Xl3qZGAmDc1Xy4qkI,3802
-ingestr/src/airtable/__init__.py,sha256=
+ingestr/src/airtable/__init__.py,sha256=XzRsS39xszUlh_s7P1_zq5v8vLfjz3m-NtTPaa8TTZU,2818
 ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
 ingestr/src/applovin_max/__init__.py,sha256=ZrxOUSirGxkGDmM9wsQO3anwNVzqtoCwN_OuCXfPkXE,3285
 ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
@@ -42,8 +42,11 @@ ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_
 ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
-ingestr/src/frankfurter/__init__.py,sha256=
-ingestr/src/frankfurter/helpers.py,sha256=
+ingestr/src/frankfurter/__init__.py,sha256=sjxfq377-lryuFC3JswcbHBRoBjLnGLKNRTwBpDZyLw,4403
+ingestr/src/frankfurter/helpers.py,sha256=wqm087QVPcyTuMl6yj_Pl1wcuqElwcBMPz3P4773wcM,979
+ingestr/src/freshdesk/__init__.py,sha256=uFQW_cJyymxtHQiYb_xjzZAklc487L0n9GkgHgC7yAI,2618
+ingestr/src/freshdesk/freshdesk_client.py,sha256=3z5Yc008ADzRcJWtNc00PwjkLzG-RMI8jVIOOyYA-Rw,4088
+ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
 ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
 ingestr/src/github/helpers.py,sha256=rpv_3HzuOl4PQ-FUeA66pev-pgze9SaE8RUHIPYfZ_A,6759
 ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
@@ -53,8 +56,8 @@ ingestr/src/google_ads/field.py,sha256=uc8KEaYQrwgQoQPUdxIQWZxpFeZHbiV98FM0ZSael
 ingestr/src/google_ads/metrics.py,sha256=tAqpBpm-8l95oPT9cBxMWaEoDTNHVXnqUphYDHWKDiE,12099
 ingestr/src/google_ads/predicates.py,sha256=K4wTuqfmJ9ko1RKeHTBDfQO_mUADVyuRqtywBPP-72w,683
 ingestr/src/google_ads/reports.py,sha256=AVY1pPt5yaIFskQe1k5VW2Dhlux3bzewsHlDrdGEems,12686
-ingestr/src/google_analytics/__init__.py,sha256=
-ingestr/src/google_analytics/helpers.py,sha256=
+ingestr/src/google_analytics/__init__.py,sha256=8b9CBWJFrBpHVRl993Z7J01sKKbYyXEtngdfEUwqlfE,4343
+ingestr/src/google_analytics/helpers.py,sha256=bUTPp5C-k5wqq-ccEAn-asRH2CLbBS2SOs1v9wiRU6U,10087
 ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
 ingestr/src/google_sheets/__init__.py,sha256=CL0HfY74uxX8-ge0ucI0VhWMYZVAfoX7WRPBitRi-CI,6647
 ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -122,8 +125,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.36.dist-info/METADATA,sha256=AFJ4qtGMrtaG5luUcRCXAsp7yP8FKlL4EjP8KorvXKI,13575
+ingestr-0.13.36.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.36.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.36.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.36.dist-info/RECORD,,