ingestr 0.13.77__py3-none-any.whl → 0.13.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- ingestr/main.py +10 -3
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +18 -0
- ingestr/src/facebook_ads/__init__.py +0 -1
- ingestr/src/factory.py +5 -0
- ingestr/src/freshdesk/__init__.py +23 -8
- ingestr/src/freshdesk/freshdesk_client.py +16 -5
- ingestr/src/github/__init__.py +5 -3
- ingestr/src/github/helpers.py +1 -0
- ingestr/src/influxdb/__init__.py +1 -0
- ingestr/src/mongodb/__init__.py +3 -0
- ingestr/src/mongodb/helpers.py +184 -9
- ingestr/src/sources.py +203 -24
- {ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/METADATA +6 -1
- {ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/RECORD +18 -18
- {ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/WHEEL +0 -0
- {ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
@@ -1,3 +1,4 @@
+import warnings
 from datetime import datetime
 from enum import Enum
 from typing import Optional
@@ -8,6 +9,14 @@ from typing_extensions import Annotated

 from ingestr.src.telemetry.event import track

+try:
+    from duckdb_engine import DuckDBEngineWarning
+
+    warnings.filterwarnings("ignore", category=DuckDBEngineWarning)
+except ImportError:
+    # duckdb-engine not installed
+    pass
+
 app = typer.Typer(
     name="ingestr",
     help="ingestr is the CLI tool to ingest data from one source to another",
@@ -506,7 +515,6 @@ def ingest(

     if factory.source_scheme == "sqlite":
         source_table = "main." + source_table.split(".")[-1]
-

     if (
         incremental_key
@@ -600,10 +608,9 @@ def ingest(
     if factory.source_scheme == "influxdb":
         if primary_key:
             write_disposition = "merge"
-

     start_time = datetime.now()
-
+
     run_info: LoadInfo = pipeline.run(
         dlt_source,
         **destination.dlt_run_params(
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.77"
+version = "v0.13.79"
ingestr/src/destinations.py
CHANGED
@@ -147,6 +147,24 @@ class DuckDBDestination(GenericSqlDestination):
         return dlt.destinations.duckdb(uri, **kwargs)


+class MotherduckDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        from urllib.parse import parse_qs, urlparse
+
+        parsed = urlparse(uri)
+        query = parse_qs(parsed.query)
+        token = query.get("token", [None])[0]
+        from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials
+
+        creds = {
+            "password": token,
+        }
+        if parsed.path.lstrip("/"):
+            creds["database"] = parsed.path.lstrip("/")
+
+        return dlt.destinations.motherduck(MotherDuckCredentials(creds), **kwargs)
+
+
 def handle_datetimeoffset(dto_value: bytes) -> datetime.datetime:
     # ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794
     tup = struct.unpack(
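For illustration, a minimal standalone sketch of the URI handling the new MotherduckDestination performs; the URI shape, database name, and token below are assumptions for this example, not values taken from the release.

    from urllib.parse import parse_qs, urlparse

    uri = "motherduck:///my_db?token=example-token"  # hypothetical destination URI
    parsed = urlparse(uri)
    token = parse_qs(parsed.query).get("token", [None])[0]

    creds = {"password": token}
    if parsed.path.lstrip("/"):
        creds["database"] = parsed.path.lstrip("/")

    # creds is then wrapped in MotherDuckCredentials and handed to dlt.destinations.motherduck
    print(creds)  # {'password': 'example-token', 'database': 'my_db'}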
ingestr/src/factory.py
CHANGED
@@ -12,6 +12,7 @@ from ingestr.src.destinations import (
     DatabricksDestination,
     DuckDBDestination,
     GCSDestination,
+    MotherduckDestination,
     MsSQLDestination,
     MySqlDestination,
     PostgresDestination,
@@ -85,6 +86,8 @@ SQL_SOURCE_SCHEMES = [
     "mysql",
     "mysql+pymysql",
     "mysql+mysqlconnector",
+    "md",
+    "motherduck",
     "postgres",
     "postgresql",
     "postgresql+psycopg2",
@@ -195,6 +198,8 @@ class SourceDestinationFactory:
         "cratedb": CrateDBDestination,
         "databricks": DatabricksDestination,
         "duckdb": DuckDBDestination,
+        "motherduck": MotherduckDestination,
+        "md": MotherduckDestination,
         "mssql": MsSQLDestination,
         "postgres": PostgresDestination,
         "postgresql": PostgresDestination,
ingestr/src/freshdesk/__init__.py
CHANGED
@@ -4,6 +4,8 @@ etc. to the database"""
 from typing import Any, Dict, Generator, Iterable, List, Optional

 import dlt
+import pendulum
+from dlt.common.time import ensure_pendulum_datetime
 from dlt.sources import DltResource

 from .freshdesk_client import FreshdeskClient
@@ -12,10 +14,12 @@ from .settings import DEFAULT_ENDPOINTS

 @dlt.source()
 def freshdesk_source(
-
+    domain: str,
+    api_secret_key: str,
+    start_date: pendulum.DateTime,
+    end_date: Optional[pendulum.DateTime] = None,
     per_page: int = 100,
-
-    api_secret_key: str = dlt.secrets.value,
+    endpoints: Optional[List[str]] = None,
 ) -> Iterable[DltResource]:
     """
     Retrieves data from specified Freshdesk API endpoints.
@@ -39,7 +43,11 @@ def freshdesk_source(
     def incremental_resource(
         endpoint: str,
         updated_at: Optional[Any] = dlt.sources.incremental(
-            "updated_at",
+            "updated_at",
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date else None,
+            range_start="closed",
+            range_end="closed",
         ),
     ) -> Generator[Dict[Any, Any], Any, None]:
         """
@@ -48,15 +56,22 @@ def freshdesk_source(
         to ensure incremental loading.
         """

-
-
-
+        if updated_at.last_value is not None:
+            start_date = ensure_pendulum_datetime(updated_at.last_value)
+        else:
+            start_date = start_date
+
+        if updated_at.end_value is not None:
+            end_date = ensure_pendulum_datetime(updated_at.end_value)
+        else:
+            end_date = pendulum.now(tz="UTC")

         # Use the FreshdeskClient instance to fetch paginated responses
         yield from freshdesk.paginated_response(
             endpoint=endpoint,
             per_page=per_page,
-
+            start_date=start_date,
+            end_date=end_date,
         )

     # Set default endpoints if not provided
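As a minimal sketch (dates invented) of how the resource above resolves its effective window: a cursor value persisted by dlt from a previous run takes precedence over the configured start_date, and a missing end_date falls back to the current UTC time.

    import pendulum
    from dlt.common.time import ensure_pendulum_datetime

    configured_start = pendulum.datetime(2022, 1, 1, tz="UTC")  # hypothetical start_date
    last_value = "2024-05-01T00:00:00+00:00"                    # hypothetical stored cursor
    end_value = None                                            # no end_date configured

    start_date = ensure_pendulum_datetime(last_value) if last_value is not None else configured_start
    end_date = ensure_pendulum_datetime(end_value) if end_value is not None else pendulum.now(tz="UTC")

    assert start_date > configured_start
    assert end_date > start_date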
ingestr/src/freshdesk/freshdesk_client.py
CHANGED
@@ -2,8 +2,9 @@

 import logging
 import time
-from typing import Any, Dict, Iterable
+from typing import Any, Dict, Iterable

+import pendulum
 from dlt.common.typing import TDataItem
 from dlt.sources.helpers import requests

@@ -67,7 +68,8 @@ class FreshdeskClient:
         self,
         endpoint: str,
         per_page: int,
-
+        start_date: pendulum.DateTime,
+        end_date: pendulum.DateTime,
     ) -> Iterable[TDataItem]:
         """
         Fetches a paginated response from a specified endpoint.
@@ -88,8 +90,8 @@ class FreshdeskClient:
                 param_key = (
                     "updated_since" if endpoint == "tickets" else "_updated_since"
                 )
-
-
+
+                params[param_key] = start_date.to_iso8601_string()

             # Handle requests with rate-limiting
             # A maximum of 300 pages (30000 tickets) will be returned.
@@ -98,5 +100,14 @@ class FreshdeskClient:

             if not data:
                 break  # Stop if no data or max page limit reached
-
+
+            filtered_data = [
+                item
+                for item in data
+                if "updated_at" in item
+                and pendulum.parse(item["updated_at"]) <= end_date
+            ]
+            if not filtered_data:
+                break
+            yield filtered_data
             page += 1
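To make the new client-side cutoff concrete, a small sketch with invented items: each page is filtered down to records whose updated_at falls inside the requested window, and pagination stops once nothing on the page qualifies.

    import pendulum

    end_date = pendulum.datetime(2024, 6, 1, tz="UTC")
    data = [
        {"id": 1, "updated_at": "2024-05-30T10:00:00Z"},
        {"id": 2, "updated_at": "2024-06-02T10:00:00Z"},  # newer than the window, dropped
    ]

    filtered_data = [
        item
        for item in data
        if "updated_at" in item and pendulum.parse(item["updated_at"]) <= end_date
    ]
    print([item["id"] for item in filtered_data])  # [1]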
ingestr/src/github/__init__.py
CHANGED
@@ -91,7 +91,9 @@ def github_repo_events(
     """

     # use naming function in table name to generate separate tables for each event
-    @dlt.resource(
+    @dlt.resource(
+        primary_key="id", table_name=lambda i: i["type"], write_disposition="merge"
+    )
     def repo_events(
         last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
             "created_at",
@@ -105,7 +107,7 @@ def github_repo_events(
         repos_path = (
             f"/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(name)}/events"
         )
-
+
         # Get the date range from the incremental state
         start_filter = pendulum.parse(
             last_created_at.last_value or last_created_at.initial_value
@@ -115,7 +117,7 @@ def github_repo_events(
             if last_created_at.end_value
             else pendulum.now()
         )
-
+
         for page in get_rest_pages(access_token, repos_path + "?per_page=100"):
             # Filter events by date range
             filtered_events = []
ingestr/src/github/helpers.py
CHANGED
ingestr/src/influxdb/__init__.py
CHANGED
ingestr/src/mongodb/__init__.py
CHANGED
@@ -106,6 +106,7 @@ def mongodb_collection(
     filter_: Optional[Dict[str, Any]] = None,
     projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
     pymongoarrow_schema: Optional[Any] = None,
+    custom_query: Optional[List[Dict[str, Any]]] = None,
 ) -> Any:
     """
     A DLT source which loads a collection from a mongo database using PyMongo.
@@ -132,6 +133,7 @@ def mongodb_collection(
             exclude (dict) - {"released": False, "runtime": False}
             Note: Can't mix include and exclude statements '{"title": True, "released": False}`
         pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
+        custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB aggregation pipeline to execute instead of find()

     Returns:
         Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
@@ -161,4 +163,5 @@ def mongodb_collection(
         filter_=filter_ or {},
         projection=projection,
         pymongoarrow_schema=pymongoarrow_schema,
+        custom_query=custom_query,
     )
ingestr/src/mongodb/helpers.py
CHANGED
@@ -204,7 +204,14 @@ class CollectionLoader:
         cursor = self._limit(cursor, limit)

         while docs_slice := list(islice(cursor, self.chunk_size)):
-
+            res = map_nested_in_place(convert_mongo_objs, docs_slice)
+            if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
+                yield dlt.mark.with_hints(
+                    res,
+                    dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
+                )
+            else:
+                yield res


 class CollectionLoaderParallel(CollectionLoader):
@@ -464,6 +471,145 @@ class CollectionArrowLoaderParallel(CollectionLoaderParallel):
                 yield convert_arrow_columns(table)


+class CollectionAggregationLoader(CollectionLoader):
+    """
+    MongoDB collection loader that uses aggregation pipelines instead of find queries.
+    """
+
+    def __init__(
+        self,
+        client: TMongoClient,
+        collection: TCollection,
+        chunk_size: int,
+        incremental: Optional[dlt.sources.incremental[Any]] = None,
+    ) -> None:
+        super().__init__(client, collection, chunk_size, incremental)
+        self.custom_query: Optional[List[Dict[str, Any]]] = None
+
+    def set_custom_query(self, query: List[Dict[str, Any]]):
+        """Set the custom aggregation pipeline query"""
+        self.custom_query = query
+
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    ) -> Iterator[TDataItem]:
+        """Load documents using aggregation pipeline"""
+        if not self.custom_query:
+            # Fallback to parent method if no custom query
+            yield from super().load_documents(filter_, limit, projection)
+            return
+
+        # Build aggregation pipeline
+        pipeline = list(self.custom_query)  # Copy the query
+
+        # For custom queries, we assume incremental filtering is already handled
+        # via interval placeholders (:interval_start, :interval_end) in the query itself.
+        # We don't add additional incremental filtering to avoid conflicts.
+
+        # Add additional filter if provided
+        if filter_:
+            filter_match = {"$match": filter_}
+            pipeline.insert(0, filter_match)
+
+        # Add limit if specified
+        if limit and limit > 0:
+            pipeline.append({"$limit": limit})
+
+        print("pipeline", pipeline)
+        # Execute aggregation
+        cursor = self.collection.aggregate(pipeline, allowDiskUse=True)
+
+        # Process results in chunks
+        while docs_slice := list(islice(cursor, self.chunk_size)):
+            res = map_nested_in_place(convert_mongo_objs, docs_slice)
+            print("res", res)
+            if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
+                yield dlt.mark.with_hints(
+                    res,
+                    dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
+                )
+            else:
+                yield res
+
+
+class CollectionAggregationLoaderParallel(CollectionAggregationLoader):
+    """
+    MongoDB collection parallel loader that uses aggregation pipelines.
+    Note: Parallel loading is not supported for aggregation pipelines due to cursor limitations.
+    Falls back to sequential loading.
+    """
+
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    ) -> Iterator[TDataItem]:
+        """Load documents using aggregation pipeline (sequential only)"""
+        logger.warning(
+            "Parallel loading is not supported for MongoDB aggregation pipelines. Using sequential loading."
+        )
+        yield from super().load_documents(filter_, limit, projection)
+
+
+class CollectionAggregationArrowLoader(CollectionAggregationLoader):
+    """
+    MongoDB collection aggregation loader that uses Apache Arrow for data processing.
+    """
+
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+        pymongoarrow_schema: Any = None,
+    ) -> Iterator[Any]:
+        """Load documents using aggregation pipeline with Arrow format"""
+        logger.warning(
+            "Arrow format is not directly supported for MongoDB aggregation pipelines. Converting to Arrow after loading."
+        )
+
+        # Load documents normally and convert to arrow format
+        for batch in super().load_documents(filter_, limit, projection):
+            if batch:  # Only process non-empty batches
+                try:
+                    from dlt.common.libs.pyarrow import pyarrow
+
+                    # Convert dict batch to arrow table
+                    table = pyarrow.Table.from_pylist(batch)
+                    yield convert_arrow_columns(table)
+                except ImportError:
+                    logger.warning(
+                        "PyArrow not available, falling back to object format"
+                    )
+                    yield batch
+
+
+class CollectionAggregationArrowLoaderParallel(CollectionAggregationArrowLoader):
+    """
+    MongoDB collection parallel aggregation loader with Arrow support.
+    Falls back to sequential loading.
+    """
+
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+        pymongoarrow_schema: Any = None,
+    ) -> Iterator[TDataItem]:
+        """Load documents using aggregation pipeline with Arrow format (sequential only)"""
+        logger.warning(
+            "Parallel loading is not supported for MongoDB aggregation pipelines. Using sequential loading."
+        )
+        yield from super().load_documents(
+            filter_, limit, projection, pymongoarrow_schema
+        )
+
+
 def collection_documents(
     client: TMongoClient,
     collection: TCollection,
@@ -475,6 +621,7 @@ def collection_documents(
     limit: Optional[int] = None,
     chunk_size: Optional[int] = 10000,
     data_item_format: Optional[TDataItemFormat] = "object",
+    custom_query: Optional[List[Dict[str, Any]]] = None,
 ) -> Iterator[TDataItem]:
     """
     A DLT source which loads data from a Mongo database using PyMongo.
@@ -499,6 +646,7 @@ def collection_documents(
             Supported formats:
                 object - Python objects (dicts, lists).
                 arrow - Apache Arrow tables.
+        custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB aggregation pipeline to execute instead of find()

     Returns:
         Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
@@ -521,21 +669,48 @@ def collection_documents(
             "create a projection to select fields, `projection` will be ignored."
         )

-
-
-
+    # If custom query is provided, use aggregation loaders
+    if custom_query:
+        if parallel:
+            if data_item_format == "arrow":
+                LoaderClass = CollectionAggregationArrowLoaderParallel
+            else:
+                LoaderClass = CollectionAggregationLoaderParallel  # type: ignore
         else:
-
+            if data_item_format == "arrow":
+                LoaderClass = CollectionAggregationArrowLoader  # type: ignore
+            else:
+                LoaderClass = CollectionAggregationLoader  # type: ignore
     else:
-        if
-
+        if parallel:
+            if data_item_format == "arrow":
+                LoaderClass = CollectionArrowLoaderParallel
+            else:
+                LoaderClass = CollectionLoaderParallel  # type: ignore
         else:
-
+            if data_item_format == "arrow":
+                LoaderClass = CollectionArrowLoader  # type: ignore
+            else:
+                LoaderClass = CollectionLoader  # type: ignore

     loader = LoaderClass(
         client, collection, incremental=incremental, chunk_size=chunk_size
     )
-
+
+    # Set custom query if provided
+    if custom_query and hasattr(loader, "set_custom_query"):
+        loader.set_custom_query(custom_query)
+
+    # Load documents based on loader type
+    if isinstance(
+        loader,
+        (
+            CollectionArrowLoader,
+            CollectionArrowLoaderParallel,
+            CollectionAggregationArrowLoader,
+            CollectionAggregationArrowLoaderParallel,
+        ),
+    ):
         yield from loader.load_documents(
             limit=limit,
             filter_=filter_,
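To make the control flow above concrete, a small sketch (collection filter, pipeline, and limit are invented) of how CollectionAggregationLoader assembles the final pipeline before calling collection.aggregate: the caller's filter_ is prepended as a $match stage and a positive limit is appended as $limit.

    custom_query = [  # hypothetical user-supplied aggregation pipeline
        {"$match": {"status": "active"}},
        {"$project": {"status": 1, "updated_at": 1}},
    ]
    filter_ = {"tenant_id": 42}  # hypothetical extra filter
    limit = 1000

    pipeline = list(custom_query)  # copy so the original query is untouched
    if filter_:
        pipeline.insert(0, {"$match": filter_})
    if limit and limit > 0:
        pipeline.append({"$limit": limit})

    # pipeline is now:
    # [{'$match': {'tenant_id': 42}},
    #  {'$match': {'status': 'active'}},
    #  {'$project': {'status': 1, 'updated_at': 1}},
    #  {'$limit': 1000}]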
ingestr/src/sources.py
CHANGED
@@ -73,6 +73,20 @@ class SqlSource:

         engine_adapter_callback = None

+        if uri.startswith("md://") or uri.startswith("motherduck://"):
+            parsed_uri = urlparse(uri)
+            query_params = parse_qs(parsed_uri.query)
+            # Convert md:// URI to duckdb:///md: format
+            if parsed_uri.path:
+                db_path = parsed_uri.path
+            else:
+                db_path = ""
+
+            token = query_params.get("token", [""])[0]
+            if not token:
+                raise ValueError("Token is required for MotherDuck connection")
+            uri = f"duckdb:///md:{db_path}?motherduck_token={token}"
+
         if uri.startswith("mysql://"):
             uri = uri.replace("mysql://", "mysql+pymysql://")

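A minimal sketch (URI and token invented) of the rewrite above; note that the path component of the source URI is carried verbatim into the md: database name, and the exact URI shape used here is an assumption rather than documented usage.

    from urllib.parse import parse_qs, urlparse

    uri = "md:///my_db?token=example-token"  # hypothetical MotherDuck source URI
    parsed_uri = urlparse(uri)
    query_params = parse_qs(parsed_uri.query)

    db_path = parsed_uri.path if parsed_uri.path else ""
    token = query_params.get("token", [""])[0]
    if not token:
        raise ValueError("Token is required for MotherDuck connection")

    print(f"duckdb:///md:{db_path}?motherduck_token={token}")
    # duckdb:///md:/my_db?motherduck_token=example-token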
@@ -409,31 +423,181 @@ class MongoDbSource:
         return False

     def dlt_source(self, uri: str, table: str, **kwargs):
-
+        # Check if this is a custom query format (collection:query)
+        if ":" in table:
+            collection_name, query_json = table.split(":", 1)

-
-
-
-            end_value = kwargs.get("interval_end")
+            # Parse and validate the query
+            try:
+                import json

-
-
-
-
-
-
+                query = json.loads(query_json)
+            except json.JSONDecodeError as e:
+                raise ValueError(f"Invalid JSON query format: {e}")
+
+            # Validate that it's a list for aggregation pipeline
+            if not isinstance(query, list):
+                raise ValueError(
+                    "Query must be a JSON array representing a MongoDB aggregation pipeline"
+                )
+
+            # Check for incremental load requirements
+            incremental = None
+            if kwargs.get("incremental_key"):
+                start_value = kwargs.get("interval_start")
+                end_value = kwargs.get("interval_end")
+
+                # Validate that incremental key is present in the pipeline
+                incremental_key = kwargs.get("incremental_key")
+                self._validate_incremental_query(query, str(incremental_key))
+
+                incremental = dlt_incremental(
+                    str(incremental_key),
+                    initial_value=start_value,
+                    end_value=end_value,
+                )
+
+            # Substitute interval parameters in the query
+            query = self._substitute_interval_params(query, kwargs)
+
+            # Parse collection name to get database and collection
+            if "." in collection_name:
+                # Handle database.collection format
+                table_fields = table_string_to_dataclass(collection_name)
+                database = table_fields.dataset
+                collection = table_fields.table
+            else:
+                # Single collection name, use default database
+                database = None
+                collection = collection_name
+
+            table_instance = self.table_builder(
+                connection_url=uri,
+                database=database,
+                collection=collection,
+                parallel=False,
+                incremental=incremental,
+                custom_query=query,
+            )
+            table_instance.max_table_nesting = 1
+            return table_instance
+        else:
+            # Default behavior for simple collection names
+            table_fields = table_string_to_dataclass(table)
+
+            incremental = None
+            if kwargs.get("incremental_key"):
+                start_value = kwargs.get("interval_start")
+                end_value = kwargs.get("interval_end")
+
+                incremental = dlt_incremental(
+                    kwargs.get("incremental_key", ""),
+                    initial_value=start_value,
+                    end_value=end_value,
+                )
+
+            table_instance = self.table_builder(
+                connection_url=uri,
+                database=table_fields.dataset,
+                collection=table_fields.table,
+                parallel=False,
+                incremental=incremental,
+            )
+            table_instance.max_table_nesting = 1
+
+            return table_instance
+
+    def _validate_incremental_query(self, query: list, incremental_key: str):
+        """Validate that incremental key is projected in the aggregation pipeline"""
+        # Check if there's a $project stage and if incremental_key is included
+        has_project = False
+        incremental_key_projected = False
+
+        for stage in query:
+            if "$project" in stage:
+                has_project = True
+                project_stage = stage["$project"]
+                if isinstance(project_stage, dict):
+                    # Check if incremental_key is explicitly included
+                    if incremental_key in project_stage:
+                        if project_stage[incremental_key] not in [0, False]:
+                            incremental_key_projected = True
+                    # If there are only inclusions (1 or True values) and incremental_key is not included
+                    elif any(v in [1, True] for v in project_stage.values()):
+                        # This is an inclusion projection, incremental_key must be explicitly included
+                        incremental_key_projected = False
+                    # If there are only exclusions (0 or False values) and incremental_key is not excluded
+                    elif all(
+                        v in [0, False]
+                        for v in project_stage.values()
+                        if v in [0, False, 1, True]
+                    ):
+                        # This is an exclusion projection, incremental_key is included by default
+                        if incremental_key not in project_stage:
+                            incremental_key_projected = True
+                        else:
+                            incremental_key_projected = project_stage[
+                                incremental_key
+                            ] not in [0, False]
+                    else:
+                        # Mixed or unclear projection, assume incremental_key needs to be explicit
+                        incremental_key_projected = False
+
+        # If there's a $project stage but incremental_key is not projected, raise error
+        if has_project and not incremental_key_projected:
+            raise ValueError(
+                f"Incremental key '{incremental_key}' must be included in the projected fields of the aggregation pipeline"
             )

-
-
-
-            collection=table_fields.table,
-            parallel=True,
-            incremental=incremental,
-        )
-        table_instance.max_table_nesting = 1
+    def _substitute_interval_params(self, query: list, kwargs: dict):
+        """Substitute :interval_start and :interval_end placeholders with actual datetime values"""
+        from dlt.common.time import ensure_pendulum_datetime

-
+        # Get interval values and convert them to datetime objects
+        interval_start = kwargs.get("interval_start")
+        interval_end = kwargs.get("interval_end")
+
+        # Convert string dates to datetime objects if needed
+        if interval_start is not None:
+            if isinstance(interval_start, str):
+                pendulum_dt = ensure_pendulum_datetime(interval_start)
+                interval_start = (
+                    pendulum_dt.to_datetime()
+                    if hasattr(pendulum_dt, "to_datetime")
+                    else pendulum_dt
+                )
+            elif hasattr(interval_start, "to_datetime"):
+                interval_start = interval_start.to_datetime()
+
+        if interval_end is not None:
+            if isinstance(interval_end, str):
+                pendulum_dt = ensure_pendulum_datetime(interval_end)
+                interval_end = (
+                    pendulum_dt.to_datetime()
+                    if hasattr(pendulum_dt, "to_datetime")
+                    else pendulum_dt
+                )
+            elif hasattr(interval_end, "to_datetime"):
+                interval_end = interval_end.to_datetime()
+
+        # Deep copy the query and replace placeholders with actual datetime objects
+        def replace_placeholders(obj):
+            if isinstance(obj, dict):
+                result = {}
+                for key, value in obj.items():
+                    if value == ":interval_start" and interval_start is not None:
+                        result[key] = interval_start
+                    elif value == ":interval_end" and interval_end is not None:
+                        result[key] = interval_end
+                    else:
+                        result[key] = replace_placeholders(value)
+                return result
+            elif isinstance(obj, list):
+                return [replace_placeholders(item) for item in obj]
+            else:
+                return obj
+
+        return replace_placeholders(query)


 class LocalCsvSource:
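For illustration, a hypothetical source table value in the new collection:pipeline format accepted by the branch above; the collection name, fields, and pipeline are invented, and :interval_start / :interval_end are the placeholders that _substitute_interval_params later replaces.

    import json

    table = (
        'mydb.events:[{"$match": {"updated_at": {"$gte": ":interval_start", "$lt": ":interval_end"}}},'
        ' {"$project": {"updated_at": 1, "status": 1}}]'
    )

    collection_name, query_json = table.split(":", 1)
    pipeline = json.loads(query_json)

    print(collection_name)             # mydb.events
    print(isinstance(pipeline, list))  # True, as required by the validation above
    print(pipeline[1]["$project"])     # {'updated_at': 1, 'status': 1}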
@@ -961,7 +1125,7 @@ class SlackSource:

 class HubspotSource:
     def handles_incrementality(self) -> bool:
-        return
+        return False

     # hubspot://?api_key=<api_key>
     def dlt_source(self, uri: str, table: str, **kwargs):
@@ -2528,6 +2692,18 @@ class FreshdeskSource:
         if api_key is None:
             raise MissingValueError("api_key", "Freshdesk")

+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date).in_tz("UTC")
+        else:
+            start_date = ensure_pendulum_datetime("2022-01-01T00:00:00Z")
+
+        end_date = kwargs.get("interval_end")
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+        else:
+            end_date = None
+
         if table not in [
             "agents",
             "companies",
@@ -2541,7 +2717,10 @@ class FreshdeskSource:
         from ingestr.src.freshdesk import freshdesk_source

         return freshdesk_source(
-            api_secret_key=api_key[0],
+            api_secret_key=api_key[0],
+            domain=domain,
+            start_date=start_date,
+            end_date=end_date,
         ).with_resources(table)

@@ -2684,7 +2863,7 @@ class ElasticsearchSource:

 class AttioSource:
     def handles_incrementality(self) -> bool:
-        return
+        return False

     def dlt_source(self, uri: str, table: str, **kwargs):
         parsed_uri = urlparse(uri)
@@ -3056,7 +3235,7 @@ class InfluxDBSource:

         secure = params.get("secure", ["true"])[0].lower() != "false"
         scheme = "https" if secure else "http"
-
+
         if port:
             host_url = f"{scheme}://{host}:{port}"
         else:
{ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.77
+Version: 0.13.79
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -355,6 +355,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
     <td>✅</td>
     <td>❌</td>
   </tr>
+  <tr>
+    <td>MotherDuck</td>
+    <td>✅</td>
+    <td>✅</td>
+  </tr>
   <tr>
     <td>MySQL</td>
     <td>✅</td>
{ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
 ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=qoWHNcHh0-xVnyQxbQ-SKuTxPb1RNV3ENkCpqO7CLrk,26694
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=
-ingestr/src/destinations.py,sha256=
+ingestr/src/buildinfo.py,sha256=yE0cfxWae8TNJJLYcRmNexeK769vtdz_-vJGzcROgwE,21
+ingestr/src/destinations.py,sha256=M2Yni6wiWcrvZ8EPJemidqxN156l0rehgCc7xuil7mo,22840
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=rF5Ry4o4t8KulSPBtrd7ZKCI_0TH1DAetG0zs9H7oik,6792
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=qZz35cdO-nO9CZsdOJ8Ni56wclNfbGQuGj4nsoHpFxE,115678
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -41,7 +41,7 @@ ingestr/src/clickup/helpers.py,sha256=RzDKMUAHccuDhocIQ2ToBXfCERo8CBJqA3t-IPltBC
 ingestr/src/collector/spinner.py,sha256=_ZUqF5MI43hVIULdjF5s5mrAZbhEFXaiWirQmrv3Yk4,1201
 ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
 ingestr/src/elasticsearch/__init__.py,sha256=m-q93HgUmTwGDUwHOjHawstWL06TC3WIX3H05szybrY,2556
-ingestr/src/facebook_ads/__init__.py,sha256=
+ingestr/src/facebook_ads/__init__.py,sha256=_9929DYzcq5iLt-l3DmJ4VBZwmoEwgyPZbPstH0ySmI,9725
 ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
 ingestr/src/facebook_ads/helpers.py,sha256=NshS21can1xhRKQzg_o-c6qSxWoC3NnE3FwgJxUnygE,8239
 ingestr/src/facebook_ads/settings.py,sha256=Bsic8RcmH-NfEZ7r_NGospTCmwISK9XaMT5y2NZirtg,4938
@@ -51,11 +51,11 @@ ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
 ingestr/src/frankfurter/__init__.py,sha256=oVi4BiOxPRyckEVrBNunyMAHulPyMgyGRwBbhn-Xz6M,4987
 ingestr/src/frankfurter/helpers.py,sha256=SyrkRTDqvKdQxRHTV5kcSeVG3FEnaK5zxHyNyqtumZ0,1445
-ingestr/src/freshdesk/__init__.py,sha256=
-ingestr/src/freshdesk/freshdesk_client.py,sha256=
+ingestr/src/freshdesk/__init__.py,sha256=ukyorgCNsW_snzsYBDsr3Q0WB8f-to9Fk0enqHHFQlk,3087
+ingestr/src/freshdesk/freshdesk_client.py,sha256=1nFf0K4MQ0KZbWwk4xSbYHaykVqmPLfN39miOFDpWVc,4385
 ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
-ingestr/src/github/__init__.py,sha256=
-ingestr/src/github/helpers.py,sha256=
+ingestr/src/github/__init__.py,sha256=C7b5j6CrxmTItS4tyDa3OYzdAw5c__xboOtoEJYe3wQ,7217
+ingestr/src/github/helpers.py,sha256=rpv_3HzuOl4PQ-FUeA66pev-pgze9SaE8RUHIPYfZ_A,6759
 ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
 ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
 ingestr/src/google_ads/__init__.py,sha256=bH0TtnRWcOUESezpvoA7VEUHAq_0ITGQeX4GGVBfl1I,3725
@@ -75,7 +75,7 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
 ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
 ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
 ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
-ingestr/src/influxdb/__init__.py,sha256=
+ingestr/src/influxdb/__init__.py,sha256=cYsGnDPNHRTe9pp14ogDQgPTCI9TOdyJm1MaNuQLHdk,1290
 ingestr/src/influxdb/client.py,sha256=hCxSNREAWWEvvAV3RQbKaWp2-e_7EE8xmVRjTwLFEFo,1230
 ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
 ingestr/src/kafka/__init__.py,sha256=QUHsGmdv5_E-3z0GDHXvbk39puwuGDBsyYSDhvbA89E,3595
@@ -92,8 +92,8 @@ ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffW
 ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
 ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
 ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
-ingestr/src/mongodb/__init__.py,sha256=
-ingestr/src/mongodb/helpers.py,sha256=
+ingestr/src/mongodb/__init__.py,sha256=5KNdR2mxJoHSOU1pt-FIJNg9HT4aHPwl6mI31xPBQLA,7487
+ingestr/src/mongodb/helpers.py,sha256=VMGKkSN6FIQ4l-4TUqoc-Ou7r52_zPXuLF33ZN23B_I,30881
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
 ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -151,8 +151,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.79.dist-info/METADATA,sha256=5dl0NFB3Ach1_lFtE4xOJpud_chn_w0qvepZnnMjRzo,15182
+ingestr-0.13.79.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.79.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.79.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.79.dist-info/RECORD,,
{ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/WHEEL
File without changes
{ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.77.dist-info → ingestr-0.13.79.dist-info}/licenses/LICENSE.md
File without changes