ingestr 0.12.3__py3-none-any.whl → 0.12.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -32,7 +32,7 @@ DATE_FORMATS = [
32
32
 
33
33
  # https://dlthub.com/docs/dlt-ecosystem/file-formats/parquet#supported-destinations
34
34
  PARQUET_SUPPORTED_DESTINATIONS = [
35
- "bigquery",
35
+ "athena" "bigquery",
36
36
  "duckdb",
37
37
  "snowflake",
38
38
  "databricks",
@@ -287,8 +287,14 @@ def ingest(
287
287
  envvar="SQL_EXCLUDE_COLUMNS",
288
288
  ),
289
289
  ] = [], # type: ignore
290
+ columns: Annotated[
291
+ Optional[list[str]],
292
+ typer.Option(
293
+ help="The column types to be used for the destination table in the format of 'column_name:column_type'",
294
+ envvar="COLUMNS",
295
+ ),
296
+ ] = None, # type: ignore
290
297
  ):
291
- # TODO(turtledev): can't we move this to the top of this file?
292
298
  import hashlib
293
299
  import tempfile
294
300
  from datetime import datetime
@@ -296,6 +302,7 @@ def ingest(
296
302
  import dlt
297
303
  import humanize
298
304
  import typer
305
+ from dlt.common.data_types import TDataType
299
306
  from dlt.common.destination import Destination
300
307
  from dlt.common.pipeline import LoadInfo
301
308
  from dlt.common.runtime.collector import Collector, LogCollector
@@ -345,7 +352,7 @@ def ingest(
345
352
  not in dlt_dest.capabilities().supported_loader_file_formats
346
353
  ):
347
354
  print(
348
- f"[red]Loader file format {loader_file_format.value} is not supported by the destination.[/red]"
355
+ f"[red]Loader file format {loader_file_format.value} is not supported by the destination, available formats: {dlt_dest.capabilities().supported_loader_file_formats}.[/red]"
349
356
  )
350
357
  raise typer.Abort()
351
358
 
@@ -357,6 +364,23 @@ def ingest(
357
364
  else:
358
365
  executable(source)
359
366
 
367
def parse_columns(columns: list[str]) -> dict[str, TDataType]:
    """Parse ``--columns`` values into a column-name -> data-type mapping.

    Each entry of ``columns`` may hold one or several comma-separated
    ``column_name:column_type`` pairs. Whitespace around names and types is
    stripped and empty entries (for example from a trailing comma) are
    skipped.

    Args:
        columns: Raw option values, e.g. ``["id:bigint,created:date"]``.

    Returns:
        Mapping of column name to validated data type. When a column is
        listed more than once, the last definition wins.

    Raises:
        typer.Abort: If a pair is not of the form ``name:type`` or the type
            is not one of the literals accepted by ``TDataType``.
    """
    from typing import cast, get_args

    possible_types = get_args(TDataType)

    types: dict[str, TDataType] = {}
    for column in columns:
        for candidate in column.split(","):
            candidate = candidate.strip()
            if not candidate:
                # tolerate "a:text," or "a:text,,b:date"
                continue

            # partition never raises, unlike unpacking the result of split(":")
            column_name, separator, column_type = candidate.partition(":")
            column_name = column_name.strip()
            column_type = column_type.strip()
            if not separator or not column_name or not column_type:
                print(
                    f"[red]Invalid column definition '{candidate}', expected the format 'column_name:column_type'.[/red]"
                )
                raise typer.Abort()

            if column_type not in possible_types:
                print(
                    f"[red]Column type '{column_type}' is not supported, supported types: {possible_types}.[/red]"
                )
                raise typer.Abort()
            types[column_name] = cast(TDataType, column_type)
    return types
383
+
360
384
  track(
361
385
  "command_triggered",
362
386
  {
@@ -399,12 +423,20 @@ def ingest(
399
423
  column_hints: dict[str, TColumnSchema] = {}
400
424
  original_incremental_strategy = incremental_strategy
401
425
 
426
+ if columns:
427
+ column_types = parse_columns(columns)
428
+ for column_name, column_type in column_types.items():
429
+ column_hints[column_name] = {"data_type": column_type}
430
+
402
431
  merge_key = None
403
432
  if incremental_strategy == IncrementalStrategy.delete_insert:
404
433
  merge_key = incremental_key
405
434
  incremental_strategy = IncrementalStrategy.merge
406
435
  if incremental_key:
407
- column_hints[incremental_key] = {"merge_key": True}
436
+ if incremental_key not in column_hints:
437
+ column_hints[incremental_key] = {}
438
+
439
+ column_hints[incremental_key]["merge_key"] = True
408
440
 
409
441
  m = hashlib.sha256()
410
442
  m.update(dest_table.encode("utf-8"))
@@ -491,6 +523,21 @@ def ingest(
491
523
  if factory.source_scheme == "sqlite":
492
524
  source_table = "main." + source_table.split(".")[-1]
493
525
 
526
+ if (
527
+ incremental_key
528
+ and incremental_key in column_hints
529
+ and "data_type" in column_hints[incremental_key]
530
+ and column_hints[incremental_key]["data_type"] == "date"
531
+ ):
532
+ # By default, ingestr treats the start and end dates as datetime objects. While this worked fine for many cases, if the
533
+ # incremental field is a date, the start and end dates cannot be compared to the incremental field, and the ingestion would fail.
534
+ # In order to eliminate this, we have introduced a new option to ingestr, --columns, which allows the user to specify the column types for the destination table.
535
+ # This way, ingestr will know the data type of the incremental field, and will be able to convert the start and end dates to the correct data type before running the ingestion.
536
+ if interval_start:
537
+ interval_start = interval_start.date() # type: ignore
538
+ if interval_end:
539
+ interval_end = interval_end.date() # type: ignore
540
+
494
541
  dlt_source = source.dlt_source(
495
542
  uri=source_uri,
496
543
  table=source_table,
ingestr/src/factory.py CHANGED
@@ -24,6 +24,8 @@ from ingestr.src.sources import (
24
24
  ChessSource,
25
25
  DynamoDBSource,
26
26
  FacebookAdsSource,
27
+ GitHubSource,
28
+ GoogleAnalyticsSource,
27
29
  GoogleSheetsSource,
28
30
  GorgiasSource,
29
31
  HubspotSource,
@@ -102,6 +104,7 @@ class SourceDestinationFactory:
102
104
  "gsheets": GoogleSheetsSource,
103
105
  "shopify": ShopifySource,
104
106
  "gorgias": GorgiasSource,
107
+ "github": GitHubSource,
105
108
  "chess": ChessSource,
106
109
  "stripe": StripeAnalyticsSource,
107
110
  "facebookads": FacebookAdsSource,
@@ -118,6 +121,7 @@ class SourceDestinationFactory:
118
121
  "dynamodb": DynamoDBSource,
119
122
  "asana": AsanaSource,
120
123
  "tiktok": TikTokSource,
124
+ "googleanalytics": GoogleAnalyticsSource,
121
125
  }
122
126
  destinations: Dict[str, Type[DestinationProtocol]] = {
123
127
  "bigquery": BigQueryDestination,
@@ -0,0 +1,149 @@
1
+ """Source that loads GitHub issues, pull requests and reactions for a specific repository via a customizable GraphQL query. Loads events incrementally."""
2
+
3
+ import urllib.parse
4
+ from typing import Iterator, Optional, Sequence
5
+
6
+ import dlt
7
+ from dlt.common.typing import TDataItems
8
+ from dlt.sources import DltResource
9
+
10
+ from .helpers import get_reactions_data, get_rest_pages, get_stargazers
11
+
12
+
13
@dlt.source
def github_reactions(
    owner: str,
    name: str,
    access_token: str = dlt.secrets.value,
    items_per_page: int = 100,
    max_items: Optional[int] = None,
) -> Sequence[DltResource]:
    """Expose issues and pull requests of a repository, including reactions.

    Both resources are backed by `get_reactions_data`, which is called once
    with the node type "issues" and once with "pullRequests". Each resource
    uses the "replace" write disposition.

    Args:
        owner (str): The repository owner.
        name (str): The repository name.
        access_token (str): Access token handed to the helper. Defaults to
            `dlt.secrets.value`.
        items_per_page (int, optional): Page size handed to the helper.
            Defaults to 100.
        max_items (int, optional): Item limit handed to the helper. Defaults
            to None.

    Returns:
        Sequence[DltResource]: A tuple of two resources, `issues` followed by
        `pull_requests`.
    """
    issue_pages = get_reactions_data(
        "issues", owner, name, access_token, items_per_page, max_items
    )
    issues = dlt.resource(issue_pages, name="issues", write_disposition="replace")

    pull_request_pages = get_reactions_data(
        "pullRequests", owner, name, access_token, items_per_page, max_items
    )
    pull_requests = dlt.resource(
        pull_request_pages, name="pull_requests", write_disposition="replace"
    )

    return (issues, pull_requests)
66
+
67
+
68
@dlt.source(max_table_nesting=0)
def github_repo_events(
    owner: str, name: str, access_token: Optional[str] = None
) -> DltResource:
    """Build the `repo_events` resource for repository `owner`/`name`.

    The resource pages through `/repos/{owner}/{name}/events` (100 events per
    page) and routes every event to a table named after its `type` field.
    It is incremental on `created_at`, starting from "1970-01-01T00:00:00Z".

    Args:
        owner (str): The repository owner; URL-quoted before use.
        name (str): The repository name; URL-quoted before use.
        access_token (str, optional): Token passed on to `get_rest_pages`.
            Defaults to None.

    Returns:
        DltResource: The `repo_events` resource.
    """

    # the table name is derived from each event, giving one table per event type
    @dlt.resource(primary_key="id", table_name=lambda event: event["type"])
    def repo_events(
        last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
            "created_at", initial_value="1970-01-01T00:00:00Z", last_value_func=max
        ),
    ) -> Iterator[TDataItems]:
        quoted_owner = urllib.parse.quote(owner)
        quoted_name = urllib.parse.quote(name)
        events_query = f"/repos/{quoted_owner}/{quoted_name}/events" + "?per_page=100"

        for events_page in get_rest_pages(access_token, events_query):
            yield events_page

            if not last_created_at.start_out_of_range:
                continue

            # the page just yielded reached below the incremental start value,
            # so further requests would only spend API quota
            print(
                f"Overlap with previous run created at {last_created_at.initial_value}"
            )
            break

    return repo_events
111
+
112
+
113
@dlt.source
def github_stargazers(
    owner: str,
    name: str,
    access_token: str = dlt.secrets.value,
    items_per_page: int = 100,
    max_items: Optional[int] = None,
) -> Sequence[DltResource]:
    """Expose the stargazers of repository `owner`/`name` as one resource.

    The data comes from `get_stargazers` and is loaded with the "replace"
    write disposition.

    Args:
        owner (str): The repository owner.
        name (str): The repository name.
        access_token (str): Access token handed to the helper. Defaults to
            `dlt.secrets.value`.
        items_per_page (int, optional): Page size handed to the helper.
            Defaults to 100.
        max_items (int, optional): Item limit handed to the helper. Defaults
            to None.

    Returns:
        Sequence[DltResource]: A one-element tuple holding the `stargazers`
        resource.
    """
    stargazer_pages = get_stargazers(
        owner, name, access_token, items_per_page, max_items
    )
    stargazers = dlt.resource(
        stargazer_pages, name="stargazers", write_disposition="replace"
    )
    return (stargazers,)
@@ -0,0 +1,193 @@
1
+ from typing import Iterator, List, Optional, Tuple
2
+
3
+ from dlt.common.typing import DictStrAny, StrAny
4
+ from dlt.common.utils import chunks
5
+ from dlt.sources.helpers import requests
6
+
7
+ from .queries import COMMENT_REACTIONS_QUERY, ISSUES_QUERY, RATE_LIMIT, STARGAZERS_QUERY
8
+ from .settings import GRAPHQL_API_BASE_URL, REST_API_BASE_URL
9
+
10
+
11
+ #
12
+ # Shared
13
+ #
14
+ def _get_auth_header(access_token: Optional[str]) -> StrAny:
15
+ if access_token:
16
+ return {"Authorization": f"Bearer {access_token}"}
17
+ else:
18
+ # REST API works without access token (with high rate limits)
19
+ return {}
20
+
21
+
22
+ #
23
+ # Rest API helpers
24
+ #
25
def get_rest_pages(access_token: Optional[str], query: str) -> Iterator[List[StrAny]]:
    """Yield the pages of a REST query, following the `next` links.

    Args:
        access_token: Token used to build the auth header; None sends the
            requests without one.
        query: Path and query string appended to `REST_API_BASE_URL`.

    Yields:
        The decoded JSON body of each page. Iteration stops at the first
        empty page or when the response has no `next` link.
    """
    # the header does not change between pages, so build it once
    headers = _get_auth_header(access_token)
    next_page_url = REST_API_BASE_URL + query

    while True:
        response = requests.get(next_page_url, headers=headers)
        # progress output only: a missing rate-limit header must not abort the load
        remaining = response.headers.get("x-ratelimit-remaining", "unknown")
        print(f"got page {next_page_url}, requests left: {remaining}")

        page_items = response.json()
        if not page_items:
            break
        yield page_items

        next_link = response.links.get("next")
        if not next_link:
            break
        next_page_url = next_link["url"]
43
+
44
+
45
+ #
46
+ # GraphQL API helpers
47
+ #
48
def get_stargazers(
    owner: str,
    name: str,
    access_token: str,
    items_per_page: int,
    max_items: Optional[int],
) -> Iterator[Iterator[StrAny]]:
    """Yield pages of stargazers, one lazy iterator of records per page.

    Every edge returned by `STARGAZERS_QUERY` is reshaped into a dict with
    the keys `starredAt` and `user`, where `user` holds the edge's `node`.
    """

    def _to_record(edge: StrAny) -> StrAny:
        return {"starredAt": edge["starredAt"], "user": edge["node"]}

    query_variables = {"owner": owner, "name": name, "items_per_page": items_per_page}
    edge_pages = _get_graphql_pages(
        access_token, STARGAZERS_QUERY, query_variables, "stargazers", max_items
    )
    for edges in edge_pages:
        yield map(_to_record, edges)
63
+
64
+
65
def get_reactions_data(
    node_type: str,
    owner: str,
    name: str,
    access_token: str,
    items_per_page: int,
    max_items: Optional[int],
) -> Iterator[Iterator[StrAny]]:
    """Yield pages of `node_type` items with comment reactions attached.

    For every page returned by `ISSUES_QUERY`, the comments whose
    `reactionGroups` contain a truthy `createdAt` are collected, their
    reactions are fetched with `_get_comment_reaction`, and the result is
    stored on the comment under `reactions`. `reactionGroups` is removed
    from every comment. Each page is yielded as a lazy `map` of
    `_extract_nested_nodes` over its items.
    """
    variables = {
        "owner": owner,
        "name": name,
        "issues_per_page": items_per_page,
        "first_reactions": 100,
        "first_comments": 100,
        "node_type": node_type,
    }
    pages = _get_graphql_pages(
        access_token, ISSUES_QUERY % node_type, variables, node_type, max_items
    )
    for page_items in pages:
        # only comments that have at least one reaction are queried again,
        # which keeps the number of follow-up lookups small
        reacted_comments = {}
        page_comments = (
            comment for item in page_items for comment in item["comments"]["nodes"]
        )
        for comment in page_comments:
            has_reaction = any(
                group["createdAt"] for group in comment["reactionGroups"]
            )
            if has_reaction:
                reacted_comments[comment["id"]] = comment
            comment.pop("reactionGroups", None)

        # fetch the reactions of those comments in separate node queries
        fetched = _get_comment_reaction(list(reacted_comments), access_token)

        # hang each fetched reaction connection onto its comment
        for reacted in fetched.values():
            reacted_comments[reacted["id"]]["reactions"] = reacted["reactions"]

        yield map(_extract_nested_nodes, page_items)
103
+
104
+
105
+ def _extract_top_connection(data: StrAny, node_type: str) -> StrAny:
106
+ assert (
107
+ isinstance(data, dict) and len(data) == 1
108
+ ), f"The data with list of {node_type} must be a dictionary and contain only one element"
109
+ data = next(iter(data.values()))
110
+ return data[node_type] # type: ignore
111
+
112
+
113
+ def _extract_nested_nodes(item: DictStrAny) -> DictStrAny:
114
+ """Recursively moves `nodes` and `totalCount` to reduce nesting."""
115
+ item["reactions_totalCount"] = item["reactions"].get("totalCount", 0)
116
+ item["reactions"] = item["reactions"]["nodes"]
117
+ comments = item["comments"]
118
+ item["comments_totalCount"] = item["comments"].get("totalCount", 0)
119
+ for comment in comments["nodes"]:
120
+ if "reactions" in comment:
121
+ comment["reactions_totalCount"] = comment["reactions"].get("totalCount", 0)
122
+ comment["reactions"] = comment["reactions"]["nodes"]
123
+ item["comments"] = comments["nodes"]
124
+ return item
125
+
126
+
127
def _run_graphql_query(
    access_token: str, query: str, variables: DictStrAny
) -> Tuple[StrAny, StrAny]:
    """POST a GraphQL query and split the response into data and rate limit.

    Args:
        access_token: Token used to build the auth header.
        query: The GraphQL document.
        variables: Variables sent along with the query.

    Returns:
        A tuple `(data, rate_limit)`. `rate_limit` is the `rateLimit` entry
        removed from the response data, or `{"cost": 0, "remaining": 0}` when
        the response has none.

    Raises:
        ValueError: If the response body contains an `errors` key; the whole
            body is passed as the exception argument.
    """
    response = requests.post(
        GRAPHQL_API_BASE_URL,
        json={"query": query, "variables": variables},
        headers=_get_auth_header(access_token),
    )
    payload = response.json()
    if "errors" in payload:
        raise ValueError(payload)

    result = payload["data"]
    # rateLimit is bookkeeping, not part of the queried data
    rate_limit = result.pop("rateLimit", {"cost": 0, "remaining": 0})
    return result, rate_limit
145
+
146
+
147
def _get_graphql_pages(
    access_token: str,
    query: str,
    variables: DictStrAny,
    node_type: str,
    max_items: Optional[int],
) -> Iterator[List[DictStrAny]]:
    """Yield the pages of a paginated GraphQL connection.

    Args:
        access_token: Token used for the requests.
        query: The GraphQL document; it must accept a `page_after` variable.
        variables: Query variables. Updated in place: `page_after` is set to
            the `endCursor` of every page that was yielded.
        node_type: Name of the connection below the single root of the
            response data.
        max_items: Stop once at least this many items were yielded. None or
            0 means no limit. The last page is not truncated, so more than
            `max_items` items may be yielded.

    Yields:
        The `nodes` list of each page, or its `edges` list when the
        connection has no `nodes` key. Iteration ends at the first empty page.
    """
    items_count = 0
    while True:
        data, rate_limit = _run_graphql_query(access_token, query, variables)
        top_connection = _extract_top_connection(data, node_type)
        data_items = (
            top_connection["nodes"]
            if "nodes" in top_connection
            else top_connection["edges"]
        )
        items_count += len(data_items)
        print(
            f'Got {len(data_items)}/{items_count} {node_type}s, query cost {rate_limit["cost"]}, remaining credits: {rate_limit["remaining"]}'
        )
        if not data_items:
            return
        yield data_items

        # continue after the last item of this page on the next request
        variables["page_after"] = top_connection["pageInfo"]["endCursor"]
        if max_items and items_count >= max_items:
            print(f"Max items limit reached: {items_count} >= {max_items}")
            return
174
+
175
+
176
def _get_comment_reaction(comment_ids: List[str], access_token: str) -> StrAny:
    """Fetch the reactions of the given comment nodes, 50 comments per query.

    One aliased sub-query is built per comment id from
    `COMMENT_REACTIONS_QUERY`; the alias index keeps counting across chunks.
    `RATE_LIMIT` is appended to each chunk's query.

    Returns:
        The merged response data of all chunk queries; empty when
        `comment_ids` is empty.
    """
    reactions: DictStrAny = {}
    next_index = 0
    for id_chunk in chunks(comment_ids, 50):
        sub_queries = [
            COMMENT_REACTIONS_QUERY % (alias_index, comment_id)
            for alias_index, comment_id in enumerate(id_chunk, start=next_index)
        ]
        next_index += len(sub_queries)
        sub_queries.append(RATE_LIMIT)

        chunk_query = "{" + ",\n".join(sub_queries) + "}"
        page, rate_limit = _run_graphql_query(access_token, chunk_query, {})
        print(
            f'Got {len(page)} comments, query cost {rate_limit["cost"]}, remaining credits: {rate_limit["remaining"]}'
        )
        reactions.update(page)
    return reactions
@@ -0,0 +1,115 @@
1
+ RATE_LIMIT = """
2
+ rateLimit {
3
+ limit
4
+ cost
5
+ remaining
6
+ resetAt
7
+ }
8
+ """
9
+
10
+ ISSUES_QUERY = """
11
+ query($owner: String!, $name: String!, $issues_per_page: Int!, $first_reactions: Int!, $first_comments: Int!, $page_after: String) {
12
+ repository(owner: $owner, name: $name) {
13
+ %s(first: $issues_per_page, orderBy: {field: CREATED_AT, direction: DESC}, after: $page_after) {
14
+ totalCount
15
+ pageInfo {
16
+ endCursor
17
+ startCursor
18
+ }
19
+ nodes {
20
+ # id
21
+ number
22
+ url
23
+ title
24
+ body
25
+ author {login avatarUrl url}
26
+ authorAssociation
27
+ closed
28
+ closedAt
29
+ createdAt
30
+ state
31
+ updatedAt
32
+ reactions(first: $first_reactions) {
33
+ totalCount
34
+ nodes {
35
+ # id
36
+ user {login avatarUrl url}
37
+ content
38
+ createdAt
39
+ }
40
+ }
41
+ comments(first: $first_comments) {
42
+ totalCount
43
+ nodes {
44
+ id
45
+ url
46
+ body
47
+ author {avatarUrl login url}
48
+ authorAssociation
49
+ createdAt
50
+ reactionGroups {content createdAt}
51
+ # reactions(first: 0) {
52
+ # totalCount
53
+ # nodes {
54
+ # # id
55
+ # user {login avatarUrl url}
56
+ # content
57
+ # createdAt
58
+ # }
59
+ # }
60
+ }
61
+ }
62
+ }
63
+ }
64
+ }
65
+ rateLimit {
66
+ limit
67
+ cost
68
+ remaining
69
+ resetAt
70
+ }
71
+ }
72
+ """
73
+
74
+ COMMENT_REACTIONS_QUERY = """
75
+ node_%s: node(id:"%s") {
76
+ ... on IssueComment {
77
+ id
78
+ reactions(first: 100) {
79
+ totalCount
80
+ nodes {
81
+ user {login avatarUrl url}
82
+ content
83
+ createdAt
84
+ }
85
+ }
86
+ }
87
+ }
88
+ """
89
+
90
+ STARGAZERS_QUERY = """
91
+ query($owner: String!, $name: String!, $items_per_page: Int!, $page_after: String) {
92
+ repository(owner: $owner, name: $name) {
93
+ stargazers(first: $items_per_page, orderBy: {field: STARRED_AT, direction: DESC}, after: $page_after) {
94
+ pageInfo {
95
+ endCursor
96
+ startCursor
97
+ }
98
+ edges {
99
+ starredAt
100
+ node {
101
+ login
102
+ avatarUrl
103
+ url
104
+ }
105
+ }
106
+ }
107
+ }
108
+ rateLimit {
109
+ limit
110
+ cost
111
+ remaining
112
+ resetAt
113
+ }
114
+ }
115
+ """
@@ -0,0 +1,10 @@
1
+ """Github source settings and constants."""
2
+
3
+ START_DATE = "1970-01-01T00:00:00Z"
4
+
5
+ # rest queries
6
+ REST_API_BASE_URL = "https://api.github.com"
7
+ REPO_EVENTS_PATH = "/repos/%s/%s/events"
8
+
9
+ # graphql queries
10
+ GRAPHQL_API_BASE_URL = "https://api.github.com/graphql"
@@ -0,0 +1,70 @@
1
+ """
2
+ Defines all the sources and resources needed for Google Analytics V4
3
+ """
4
+
5
+ from typing import List, Optional, Union
6
+
7
+ import dlt
8
+ from dlt.common.typing import DictStrAny
9
+ from dlt.sources import DltResource
10
+ from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
11
+ from google.analytics.data_v1beta import BetaAnalyticsDataClient
12
+
13
+ from .helpers import basic_report
14
+
15
+
16
@dlt.source(max_table_nesting=0)
def google_analytics(
    datetime: str,
    credentials: Union[
        GcpOAuthCredentials, GcpServiceAccountCredentials
    ] = dlt.secrets.value,
    property_id: int = dlt.config.value,
    queries: List[DictStrAny] = dlt.config.value,
    start_date: Optional[str] = "2015-08-14",
    rows_per_page: int = 10000,
) -> List[DltResource]:
    """Build the `basic_report` resource for a single Google Analytics query.

    Args:
        datetime: Name of the field used both as merge key and as the
            incremental cursor of the resource.
        credentials: OAuth or service-account credentials. For OAuth
            credentials `auth` is called with the analytics read-only scope.
        property_id: Numeric property id; anything `int()` accepts.
        queries: Exactly one query dict with the keys `dimensions`,
            `metrics` and `resource_name`.
        start_date: Start date passed on to `basic_report`.
        rows_per_page: Row limit passed on to `basic_report`; must not be 0.

    Returns:
        A list holding the single `basic_report` resource, loaded with the
        "merge" write disposition.

    Raises:
        ValueError: If `property_id` is not numeric or is 0, if
            `rows_per_page` is 0, or if `queries` does not hold exactly one
            query.
    """
    try:
        property_id = int(property_id)
    except ValueError as err:
        raise ValueError(
            f"{property_id} is an invalid google property id. Please use a numeric id, and not your Measurement ID like G-7F1AE12JLR"
        ) from err
    if property_id == 0:
        raise ValueError(
            "Google Analytics property id is 0. Did you forget to configure it?"
        )
    if not rows_per_page:
        raise ValueError("Rows per page cannot be 0")
    # generate access token for credentials if we are using OAuth2.0
    if isinstance(credentials, GcpOAuthCredentials):
        credentials.auth("https://www.googleapis.com/auth/analytics.readonly")

    # Build the service object for Google Analytics api.
    client = BetaAnalyticsDataClient(credentials=credentials.to_native_credentials())
    if not queries:
        # without this guard an empty list fails below with a bare IndexError
        raise ValueError(
            "Google Analytics requires exactly one query, but none was given"
        )
    if len(queries) > 1:
        raise ValueError(
            "Google Analytics supports a single query ingestion at a time, please give only one query"
        )
    query = queries[0]

    # dimensions are passed through as configured; nothing is added to them here
    dimensions = query["dimensions"]
    resource_name = query["resource_name"]

    res = dlt.resource(
        basic_report, name="basic_report", merge_key=datetime, write_disposition="merge"
    )(
        client=client,
        rows_per_page=rows_per_page,
        property_id=property_id,
        dimensions=dimensions,
        metrics=query["metrics"],
        resource_name=resource_name,
        start_date=start_date,
        # incremental cursor on the field named by `datetime`
        last_date=dlt.sources.incremental(datetime),
    )

    return [res]
@@ -0,0 +1,70 @@
1
+ """Google analytics source helpers"""
2
+
3
+ from typing import Iterator, List
4
+
5
+ import dlt
6
+ from apiclient.discovery import Resource # type: ignore
7
+ from dlt.common import logger, pendulum
8
+ from dlt.common.typing import TDataItem
9
+ from google.analytics.data_v1beta.types import (
10
+ Dimension,
11
+ Metric,
12
+ )
13
+ from pendulum.datetime import DateTime
14
+
15
+ from .data_processing import get_report
16
+
17
+
18
def basic_report(
    client: Resource,
    rows_per_page: int,
    dimensions: List[str],
    metrics: List[str],
    property_id: int,
    resource_name: str,
    start_date: str,
    last_date: dlt.sources.incremental[DateTime],
) -> Iterator[TDataItem]:
    """
    Yields the rows of a report for the given dimensions and metrics.

    Args:
        client: The Google Analytics client used to make requests.
        rows_per_page: Row limit handed to `get_report`.
        dimensions: Names of the dimensions of the report.
        metrics: Names of the metrics of the report.
        property_id: A reference to the Google Analytics project.
        resource_name: Only used in the warning that is logged when
            `start_date` is overridden.
        start_date: First day of the report. Ignored when `last_date` carries
            a last value; falls back to "2015-08-14" when empty.
        last_date: Incremental cursor; its last value, when present, becomes
            the start date.

    Returns:
        Generator of all rows produced by `get_report`.
    """

    # a stored last value wins over the start date argument; otherwise use
    # the argument, or "2015-08-14" when no start date was given
    previous_run_date = last_date.last_value
    if previous_run_date:
        if start_date != "2015-08-14":
            logger.warning(
                f"Using the starting date: {last_date.last_value} for incremental report: {resource_name} and ignoring start date passed as argument {start_date}"
            )
        start_date = previous_run_date.to_date_string()
    else:
        start_date = start_date or "2015-08-14"

    # wrap the plain names in the api client's request objects
    dimension_list = [Dimension(name=dimension) for dimension in dimensions]
    metric_list = [Metric(name=metric) for metric in metrics]
    # the report ends on the current date
    end_date = pendulum.now().to_date_string()

    yield from get_report(
        client=client,
        property_id=property_id,
        dimension_list=dimension_list,
        metric_list=metric_list,
        limit=rows_per_page,
        start_date=start_date,
        end_date=end_date,
    )
@@ -0,0 +1,176 @@
1
+ """
2
+ This module contains helpers that process data and make it ready for loading into the database
3
+ """
4
+
5
+ import json
6
+ from typing import Any, Iterator, List, Union
7
+
8
+ import proto
9
+ from dlt.common.exceptions import MissingDependencyException
10
+ from dlt.common.pendulum import pendulum
11
+ from dlt.common.typing import DictStrAny, TDataItem, TDataItems
12
+
13
+ try:
14
+ from google.analytics.data_v1beta import BetaAnalyticsDataClient # noqa: F401
15
+ from google.analytics.data_v1beta.types import (
16
+ DateRange,
17
+ Dimension,
18
+ DimensionExpression, # noqa: F401
19
+ DimensionMetadata, # noqa: F401
20
+ GetMetadataRequest, # noqa: F401
21
+ Metadata, # noqa: F401
22
+ Metric,
23
+ MetricMetadata, # noqa: F401
24
+ MetricType,
25
+ RunReportRequest,
26
+ RunReportResponse,
27
+ )
28
+ except ImportError:
29
+ raise MissingDependencyException(
30
+ "Google Analytics API Client", ["google-analytics-data"]
31
+ )
32
+ try:
33
+ from apiclient.discovery import Resource, build # type: ignore # noqa: F401
34
+ except ImportError:
35
+ raise MissingDependencyException("Google API Client", ["google-api-python-client"])
36
+
37
+
38
def to_dict(item: Any) -> Iterator[TDataItem]:
    """
    Converts a proto message to a dictionary and yields it as the only item.

    The message is serialized to JSON with the original proto field names and
    enum names instead of integers, leaving out fields that hold their
    default value, and the JSON is then parsed back.

    :param item: the proto message to convert
    :return: generator yielding the single resulting dictionary
    """
    as_json = proto.Message.to_json(
        item,
        preserving_proto_field_name=True,
        use_integers_for_enums=False,
        including_default_value_fields=False,
    )
    yield json.loads(as_json)
53
+
54
+
55
def get_report(
    client: Resource,
    property_id: int,
    dimension_list: List[Dimension],
    metric_list: List[Metric],
    limit: int,
    start_date: str,
    end_date: str,
) -> Iterator[TDataItem]:
    """
    Gets all the pages of a report with the given query parameters.
    Processes every page and yields a dictionary for every row of the report.

    Args:
        client: The Google Analytics client used to make requests.
        property_id: A reference to the Google Analytics project.
            More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
        dimension_list: A list of all the dimensions requested in the query.
        metric_list: A list of all the metrics requested in the query.
        limit: Describes how many rows there should be per page.
        start_date: The starting date of the query.
        end_date: The ending date of the query.

    Yields:
        Generator of all rows of data in the report.
    """

    offset = 0
    while True:
        request = RunReportRequest(
            property=f"properties/{property_id}",
            dimensions=dimension_list,
            metrics=metric_list,
            limit=limit,
            offset=offset,
            date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
        )
        response = client.run_report(request)
        yield from process_report(response=response)

        # row_count is the size of the whole result, independent of limit and
        # offset; keep requesting until every row has been seen. An empty
        # page also ends the loop so it can never spin without progress.
        fetched = len(response.rows)
        offset += fetched
        if fetched == 0 or offset >= response.row_count:
            break
93
+
94
+
95
def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
    """
    Receives a single page of a report response and yields one dictionary per row.

    Each dictionary maps every dimension header name to its value, converted
    by `_resolve_dimension_value`, and every metric header name to its value,
    converted by `process_metric_value` according to the header's type.

    Args:
        response: The API response for a single page of the report.

    Yields:
        Generator of dictionaries for every row of the report page.
    """

    dimension_names = [header.name for header in response.dimension_headers]

    for row in response.rows:
        response_dict: DictStrAny = {
            dimension_name: _resolve_dimension_value(
                dimension_name, dimension_value.value
            )
            for dimension_name, dimension_value in zip(
                dimension_names, row.dimension_values
            )
        }

        # NOTE(review): assumes every row carries one value per metric header,
        # in header order — confirm against the API contract
        for metric_header, metric_value in zip(
            response.metric_headers, row.metric_values
        ):
            response_dict[metric_header.name] = process_metric_value(
                metric_type=metric_header.type_, value=metric_value.value
            )

        yield response_dict
134
+
135
+
136
def process_metric_value(metric_type: MetricType, value: str) -> Union[str, int, float]:
    """
    Converts a metric value from its string form according to the metric type.

    Args:
        metric_type: The type of the metric; only its numeric `value` is read.
        value: The value of the metric as a string.

    Returns:
        `value` unchanged for type code 0, `int(value)` for type code 1 and
        `float(value)` for every other type code.
    """

    # NOTE(review): the codes are presumably those of the GA4 MetricType enum
    # (1 = integer, higher codes = floating point kinds) — confirm:
    # https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/MetricType
    type_code = metric_type.value
    if type_code == 0:
        return value
    converter = int if type_code == 1 else float
    return converter(value)
156
+
157
+
158
+ def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
159
+ """
160
+ Helper function that receives a dimension's name and value and converts it to a datetime object if needed.
161
+
162
+ Args:
163
+ dimension_name: Name of the dimension.
164
+ dimension_value: Value of the dimension.
165
+
166
+ Returns:
167
+ The value of the dimension with the correct data type.
168
+ """
169
+ if dimension_name == "date":
170
+ return pendulum.from_format(dimension_value, "YYYYMMDD", tz="UTC")
171
+ elif dimension_name == "dateHour":
172
+ return pendulum.from_format(dimension_value, "YYYYMMDDHH", tz="UTC")
173
+ elif dimension_name == "dateHourMinute":
174
+ return pendulum.from_format(dimension_value, "YYYYMMDDHHmm", tz="UTC")
175
+ else:
176
+ return dimension_value
ingestr/src/sources.py CHANGED
@@ -29,7 +29,9 @@ from dlt.common.libs.sql_alchemy import (
29
29
  from dlt.common.time import ensure_pendulum_datetime
30
30
  from dlt.common.typing import TDataItem, TSecretStrValue
31
31
  from dlt.extract import Incremental
32
- from dlt.sources.credentials import ConnectionStringCredentials
32
+ from dlt.sources.credentials import (
33
+ ConnectionStringCredentials,
34
+ )
33
35
  from dlt.sources.sql_database import sql_table
34
36
  from dlt.sources.sql_database.helpers import TableLoader
35
37
  from dlt.sources.sql_database.schema_types import (
@@ -53,6 +55,8 @@ from ingestr.src.dynamodb import dynamodb
53
55
  from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
54
56
  from ingestr.src.filesystem import readers
55
57
  from ingestr.src.filters import table_adapter_exclude_columns
58
+ from ingestr.src.github import github_reactions, github_repo_events, github_stargazers
59
+ from ingestr.src.google_analytics import google_analytics
56
60
  from ingestr.src.google_sheets import google_spreadsheet
57
61
  from ingestr.src.gorgias import gorgias_source
58
62
  from ingestr.src.hubspot import hubspot
@@ -95,10 +99,8 @@ class SqlSource:
95
99
  if kwargs.get("incremental_key"):
96
100
  start_value = kwargs.get("interval_start")
97
101
  end_value = kwargs.get("interval_end")
98
-
99
102
  incremental = dlt.sources.incremental(
100
103
  kwargs.get("incremental_key", ""),
101
- # primary_key=(),
102
104
  initial_value=start_value,
103
105
  end_value=end_value,
104
106
  )
@@ -158,6 +160,7 @@ class SqlSource:
158
160
  switchDict = {
159
161
  int: sa.INTEGER,
160
162
  datetime: sa.TIMESTAMP,
163
+ date: sa.DATE,
161
164
  pendulum.Date: sa.DATE,
162
165
  pendulum.DateTime: sa.TIMESTAMP,
163
166
  }
@@ -1338,3 +1341,103 @@ class DynamoDBSource:
1338
1341
  )
1339
1342
 
1340
1343
  return dynamodb(table, creds, incremental)
1344
+
1345
+
1346
class GoogleAnalyticsSource:
    """Builds a Google Analytics dlt source from an ingestr URI and table spec."""

    def handles_incrementality(self) -> bool:
        """Return True unconditionally."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """
        Create the Google Analytics source limited to the ``basic_report`` resource.

        Args:
            uri: Source URI whose query string must contain ``credentials_path``
                (path to a JSON credentials file) and ``property_id``.
            table: Report spec with three colon-separated parts,
                ``custom:<dimensions>:<metrics>``; dimensions and metrics are
                comma-separated lists (spaces are stripped).
            **kwargs: Only ``interval_start`` is read. When set, it is formatted
                with ``strftime("%Y-%m-%d")`` as the start date; otherwise
                ``"2015-08-14"`` is used.

        Returns:
            ``google_analytics(...).with_resources("basic_report")``.

        Raises:
            ValueError: If ``credentials_path`` or ``property_id`` is missing,
                the table spec does not have exactly three parts, or none of
                ``date``, ``dateHourMinute``, ``dateHour`` is among the dimensions.
        """
        source_fields = parse_qs(urlparse(uri).query)

        cred_path = source_fields.get("credentials_path")
        if not cred_path:
            raise ValueError("credentials_path is required to connect Google Analytics")

        # JSON files are UTF-8; do not depend on the platform's default encoding.
        with open(cred_path[0], "r", encoding="utf-8") as f:
            credentials = json.load(f)

        property_id = source_fields.get("property_id")
        if not property_id:
            raise ValueError("property_id is required to connect to Google Analytics")

        interval_start = kwargs.get("interval_start")
        start_date = (
            interval_start.strftime("%Y-%m-%d") if interval_start else "2015-08-14"
        )

        fields = table.split(":")
        if len(fields) != 3:
            raise ValueError(
                "Invalid table format. Expected format: custom:<dimensions>:<metrics>"
            )

        dimensions = fields[1].replace(" ", "").split(",")

        # First matching candidate wins, in this fixed priority order. The local is
        # deliberately not called `datetime`, which would shadow the imported name.
        datetime_dimension = next(
            (
                candidate
                for candidate in ("date", "dateHourMinute", "dateHour")
                if candidate in dimensions
            ),
            None,
        )
        if datetime_dimension is None:
            raise ValueError(
                "You must provide at least one dimension: [dateHour, dateHourMinute, date]"
            )

        metrics = fields[2].replace(" ", "").split(",")
        queries = [
            {"resource_name": "custom", "dimensions": dimensions, "metrics": metrics}
        ]

        return google_analytics(
            property_id=property_id[0],
            start_date=start_date,
            datetime=datetime_dimension,
            queries=queries,
            credentials=credentials,
        ).with_resources("basic_report")
1401
+
1402
+
1403
class GitHubSource:
    """Builds GitHub dlt sources/resources from an ingestr URI and table name."""

    def handles_incrementality(self) -> bool:
        """Return True unconditionally."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """
        Select the GitHub source matching ``table``.

        Args:
            uri: URI of the form
                ``github://?access_token=<access_token>&owner=<owner>&repo=<repo>``.
            table: One of ``issues``, ``pull_requests``, ``repo_events`` or
                ``stargazers``.
            **kwargs: Only ``incremental_key`` is inspected, and it must be unset.

        Returns:
            The result of ``github_reactions`` narrowed to ``table``, of
            ``github_repo_events``, or of ``github_stargazers``.

        Raises:
            ValueError: If ``incremental_key`` is given, ``owner`` or ``repo`` is
                missing, ``access_token`` is missing for any table other than
                ``repo_events``, or ``table`` is not supported.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Github takes care of incrementality on its own, you should not provide incremental_key"
            )

        # github://?access_token=<access_token>&owner=<owner>&repo=<repo>
        query_params = parse_qs(urlparse(uri).query)
        owner, repo, access_token = (
            query_params.get(key, [None])[0]
            for key in ("owner", "repo", "access_token")
        )

        if not owner:
            raise ValueError(
                "owner of the repository is required to connect with GitHub"
            )
        if not repo:
            raise ValueError(
                "repo variable is required to retrieve data for a specific repository from GitHub."
            )
        # repo_events is the only table that may be requested without a token.
        if table != "repo_events" and not access_token:
            raise ValueError("access_token is required to connect with GitHub")

        if table in ("issues", "pull_requests"):
            reactions = github_reactions(
                owner=owner, name=repo, access_token=access_token
            )
            return reactions.with_resources(table)
        if table == "repo_events":
            return github_repo_events(owner=owner, name=repo, access_token=access_token)
        if table == "stargazers":
            return github_stargazers(owner=owner, name=repo, access_token=access_token)

        raise ValueError(
            f"Resource '{table}' is not supported for GitHub source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
        )
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.12.3"
1
+ __version__ = "0.12.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.3
3
+ Version: 0.12.4
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -21,6 +21,7 @@ Requires-Dist: dlt==1.4.0
21
21
  Requires-Dist: duckdb-engine==0.13.5
22
22
  Requires-Dist: duckdb==1.1.3
23
23
  Requires-Dist: facebook-business==20.0.0
24
+ Requires-Dist: google-analytics-data==0.18.15
24
25
  Requires-Dist: google-api-python-client==2.130.0
25
26
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
26
27
  Requires-Dist: mysql-connector-python==9.1.0
@@ -1,12 +1,12 @@
1
- ingestr/main.py,sha256=wkU2uLMy1q8YarJ9mXNfJepeRjp6AuPDeNDOmMUt6n0,22309
1
+ ingestr/main.py,sha256=AG6ycOEpCyBN1qEOzW3j8sKK8KX0mrBAL-A25MdRldY,24712
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
4
- ingestr/src/factory.py,sha256=UyE1TzTHn_V8JZno5SSYfQsho1eFYzzvOylogw4S49E,4389
4
+ ingestr/src/factory.py,sha256=aE7TjHzONb4DKYcfh_6-CJJfvs4lmw7iUySvSm4yQbM,4516
5
5
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
6
- ingestr/src/sources.py,sha256=Jy1N5EfbxfTae0L7PiZmPVxVYWLvOuLlw3kJ6vbT50M,48027
6
+ ingestr/src/sources.py,sha256=zkK24y3jyucbrW2MU3i0Rx1SImZWatM9_A_8Wa7ExCM,51887
7
7
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
8
8
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
9
- ingestr/src/version.py,sha256=Qu8-91hLcRe7wfW37PwNdivTonHHKLrqtJPOAq3Jvhc,23
9
+ ingestr/src/version.py,sha256=DoMS9KOhsApLyuLYhLEsd5nmoLFQ_IvVkEs_jKRzFk8,23
10
10
  ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
11
11
  ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
12
12
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -27,6 +27,13 @@ ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_
27
27
  ingestr/src/filesystem/__init__.py,sha256=wHHaKFuAjsR_ZRjl6g_Flf6FhVs9qhwREthTr03_7cc,4162
28
28
  ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
29
29
  ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
30
+ ingestr/src/github/__init__.py,sha256=csA2VcjOxXrVrvp7zY-JodO9Lpy98bJ4AqRdHCLTcGM,5838
31
+ ingestr/src/github/helpers.py,sha256=Tmnik9811zBWNO6cJwV9PFQxEx2j32LHAQCvNbubsEI,6759
32
+ ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
33
+ ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
34
+ ingestr/src/google_analytics/__init__.py,sha256=HjA13wfJm2MGfy3h_DiM5ekkNqM2dgwYCKJ3pprnDtI,2482
35
+ ingestr/src/google_analytics/helpers/__init__.py,sha256=y_q7dinlEwNBEpq6kCzjTa8lAhe2bb23bDPP0fcy7fY,2744
36
+ ingestr/src/google_analytics/helpers/data_processing.py,sha256=fIdEKr9CmZN_s1T2i9BL8IYTPPqNoK6Vaquq2y8StfE,6072
30
37
  ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
31
38
  ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
32
39
  ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -77,8 +84,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
77
84
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
78
85
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
79
86
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
80
- ingestr-0.12.3.dist-info/METADATA,sha256=Kh5v7a3mzxqmekWFp8ebU-uHKwzHRrN0HkNHPRV3_5U,7910
81
- ingestr-0.12.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
82
- ingestr-0.12.3.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
83
- ingestr-0.12.3.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
84
- ingestr-0.12.3.dist-info/RECORD,,
87
+ ingestr-0.12.4.dist-info/METADATA,sha256=VN9cqnH_rmALlSxePi6XOxOxndDGLYWTW0K6eafYVDw,7956
88
+ ingestr-0.12.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
89
+ ingestr-0.12.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
90
+ ingestr-0.12.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
91
+ ingestr-0.12.4.dist-info/RECORD,,