ingestify 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ingestify-0.1.0 → ingestify-0.1.2}/PKG-INFO +1 -1
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/__init__.py +1 -1
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/application/loader.py +7 -1
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/resources/dataset_resource.py +4 -1
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/fetch/http.py +29 -1
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify.egg-info/PKG-INFO +1 -1
- {ingestify-0.1.0 → ingestify-0.1.2}/README.md +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/application/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/application/dataset_store.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/application/ingestion_engine.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/application/secrets_manager.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/cmdline.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/data_spec_version_collection.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/collection.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/collection_metadata.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/dataset.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/dataset_repository.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/events.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/file.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/file_collection.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/file_repository.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/identifier.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/revision.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/dataset/selector.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/event/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/event/_old_event.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/event/dispatcher.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/event/domain_event.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/event/event_bus.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/event/publisher.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/event/subscriber.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/extract_job.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/fetch_policy.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/resources/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/sink.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/source.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/task/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/task/set.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/models/task/task.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/services/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/services/transformers/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/services/transformers/kloppy_to_pandas.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/exceptions.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/fetch/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/serialization/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/sink/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/sink/postgresql.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/source/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/source/statsbomb_github.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/source/wyscout.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/dataset/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/dataset/local_dataset_repository.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/dataset/sqlalchemy/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/dataset/sqlalchemy/mapping.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/dataset/sqlalchemy/repository.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/file/__init__.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/file/local_file_repository.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/file/s3_file_repository.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/main.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/server.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/source_base.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/statsbomb_github/README.md +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/statsbomb_github/config.yaml.jinja2 +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/statsbomb_github/database/README.md +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/statsbomb_github/query.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/wyscout/.env +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/wyscout/.gitignore +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/wyscout/README.md +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/wyscout/config.yaml.jinja2 +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/wyscout/database/README.md +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/wyscout/query.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify/utils.py +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify.egg-info/SOURCES.txt +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify.egg-info/dependency_links.txt +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify.egg-info/entry_points.txt +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify.egg-info/requires.txt +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/ingestify.egg-info/top_level.txt +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/setup.cfg +0 -0
- {ingestify-0.1.0 → ingestify-0.1.2}/setup.py +0 -0
|
@@ -84,9 +84,15 @@ def load_file(
|
|
|
84
84
|
file_data_serialization_format=file_resource.data_serialization_format
|
|
85
85
|
or "txt",
|
|
86
86
|
**http_options,
|
|
87
|
+
**file_resource.loader_kwargs,
|
|
87
88
|
)
|
|
88
89
|
else:
|
|
89
|
-
return file_resource.file_loader(
|
|
90
|
+
return file_resource.file_loader(
|
|
91
|
+
file_resource,
|
|
92
|
+
current_file,
|
|
93
|
+
# TODO: check how to fix this with typehints
|
|
94
|
+
**file_resource.loader_kwargs,
|
|
95
|
+
)
|
|
90
96
|
|
|
91
97
|
|
|
92
98
|
class UpdateDatasetTask(Task):
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from typing import Optional, Callable, TYPE_CHECKING
|
|
4
4
|
|
|
@@ -29,6 +29,7 @@ class FileResource:
|
|
|
29
29
|
file_loader: Optional[
|
|
30
30
|
Callable[["FileResource", Optional["File"]], Optional["DraftFile"]]
|
|
31
31
|
] = None
|
|
32
|
+
loader_kwargs: dict = field(default_factory=dict)
|
|
32
33
|
|
|
33
34
|
def __post_init__(self):
|
|
34
35
|
if self.json_content is None and not self.url and not self.file_loader:
|
|
@@ -75,6 +76,7 @@ class DatasetResource:
|
|
|
75
76
|
Optional["DraftFile"],
|
|
76
77
|
]
|
|
77
78
|
] = None,
|
|
79
|
+
loader_kwargs: Optional[dict] = None,
|
|
78
80
|
):
|
|
79
81
|
file_id = f"{data_feed_key}__{data_spec_version}"
|
|
80
82
|
if file_id in self.files:
|
|
@@ -91,6 +93,7 @@ class DatasetResource:
|
|
|
91
93
|
http_options=http_options,
|
|
92
94
|
data_serialization_format=data_serialization_format,
|
|
93
95
|
file_loader=file_loader,
|
|
96
|
+
loader_kwargs=loader_kwargs or {},
|
|
94
97
|
)
|
|
95
98
|
|
|
96
99
|
self.files[file_id] = file_resource
|
|
@@ -6,10 +6,38 @@ from io import BytesIO
|
|
|
6
6
|
from typing import Optional, Callable, Tuple
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
|
+
from requests.adapters import HTTPAdapter
|
|
10
|
+
from urllib3 import Retry
|
|
9
11
|
|
|
10
12
|
from ingestify.domain.models import DraftFile, File
|
|
11
13
|
from ingestify.utils import utcnow
|
|
12
14
|
|
|
15
|
+
_session = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_session():
|
|
19
|
+
"""Initialize the session when it's needed. This will make sure it's initialized
|
|
20
|
+
within the correct context, and we don't get issues when the session is created
|
|
21
|
+
in process #1 and used in process #2
|
|
22
|
+
"""
|
|
23
|
+
global _session
|
|
24
|
+
|
|
25
|
+
if not _session:
|
|
26
|
+
retry_strategy = Retry(
|
|
27
|
+
total=4, # Maximum number of retries
|
|
28
|
+
backoff_factor=2, # Exponential backoff factor (sleep is factor * 2 ** (retry - 1), with no delay before the first retry; e.g., 2 means 0, 4, 8, 16 seconds, ...)
|
|
29
|
+
status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
adapter = HTTPAdapter(max_retries=retry_strategy)
|
|
33
|
+
|
|
34
|
+
# Create a new session object
|
|
35
|
+
_session = requests.Session()
|
|
36
|
+
_session.mount("http://", adapter)
|
|
37
|
+
_session.mount("https://", adapter)
|
|
38
|
+
|
|
39
|
+
return _session
|
|
40
|
+
|
|
13
41
|
|
|
14
42
|
def retrieve_http(
|
|
15
43
|
url,
|
|
@@ -41,7 +69,7 @@ def retrieve_http(
|
|
|
41
69
|
else:
|
|
42
70
|
raise Exception(f"Don't know how to use {key}")
|
|
43
71
|
|
|
44
|
-
response =
|
|
72
|
+
response = get_session().get(url, headers=headers, **http_kwargs)
|
|
45
73
|
response.raise_for_status()
|
|
46
74
|
if response.status_code == 304:
|
|
47
75
|
# Not modified
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ingestify-0.1.0 → ingestify-0.1.2}/ingestify/domain/services/transformers/kloppy_to_pandas.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ingestify-0.1.0 → ingestify-0.1.2}/ingestify/infra/store/dataset/local_dataset_repository.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/statsbomb_github/config.yaml.jinja2
RENAMED
|
File without changes
|
{ingestify-0.1.0 → ingestify-0.1.2}/ingestify/static/templates/statsbomb_github/database/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|