ingestify 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ingestify/__init__.py CHANGED
@@ -8,4 +8,4 @@ if not __INGESTIFY_SETUP__:
8
8
  from .infra import retrieve_http
9
9
  from .source_base import Source, DatasetResource
10
10
 
11
- __version__ = "0.1.0"
11
+ __version__ = "0.1.2"
@@ -84,9 +84,15 @@ def load_file(
84
84
  file_data_serialization_format=file_resource.data_serialization_format
85
85
  or "txt",
86
86
  **http_options,
87
+ **file_resource.loader_kwargs,
87
88
  )
88
89
  else:
89
- return file_resource.file_loader(file_resource, current_file)
90
+ return file_resource.file_loader(
91
+ file_resource,
92
+ current_file,
93
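+ # TODO: the file_loader Callable type hint only declares two positional arguments; find a way to also express these extra keyword arguments in the type hints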
94
+ **file_resource.loader_kwargs,
95
+ )
90
96
 
91
97
 
92
98
  class UpdateDatasetTask(Task):
@@ -1,4 +1,4 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, field
2
2
  from datetime import datetime
3
3
  from typing import Optional, Callable, TYPE_CHECKING
4
4
 
@@ -29,6 +29,7 @@ class FileResource:
29
29
  file_loader: Optional[
30
30
  Callable[["FileResource", Optional["File"]], Optional["DraftFile"]]
31
31
  ] = None
32
+ loader_kwargs: dict = field(default_factory=dict)
32
33
 
33
34
  def __post_init__(self):
34
35
  if self.json_content is None and not self.url and not self.file_loader:
@@ -75,6 +76,7 @@ class DatasetResource:
75
76
  Optional["DraftFile"],
76
77
  ]
77
78
  ] = None,
79
+ loader_kwargs: Optional[dict] = None,
78
80
  ):
79
81
  file_id = f"{data_feed_key}__{data_spec_version}"
80
82
  if file_id in self.files:
@@ -91,6 +93,7 @@ class DatasetResource:
91
93
  http_options=http_options,
92
94
  data_serialization_format=data_serialization_format,
93
95
  file_loader=file_loader,
96
+ loader_kwargs=loader_kwargs or {},
94
97
  )
95
98
 
96
99
  self.files[file_id] = file_resource
@@ -6,10 +6,38 @@ from io import BytesIO
6
6
  from typing import Optional, Callable, Tuple
7
7
 
8
8
  import requests
9
+ from requests.adapters import HTTPAdapter
10
+ from urllib3 import Retry
9
11
 
10
12
  from ingestify.domain.models import DraftFile, File
11
13
  from ingestify.utils import utcnow
12
14
 
15
+ _session = None
16
+
17
+
18
+ def get_session():
19
+ """Initialize the session when it's needed. This will make sure it's initialized
20
+ within the correct context, and we don't get issues when the session is created
21
+ in process #1 and used in process #2
22
+ """
23
+ global _session
24
+
25
+ if not _session:
26
+ retry_strategy = Retry(
27
+ total=4, # Maximum number of retries
28
+ backoff_factor=2, # Exponential backoff factor: no delay before the first retry, then backoff_factor * 2 ** (n - 1) seconds before retry n (0, 4, 8, 16 seconds here)
29
+ status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
30
+ )
31
+
32
+ adapter = HTTPAdapter(max_retries=retry_strategy)
33
+
34
+ # Create a new session object
35
+ _session = requests.Session()
36
+ _session.mount("http://", adapter)
37
+ _session.mount("https://", adapter)
38
+
39
+ return _session
40
+
13
41
 
14
42
  def retrieve_http(
15
43
  url,
@@ -41,7 +69,7 @@ def retrieve_http(
41
69
  else:
42
70
  raise Exception(f"Don't know how to use {key}")
43
71
 
44
- response = requests.get(url, headers=headers, **http_kwargs)
72
+ response = get_session().get(url, headers=headers, **http_kwargs)
45
73
  response.raise_for_status()
46
74
  if response.status_code == 304:
47
75
  # Not modified
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ingestify
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Standardizing soccer tracking- and event data
5
5
  Author: Koen Vossen
6
6
  Author-email: info@koenvossen.nl
@@ -1,4 +1,4 @@
1
- ingestify/__init__.py,sha256=DdhKleT3RggJUMj5Auq_ImGLmKm-3HHs5Yerx_VsH_w,301
1
+ ingestify/__init__.py,sha256=Jq8NUvWIQpVwqUiHza92T9gPyuEZtcg7sKNJkyG6oCE,301
2
2
  ingestify/cmdline.py,sha256=gLy79Cq3OnEyoEcI6koWIEbCwvgFZ1E8n3UU1sKS8FM,7143
3
3
  ingestify/exceptions.py,sha256=wMMuajl4AkQRfW60TLN7btJmQaH8-lUczXyW_2g9kOU,143
4
4
  ingestify/main.py,sha256=YjrAOiGzwurtoDyIf981DSJHHA6IT5q09k3QNzTKCC8,6814
@@ -8,7 +8,7 @@ ingestify/utils.py,sha256=eEHwulqNEb2YTRDrCMVxr6mWZYI6KOcNCAIWFTi74u0,8029
8
8
  ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  ingestify/application/dataset_store.py,sha256=NAW-XSvp118Lr2hXZd3qtuQr6VkPdWCLksIwd5MSs30,11489
10
10
  ingestify/application/ingestion_engine.py,sha256=GYIhb8a9ePkEcNOBPdfu-YawiD7eRZMRlxCA-6g9DRA,2249
11
- ingestify/application/loader.py,sha256=d7iXmdHN_yhDkEc2MMZ_6BLMEdRz9ChpBMy4yCWvxQo,13317
11
+ ingestify/application/loader.py,sha256=DSdSNFf7WynGsMCoK3iQGiMKkO76fZ_KIOBDEMZK3zU,13495
12
12
  ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
13
13
  ingestify/domain/__init__.py,sha256=M7_fVTJjQUx53P4UQUPhowRKPKsIIjx4JYZL1yjHKsM,46
14
14
  ingestify/domain/models/__init__.py,sha256=xHVQZP57ZQYUKwAtbccnDKX89_yTOvBKAtn4XDVbEbY,930
@@ -37,7 +37,7 @@ ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmce
37
37
  ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
38
38
  ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
39
39
  ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
40
- ingestify/domain/models/resources/dataset_resource.py,sha256=g0tu9QZQEdAGR-dRXQPL3ddcbEEGI__pvkDJGoscUTE,3027
40
+ ingestify/domain/models/resources/dataset_resource.py,sha256=HH5wMqzoWvcL84GzNa7QL3YsciI757FG4iZu9DbXn_k,3181
41
41
  ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
42
42
  ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
43
43
  ingestify/domain/models/task/task.py,sha256=R6tEZub-N_Wjl4VjwlPySdFb3L9D7nH4St2CcDzFoKA,107
@@ -46,7 +46,7 @@ ingestify/domain/services/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
46
46
  ingestify/domain/services/transformers/kloppy_to_pandas.py,sha256=NcN6nTBGVn9gz-_hWZJTMcduS1Gg7EM4X95Cqxi1QIM,809
47
47
  ingestify/infra/__init__.py,sha256=V0hpLzPVTcOHRVh0gguF6FT30YIgEOUd5v87xUHkfZ4,88
48
48
  ingestify/infra/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- ingestify/infra/fetch/http.py,sha256=gm7x0dACp3sTY1FMlbv8zRoQLZuZgtXmBg3HbhQ0syI,3086
49
+ ingestify/infra/fetch/http.py,sha256=4CcEkwtNzYkPspNIaQIfcthA5yLow0x_M9xpEsoucWw,3982
50
50
  ingestify/infra/serialization/__init__.py,sha256=LwfmRoO4qykZkJZXxVPSKpwoVIkg9qzXa7Egut9JjL4,1772
51
51
  ingestify/infra/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  ingestify/infra/sink/postgresql.py,sha256=SxuM3LntfYcpCriUpqJhMvgAf0s9cohXf6WkxSEDYDY,1816
@@ -72,8 +72,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
72
72
  ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
73
73
  ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
74
74
  ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
75
- ingestify-0.1.0.dist-info/METADATA,sha256=ryin_4RwMcyvqa4l6nESqysjlutcffHHWwoBvcwU784,18822
76
- ingestify-0.1.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
77
- ingestify-0.1.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
78
- ingestify-0.1.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
79
- ingestify-0.1.0.dist-info/RECORD,,
75
+ ingestify-0.1.2.dist-info/METADATA,sha256=66WW34koisv72Sqvwn9gpYI0Le1wuwEW_nZorjYc0MY,18822
76
+ ingestify-0.1.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
77
+ ingestify-0.1.2.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
78
+ ingestify-0.1.2.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
79
+ ingestify-0.1.2.dist-info/RECORD,,