port-ocean 0.5.5__py3-none-any.whl → 0.17.8__py3-none-any.whl
This diff shows the content of these two publicly released package versions as published to their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of port-ocean might be problematic.
- integrations/_infra/Dockerfile.Deb +56 -0
- integrations/_infra/Dockerfile.alpine +108 -0
- integrations/_infra/Dockerfile.base.builder +26 -0
- integrations/_infra/Dockerfile.base.runner +13 -0
- integrations/_infra/Dockerfile.dockerignore +94 -0
- {port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}} → integrations/_infra}/Makefile +21 -8
- integrations/_infra/grpcio.sh +18 -0
- integrations/_infra/init.sh +5 -0
- port_ocean/bootstrap.py +1 -1
- port_ocean/cli/commands/defaults/clean.py +3 -1
- port_ocean/cli/commands/new.py +42 -7
- port_ocean/cli/commands/sail.py +7 -1
- port_ocean/cli/cookiecutter/cookiecutter.json +3 -0
- port_ocean/cli/cookiecutter/hooks/post_gen_project.py +20 -3
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/.env.example +6 -0
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/.port/resources/blueprints.json +41 -0
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/.port/resources/port-app-config.yml +16 -0
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/.port/spec.yaml +6 -7
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/CHANGELOG.md +1 -1
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/CONTRIBUTING.md +7 -0
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/changelog/.gitignore +1 -0
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/main.py +16 -1
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/pyproject.toml +21 -10
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/tests/test_sample.py +2 -0
- port_ocean/clients/port/authentication.py +16 -4
- port_ocean/clients/port/client.py +17 -0
- port_ocean/clients/port/mixins/blueprints.py +7 -8
- port_ocean/clients/port/mixins/entities.py +108 -53
- port_ocean/clients/port/mixins/integrations.py +23 -34
- port_ocean/clients/port/retry_transport.py +0 -5
- port_ocean/clients/port/utils.py +9 -3
- port_ocean/config/base.py +16 -16
- port_ocean/config/dynamic.py +2 -0
- port_ocean/config/settings.py +79 -11
- port_ocean/context/event.py +18 -5
- port_ocean/context/ocean.py +14 -3
- port_ocean/core/defaults/clean.py +10 -3
- port_ocean/core/defaults/common.py +25 -9
- port_ocean/core/defaults/initialize.py +111 -100
- port_ocean/core/event_listener/__init__.py +8 -0
- port_ocean/core/event_listener/base.py +49 -10
- port_ocean/core/event_listener/factory.py +9 -1
- port_ocean/core/event_listener/http.py +11 -3
- port_ocean/core/event_listener/kafka.py +24 -5
- port_ocean/core/event_listener/once.py +96 -4
- port_ocean/core/event_listener/polling.py +16 -14
- port_ocean/core/event_listener/webhooks_only.py +41 -0
- port_ocean/core/handlers/__init__.py +1 -2
- port_ocean/core/handlers/entities_state_applier/base.py +4 -1
- port_ocean/core/handlers/entities_state_applier/port/applier.py +29 -87
- port_ocean/core/handlers/entities_state_applier/port/order_by_entities_dependencies.py +5 -2
- port_ocean/core/handlers/entity_processor/base.py +26 -22
- port_ocean/core/handlers/entity_processor/jq_entity_processor.py +253 -45
- port_ocean/core/handlers/port_app_config/base.py +55 -15
- port_ocean/core/handlers/port_app_config/models.py +24 -5
- port_ocean/core/handlers/resync_state_updater/__init__.py +5 -0
- port_ocean/core/handlers/resync_state_updater/updater.py +84 -0
- port_ocean/core/integrations/base.py +5 -7
- port_ocean/core/integrations/mixins/events.py +3 -1
- port_ocean/core/integrations/mixins/sync.py +4 -2
- port_ocean/core/integrations/mixins/sync_raw.py +209 -74
- port_ocean/core/integrations/mixins/utils.py +1 -1
- port_ocean/core/models.py +44 -0
- port_ocean/core/ocean_types.py +29 -11
- port_ocean/core/utils/entity_topological_sorter.py +90 -0
- port_ocean/core/utils/utils.py +109 -0
- port_ocean/debug_cli.py +5 -0
- port_ocean/exceptions/core.py +4 -0
- port_ocean/exceptions/port_defaults.py +0 -2
- port_ocean/helpers/retry.py +85 -24
- port_ocean/log/handlers.py +23 -2
- port_ocean/log/logger_setup.py +8 -1
- port_ocean/log/sensetive.py +25 -10
- port_ocean/middlewares.py +10 -2
- port_ocean/ocean.py +57 -24
- port_ocean/run.py +10 -5
- port_ocean/tests/__init__.py +0 -0
- port_ocean/tests/clients/port/mixins/test_entities.py +53 -0
- port_ocean/tests/conftest.py +4 -0
- port_ocean/tests/core/defaults/test_common.py +166 -0
- port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +350 -0
- port_ocean/tests/core/handlers/mixins/test_sync_raw.py +552 -0
- port_ocean/tests/core/test_utils.py +73 -0
- port_ocean/tests/core/utils/test_entity_topological_sorter.py +99 -0
- port_ocean/tests/helpers/__init__.py +0 -0
- port_ocean/tests/helpers/fake_port_api.py +191 -0
- port_ocean/tests/helpers/fixtures.py +46 -0
- port_ocean/tests/helpers/integration.py +31 -0
- port_ocean/tests/helpers/ocean_app.py +66 -0
- port_ocean/tests/helpers/port_client.py +21 -0
- port_ocean/tests/helpers/smoke_test.py +82 -0
- port_ocean/tests/log/test_handlers.py +71 -0
- port_ocean/tests/test_smoke.py +74 -0
- port_ocean/tests/utils/test_async_iterators.py +45 -0
- port_ocean/tests/utils/test_cache.py +189 -0
- port_ocean/utils/async_iterators.py +109 -0
- port_ocean/utils/cache.py +37 -1
- port_ocean/utils/misc.py +22 -4
- port_ocean/utils/queue_utils.py +88 -0
- port_ocean/utils/signal.py +1 -4
- port_ocean/utils/time.py +54 -0
- {port_ocean-0.5.5.dist-info → port_ocean-0.17.8.dist-info}/METADATA +27 -19
- port_ocean-0.17.8.dist-info/RECORD +164 -0
- {port_ocean-0.5.5.dist-info → port_ocean-0.17.8.dist-info}/WHEEL +1 -1
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/.dockerignore +0 -94
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/Dockerfile +0 -15
- port_ocean/cli/cookiecutter/{{cookiecutter.integration_slug}}/config.yaml +0 -17
- port_ocean/core/handlers/entities_state_applier/port/validate_entity_relations.py +0 -40
- port_ocean/core/utils.py +0 -65
- port_ocean-0.5.5.dist-info/RECORD +0 -129
- {port_ocean-0.5.5.dist-info → port_ocean-0.17.8.dist-info}/LICENSE.md +0 -0
- {port_ocean-0.5.5.dist-info → port_ocean-0.17.8.dist-info}/entry_points.txt +0 -0
port_ocean/tests/utils/test_async_iterators.py
ADDED

@@ -0,0 +1,45 @@
+from typing import Any, AsyncGenerator
+import asyncio
+from port_ocean.utils.async_iterators import semaphore_async_iterator
+import pytest
+
+
+@pytest.mark.asyncio
+async def test_semaphore_async_iterator() -> None:
+    max_concurrency = 5
+    semaphore = asyncio.BoundedSemaphore(max_concurrency)
+
+    concurrent_tasks = 0
+    max_concurrent_tasks = 0
+    lock = asyncio.Lock()  # Protect shared variables
+
+    num_tasks = 20
+
+    async def mock_function() -> AsyncGenerator[str, None]:
+        nonlocal concurrent_tasks, max_concurrent_tasks
+
+        async with lock:
+            concurrent_tasks += 1
+            if concurrent_tasks > max_concurrent_tasks:
+                max_concurrent_tasks = concurrent_tasks
+
+        await asyncio.sleep(0.1)
+        yield "result"
+
+        async with lock:
+            concurrent_tasks -= 1
+
+    async def consume_iterator(async_iterator: Any) -> None:
+        async for _ in async_iterator:
+            pass
+
+    tasks = [
+        consume_iterator(semaphore_async_iterator(semaphore, mock_function))
+        for _ in range(num_tasks)
+    ]
+    await asyncio.gather(*tasks)
+
+    assert (
+        max_concurrent_tasks <= max_concurrency
+    ), f"Max concurrent tasks {max_concurrent_tasks} exceeded semaphore limit {max_concurrency}"
+    assert concurrent_tasks == 0, "Not all tasks have completed"
port_ocean/tests/utils/test_cache.py
ADDED

@@ -0,0 +1,189 @@
+from typing import Any
+import asyncio
+from port_ocean.utils import cache  # Import the module where 'event' is used
+import pytest
+from dataclasses import dataclass, field
+from typing import AsyncGenerator, AsyncIterator, List, TypeVar
+
+
+@dataclass
+class EventContext:
+    attributes: dict[str, Any] = field(default_factory=dict)
+
+
+@pytest.fixture
+def event() -> EventContext:
+    return EventContext()
+
+
+T = TypeVar("T")
+
+
+async def collect_iterator_results(iterator: AsyncIterator[List[T]]) -> List[T]:
+    results = []
+    async for item in iterator:
+        results.extend(item)
+    return results
+
+
+@pytest.mark.asyncio
+async def test_cache_iterator_result(event: EventContext, monkeypatch: Any) -> None:
+    monkeypatch.setattr(cache, "event", event)
+
+    call_count = 0
+
+    @cache.cache_iterator_result()
+    async def sample_iterator(x: int) -> AsyncGenerator[List[int], None]:
+        nonlocal call_count
+        call_count += 1
+        for i in range(x):
+            await asyncio.sleep(0.1)
+            yield [i]
+
+    result1 = await collect_iterator_results(sample_iterator(3))
+    assert result1 == [0, 1, 2]
+    assert call_count == 1
+
+    result2 = await collect_iterator_results(sample_iterator(3))
+    assert result2 == [0, 1, 2]
+    assert call_count == 1
+
+    result3 = await collect_iterator_results(sample_iterator(4))
+    assert result3 == [0, 1, 2, 3]
+    assert call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_cache_iterator_result_with_kwargs(
+    event: EventContext, monkeypatch: Any
+) -> None:
+    monkeypatch.setattr(cache, "event", event)
+
+    call_count = 0
+
+    @cache.cache_iterator_result()
+    async def sample_iterator(x: int, y: int = 1) -> AsyncGenerator[List[int], None]:
+        nonlocal call_count
+        call_count += 1
+        for i in range(x * y):
+            await asyncio.sleep(0.1)
+            yield [i]
+
+    result1 = await collect_iterator_results(sample_iterator(2, y=2))
+    assert result1 == [0, 1, 2, 3]
+    assert call_count == 1
+
+    result2 = await collect_iterator_results(sample_iterator(2, y=2))
+    assert result2 == [0, 1, 2, 3]
+    assert call_count == 1
+
+    result3 = await collect_iterator_results(sample_iterator(2, y=3))
+    assert result3 == [0, 1, 2, 3, 4, 5]
+    assert call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_cache_iterator_result_no_cache(
+    event: EventContext, monkeypatch: Any
+) -> None:
+    monkeypatch.setattr(cache, "event", event)
+
+    call_count = 0
+
+    @cache.cache_iterator_result()
+    async def sample_iterator(x: int) -> AsyncGenerator[List[int], None]:
+        nonlocal call_count
+        call_count += 1
+        for i in range(x):
+            await asyncio.sleep(0.1)
+            yield [i]
+
+    result1 = await collect_iterator_results(sample_iterator(3))
+    assert result1 == [0, 1, 2]
+    assert call_count == 1
+
+    event.attributes.clear()
+
+    result2 = await collect_iterator_results(sample_iterator(3))
+    assert result2 == [0, 1, 2]
+    assert call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_cache_coroutine_result(event: EventContext, monkeypatch: Any) -> None:
+    monkeypatch.setattr(cache, "event", event)
+
+    call_count = 0
+
+    @cache.cache_coroutine_result()
+    async def sample_coroutine(x: int) -> int:
+        nonlocal call_count
+        call_count += 1
+        await asyncio.sleep(0.1)
+        return x * 2
+
+    result1 = await sample_coroutine(2)
+    assert result1 == 4
+    assert call_count == 1
+
+    result2 = await sample_coroutine(2)
+    assert result2 == 4
+    assert call_count == 1
+
+    result3 = await sample_coroutine(3)
+    assert result3 == 6
+    assert call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_cache_coroutine_result_with_kwargs(
+    event: EventContext, monkeypatch: Any
+) -> None:
+    monkeypatch.setattr(cache, "event", event)
+
+    call_count = 0
+
+    @cache.cache_coroutine_result()
+    async def sample_coroutine(x: int, y: int = 1) -> int:
+        nonlocal call_count
+        call_count += 1
+        await asyncio.sleep(0.1)
+        return x * y
+
+    result1 = await sample_coroutine(2, y=3)
+    assert result1 == 6
+    assert call_count == 1
+
+    result2 = await sample_coroutine(2, y=3)
+    assert result2 == 6
+    assert call_count == 1
+
+    result3 = await sample_coroutine(2, y=4)
+    assert result3 == 8
+    assert call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_cache_coroutine_result_no_cache(
+    event: EventContext, monkeypatch: Any
+) -> None:
+    monkeypatch.setattr(cache, "event", event)
+
+    call_count = 0
+
+    @cache.cache_coroutine_result()
+    async def sample_coroutine(x: int) -> int:
+        nonlocal call_count
+        call_count += 1
+        await asyncio.sleep(0.1)
+        return x * 2
+
+    result1 = await sample_coroutine(2)
+    assert result1 == 4
+    assert call_count == 1
+
+    event.attributes.clear()
+
+    result2 = await sample_coroutine(2)
+    assert result2 == 4
+    assert call_count == 2
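These tests hinge on how cache keys are derived: `cache.py` hashes the function name together with the call arguments (the `hash_func(function_name, *args, **kwargs)` signature is visible in the `port_ocean/utils/cache.py` hunk below), so `sample_iterator(3)` and `sample_iterator(4)` land in separate cache slots, and clearing `event.attributes` empties the cache wholesale. A minimal sketch of such keying; the hashing details are an assumption, since `hash_func`'s body is not part of this diff:

```python
import hashlib
from typing import Any


def hash_func(function_name: str, *args: Any, **kwargs: Any) -> str:
    # Assumption: the real implementation hashes a stable rendering of the
    # arguments; only the signature appears in the cache.py hunk below.
    args_repr = ",".join(repr(a) for a in args)
    kwargs_repr = ",".join(f"{k}={v!r}" for k, v in sorted(kwargs.items()))
    digest = hashlib.sha256(f"{args_repr}|{kwargs_repr}".encode()).hexdigest()
    return f"{function_name}_{digest}"


# Same name and arguments -> same key (the cache hits asserted above):
assert hash_func("sample_iterator", 3) == hash_func("sample_iterator", 3)
# Different arguments -> different key (the call_count == 2 cases):
assert hash_func("sample_iterator", 3) != hash_func("sample_iterator", 4)
```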
port_ocean/utils/async_iterators.py
ADDED

@@ -0,0 +1,109 @@
+import typing
+
+import aiostream
+
+if typing.TYPE_CHECKING:
+    from asyncio import Semaphore
+
+
+async def stream_async_iterators_tasks(
+    *tasks: typing.AsyncIterable[typing.Any],
+) -> typing.AsyncIterable[typing.Any]:
+    """
+    This function takes a list of async iterators and streams the results of each iterator as they are available.
+    By using this function you can combine multiple async iterators into a single stream of results, instead of waiting
+    for each iterator to finish before starting the next one.
+
+    Usage:
+    ```python
+    async def async_iterator1():
+        for i in range(10):
+            yield i
+            await asyncio.sleep(1)
+
+    async def async_iterator2():
+        for i in range(10, 20):
+            yield i
+            await asyncio.sleep(1)
+
+    async def main():
+        async for result in stream_async_iterators_tasks([async_iterator1(), async_iterator2()]):
+            print(result)
+    ```
+
+    Caution - Before using this function, make sure that the third-party API you are calling allows the number of
+    concurrent requests you are making. If the API has a rate limit, you may need to adjust the number of concurrent
+    requests to avoid hitting the rate limit.
+
+    :param tasks: A list of async iterators
+    :return: A stream of results
+    """
+    if not tasks:
+        return
+
+    if len(tasks) == 1:
+        async for batch_items in tasks[0]:
+            yield batch_items
+        return
+
+    combine = aiostream.stream.merge(tasks[0], *tasks[1:])
+    async with combine.stream() as streamer:
+        async for batch_items in streamer:
+            yield batch_items
+
+
+async def semaphore_async_iterator(
+    semaphore: "Semaphore",
+    function: typing.Callable[[], typing.AsyncIterator[typing.Any]],
+) -> typing.AsyncIterator[typing.Any]:
+    """
+    Executes an asynchronous iterator function under a semaphore to limit concurrency.
+
+    This function ensures that the provided asynchronous iterator function is executed
+    while respecting the concurrency limit imposed by the semaphore. It acquires the
+    semaphore before executing the function and releases it after the function completes,
+    thus controlling the number of concurrent executions.
+
+    Parameters:
+        semaphore (asyncio.Semaphore | asyncio.BoundedSemaphore): The semaphore used to limit concurrency.
+        function (Callable[[], AsyncIterator[Any]]): A nullary asynchronous function, - apply arguments with `functools.partial` or an anonymous function (lambda)
+        that returns an asynchronous iterator. This function is executed under the semaphore.
+
+    Yields:
+        Any: The items yielded by the asynchronous iterator function.
+
+    Usage:
+    ```python
+    import asyncio
+
+    async def async_iterator_function(param1, param2):
+        # Your async code here
+        yield ...
+
+    async def async_generator_function():
+        # Your async code to retrieve items
+        param1 = "your_param1"
+        yield param1
+
+    async def main():
+        semaphore = asyncio.BoundedSemaphore(50)
+        param2 = "your_param2"
+
+        tasks = [
+            semaphore_async_iterator(
+                semaphore,
+                lambda: async_iterator_function(param1, param2)  # functools.partial(async_iterator_function, param1, param2)
+            )
+            async for param1 in async_generator_function()
+        ]
+
+        async for batch in stream_async_iterators_tasks(*tasks):
+            # Process each batch
+            pass
+
+    asyncio.run(main())
+    ```
+    """
+    async with semaphore:
+        async for result in function():
+            yield result
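Taken together, the two helpers form a bounded fan-out pattern: wrap each iterator factory with `semaphore_async_iterator`, then merge the wrapped iterators into a single stream with `stream_async_iterators_tasks`. A self-contained sketch under assumed names (`fetch_page` and the page count are invented for illustration):

```python
import asyncio
import functools
from typing import AsyncGenerator

from port_ocean.utils.async_iterators import (
    semaphore_async_iterator,
    stream_async_iterators_tasks,
)


async def fetch_page(page: int) -> AsyncGenerator[list[str], None]:
    await asyncio.sleep(0.1)  # stand-in for a paginated API call
    yield [f"item-{page}"]


async def main() -> None:
    semaphore = asyncio.BoundedSemaphore(5)  # at most 5 pages in flight
    tasks = [
        # functools.partial supplies the nullary callable the helper expects
        semaphore_async_iterator(semaphore, functools.partial(fetch_page, page))
        for page in range(20)
    ]
    async for batch in stream_async_iterators_tasks(*tasks):
        print(batch)  # batches arrive as soon as any iterator yields


asyncio.run(main())
```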
port_ocean/utils/cache.py
CHANGED
@@ -1,9 +1,10 @@
 import functools
 import hashlib
-from typing import Callable, AsyncIterator, Any
+from typing import Callable, AsyncIterator, Awaitable, Any
 from port_ocean.context.event import event
 
 AsyncIteratorCallable = Callable[..., AsyncIterator[list[Any]]]
+AsyncCallable = Callable[..., Awaitable[Any]]
 
 
 def hash_func(function_name: str, *args: Any, **kwargs: Any) -> str:

@@ -59,3 +60,38 @@ def cache_iterator_result() -> Callable[[AsyncIteratorCallable], AsyncIteratorCa
         return wrapper
 
     return decorator
+
+
+def cache_coroutine_result() -> Callable[[AsyncCallable], AsyncCallable]:
+    """Coroutine version of `cache_iterator_result` from port_ocean.utils.cache
+
+    Decorator that caches the result of a coroutine function.
+    It checks if the result is already in the cache, and if not,
+    fetches the result, caches it, and returns the cached value.
+
+    The cache is stored in the scope of the running event and is
+    removed when the event is finished.
+
+    Usage:
+    ```python
+    @cache_coroutine_result()
+    async def my_coroutine_function():
+        # Your code here
+    ```
+    """
+
+    def decorator(func: AsyncCallable) -> AsyncCallable:
+        @functools.wraps(func)
+        async def wrapper(*args: Any, **kwargs: Any) -> Any:
+            cache_key = hash_func(func.__name__, *args, **kwargs)
+
+            if cache := event.attributes.get(cache_key):
+                return cache
+
+            result = await func(*args, **kwargs)
+            event.attributes[cache_key] = result
+            return result
+
+        return wrapper
+
+    return decorator
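One caveat worth noting in the new decorator: the lookup `if cache := event.attributes.get(cache_key)` treats falsy cached values (`None`, `0`, `""`, `[]`) as misses, so such results are recomputed on every call. A minimal sketch of that edge case, rebinding `cache.event` to an invented stand-in the same way the tests above do with monkeypatch:

```python
import asyncio

from port_ocean.utils import cache


class FakeEvent:
    # Invented stand-in for port_ocean's event context.
    def __init__(self) -> None:
        self.attributes: dict = {}


cache.event = FakeEvent()  # what the tests achieve via monkeypatch.setattr

call_count = 0


@cache.cache_coroutine_result()
async def always_zero(x: int) -> int:
    global call_count
    call_count += 1
    return x * 0  # falsy result: never recognized as a cache hit


async def main() -> None:
    await always_zero(1)
    await always_zero(1)
    print(call_count)  # 2 -- the falsy value was recomputed, not served from cache


asyncio.run(main())
```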
port_ocean/utils/misc.py
CHANGED
@@ -1,3 +1,4 @@
+from enum import Enum
 import inspect
 from importlib.util import spec_from_file_location, module_from_spec
 from pathlib import Path

@@ -5,11 +6,16 @@ from time import time
 from types import ModuleType
 from typing import Callable, Any
 from uuid import uuid4
-
 import tomli
 import yaml
 
 
+class IntegrationStateStatus(Enum):
+    Running = "running"
+    Failed = "failed"
+    Completed = "completed"
+
+
 def get_time(seconds_precision: bool = True) -> float:
     """Return current time as Unix/Epoch timestamp, in seconds.
     :param seconds_precision: if True, return with seconds precision as integer (default).

@@ -29,13 +35,25 @@ def get_function_location(func: Callable[..., Any]) -> str:
     return f"{file_path}:{line_number}"
 
 
-def get_integration_version() -> str:
+def get_pyproject_data() -> dict[str, Any] | None:
     try:
         with open("./pyproject.toml", "rb") as toml_file:
             pyproject_data = tomli.load(toml_file)
-            return pyproject_data["tool"]["poetry"]["version"]
+            return pyproject_data["tool"]["poetry"]
     except (FileNotFoundError, KeyError):
-        return ""
+        return None
+
+
+def get_integration_version() -> str:
+    if data := get_pyproject_data():
+        return data["version"]
+    return ""
+
+
+def get_integration_name() -> str:
+    if data := get_pyproject_data():
+        return data["name"]
+    return ""
 
 
 def get_spec_file(path: Path = Path(".")) -> dict[str, Any] | None:
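The refactor splits the pyproject lookup into a shared `get_pyproject_data` plus thin `get_integration_version`/`get_integration_name` wrappers, both falling back to an empty string when `pyproject.toml` is absent; `IntegrationStateStatus` presumably backs the new resync state reporting (`port_ocean/core/handlers/resync_state_updater/updater.py` in the file list above). A small sketch restating the behavior shown in the hunk:

```python
from port_ocean.utils.misc import (
    IntegrationStateStatus,
    get_integration_name,
    get_integration_version,
)

# Both helpers share one pyproject.toml parse and degrade to "" when the
# file (or the [tool.poetry] table) is missing:
print(get_integration_name() or "<unknown>")
print(get_integration_version() or "<unknown>")

# The enum's string values, as defined in the hunk above:
assert IntegrationStateStatus.Running.value == "running"
assert IntegrationStateStatus.Failed.value == "failed"
assert IntegrationStateStatus.Completed.value == "completed"
```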
port_ocean/utils/queue_utils.py
ADDED

@@ -0,0 +1,88 @@
+import asyncio
+from asyncio import Queue, Task
+from typing import Any, TypeVar, Callable, Coroutine
+
+from loguru import logger
+
+T = TypeVar("T")
+
+
+async def _start_processor_worker(
+    queue: Queue[Any | None],
+    func: Callable[..., Coroutine[Any, Any, T]],
+    results: list[T],
+    errors: list[Exception],
+) -> None:
+    while True:
+        try:
+            raw_params = await queue.get()
+            if raw_params is None:
+                return
+            logger.debug("Processing async task")
+            results.append(await func(*raw_params))
+        except Exception as e:
+            logger.error(f"Error processing task: {e}")
+            errors.append(e)
+        finally:
+            queue.task_done()
+
+
+async def process_in_queue(
+    objects_to_process: list[Any],
+    func: Callable[..., Coroutine[Any, Any, T]],
+    *args: Any,
+    concurrency: int = 50,
+) -> list[T]:
+    """
+    This function executes multiple asynchronous tasks in a bounded way
+    (e.g. having 200 tasks to execute, while running only 20 concurrently),
+    to prevent overload and memory issues when dealing with large sets of data and tasks.
+    read more -> https://stackoverflow.com/questions/38831322/making-1-milion-requests-with-aiohttp-asyncio-literally
+
+    Usage:
+    ```python
+    async def incrementBy(num: int, increment_by: int) -> int:
+        await asyncio.sleep(3)
+        return num + increment_by
+
+    async def main():
+        raw_objects = [1, 2, 3, 4, 5]
+        processed_objects = await process_in_queue(
+            raw_objects,
+            incrementBy,
+            5
+        )
+    ```
+
+    :param objects_to_process: A list of the raw objects to process
+    :param func: An async function that turns raw object into result object
+    :param args: Static arguments to pass to the func when called
+    :param concurrency: An integer specifying the concurrent workers count
+    :return: A list of result objects
+    """
+    queue: Queue[Any | None] = Queue(maxsize=concurrency * 2)
+    tasks: list[Task[Any]] = []
+    processing_results: list[T] = []
+    errors: list[Exception] = []
+
+    for i in range(concurrency):
+        tasks.append(
+            asyncio.create_task(
+                _start_processor_worker(queue, func, processing_results, errors)
+            )
+        )
+
+    for i in range(len(objects_to_process)):
+        await queue.put((objects_to_process[i], *args))
+
+    for i in range(concurrency):
+        # We put None value into the queue, so the workers will know that we
+        # are done sending more input and they can terminate
+        await queue.put(None)
+
+    await queue.join()
+    await asyncio.gather(*tasks)
+    if errors:
+        raise ExceptionGroup("Error processing tasks", errors)
+
+    return processing_results
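`process_in_queue` does not fail fast: workers append failures to a shared list, and only after the queue drains are they re-raised together as an `ExceptionGroup` (so this module requires Python 3.11+, and results computed before a failure are discarded when the group is raised). A sketch of consuming it with `except*`; the failing task is invented:

```python
import asyncio

from port_ocean.utils.queue_utils import process_in_queue


async def might_fail(num: int) -> int:
    if num % 2 == 0:
        raise ValueError(f"even input rejected: {num}")
    return num * 10


async def main() -> None:
    try:
        results = await process_in_queue([1, 2, 3, 4, 5], might_fail, concurrency=2)
        print(results)
    except* ValueError as eg:  # ExceptionGroup handling, Python 3.11+
        # All worker errors arrive together instead of aborting on the first.
        for err in eg.exceptions:
            print("task failed:", err)


asyncio.run(main())
```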
port_ocean/utils/signal.py
CHANGED
@@ -1,4 +1,3 @@
-import signal
 from typing import Callable, Any
 
 from werkzeug.local import LocalProxy, LocalStack

@@ -13,10 +12,8 @@ from port_ocean.utils.misc import generate_uuid
 class SignalHandler:
     def __init__(self) -> None:
         self._handlers: dict[str, Callable[[], Any]] = {}
-        signal.signal(signal.SIGINT, lambda _, __: self._exit())
-        signal.signal(signal.SIGTERM, lambda _, __: self._exit())
 
-    def _exit(self) -> None:
+    def exit(self) -> None:
         """
         Handles the exit signal.
         """
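With the constructor no longer calling `signal.signal`, and `_exit` renamed to the public `exit`, registering OS signals becomes the host application's job (port-ocean presumably does this in its reworked startup path, which is outside this excerpt). Purely as an illustration of such external wiring, not port-ocean's actual code:

```python
import asyncio
import signal

from port_ocean.utils.signal import SignalHandler

handler = SignalHandler()


def install_signal_handlers(loop: asyncio.AbstractEventLoop) -> None:
    # Hypothetical wiring: forward SIGINT/SIGTERM to the now-public exit().
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, handler.exit)
```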
port_ocean/utils/time.py
ADDED
@@ -0,0 +1,54 @@
+import datetime
+from loguru import logger
+
+
+def convert_str_to_utc_datetime(time_str: str) -> datetime.datetime | None:
+    """
+    Convert a string representing time to a datetime object.
+    :param time_str: a string representing time in the format "2021-09-01T12:00:00Z"
+    """
+    aware_date = datetime.datetime.fromisoformat(time_str)
+    if time_str.endswith("Z"):
+        aware_date = datetime.datetime.fromisoformat(time_str.replace("Z", "+00:00"))
+    return aware_date.astimezone(datetime.timezone.utc)
+
+
+def convert_to_minutes(s: str) -> int:
+    minutes_per_unit = {"s": 1 / 60, "m": 1, "h": 60, "d": 1440, "w": 10080}
+    try:
+        return int(int(s[:-1]) * minutes_per_unit[s[-1]])
+    except Exception:
+        logger.error(f"Failed converting string to minutes, {s}")
+        raise ValueError(
+            f"Invalid format. Expected a string ending with {minutes_per_unit.keys()}"
+        )
+
+
+def get_next_occurrence(
+    interval_seconds: int,
+    start_time: datetime.datetime,
+    now: datetime.datetime | None = None,
+) -> datetime.datetime:
+    """
+    Predict the next occurrence of an event based on interval, start time, and current time.
+
+    :param interval_minutes: Interval between occurrences in minutes.
+    :param start_time: Start time of the event as a datetime object.
+    :param now: Current time as a datetime object.
+    :return: The next occurrence time as a datetime object.
+    """
+
+    if now is None:
+        now = datetime.datetime.now(tz=datetime.timezone.utc)
+    # Calculate the total seconds elapsed since the start time
+    elapsed_seconds = (now - start_time).total_seconds()
+
+    # Calculate the number of intervals that have passed
+    intervals_passed = int(elapsed_seconds // interval_seconds)
+
+    # Calculate the next occurrence time
+    next_occurrence = start_time + datetime.timedelta(
+        seconds=(intervals_passed + 1) * interval_seconds
+    )
+
+    return next_occurrence
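The scheduling arithmetic is plain modular math: with elapsed seconds e and interval i, the next run lands at start_time + (floor(e / i) + 1) * i. Note the docstring still says `interval_minutes`, but the parameter is in seconds. A worked check with invented timestamps:

```python
import datetime

from port_ocean.utils.time import convert_to_minutes, get_next_occurrence

start = datetime.datetime(2024, 1, 1, 12, 0, tzinfo=datetime.timezone.utc)
now = datetime.datetime(2024, 1, 1, 14, 30, tzinfo=datetime.timezone.utc)

# 9000s elapsed // 3600s interval = 2 full intervals -> next run at 15:00.
assert get_next_occurrence(3600, start, now) == datetime.datetime(
    2024, 1, 1, 15, 0, tzinfo=datetime.timezone.utc
)

# convert_to_minutes parses "<number><unit>" strings (s/m/h/d/w):
assert convert_to_minutes("90s") == 1  # int(90 * 1/60) truncates to 1
assert convert_to_minutes("2h") == 120
```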