julee 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +1 -1
- julee/api/tests/routers/test_assembly_specifications.py +2 -0
- julee/api/tests/routers/test_documents.py +8 -6
- julee/api/tests/routers/test_knowledge_service_configs.py +2 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +2 -0
- julee/api/tests/routers/test_system.py +2 -0
- julee/api/tests/routers/test_workflows.py +2 -0
- julee/api/tests/test_app.py +2 -0
- julee/api/tests/test_dependencies.py +2 -0
- julee/api/tests/test_requests.py +2 -0
- julee/contrib/polling/__init__.py +22 -19
- julee/contrib/polling/apps/__init__.py +17 -0
- julee/contrib/polling/apps/worker/__init__.py +17 -0
- julee/contrib/polling/apps/worker/pipelines.py +288 -0
- julee/contrib/polling/domain/__init__.py +7 -9
- julee/contrib/polling/domain/models/__init__.py +6 -7
- julee/contrib/polling/domain/models/polling_config.py +18 -1
- julee/contrib/polling/domain/services/__init__.py +6 -5
- julee/contrib/polling/domain/services/poller.py +1 -1
- julee/contrib/polling/infrastructure/__init__.py +9 -8
- julee/contrib/polling/infrastructure/services/__init__.py +6 -5
- julee/contrib/polling/infrastructure/services/polling/__init__.py +6 -5
- julee/contrib/polling/infrastructure/services/polling/http/__init__.py +6 -5
- julee/contrib/polling/infrastructure/services/polling/http/http_poller_service.py +5 -2
- julee/contrib/polling/infrastructure/temporal/__init__.py +12 -12
- julee/contrib/polling/infrastructure/temporal/activities.py +1 -1
- julee/contrib/polling/infrastructure/temporal/manager.py +291 -0
- julee/contrib/polling/infrastructure/temporal/proxies.py +1 -1
- julee/contrib/polling/tests/unit/apps/worker/test_pipelines.py +580 -0
- julee/contrib/polling/tests/unit/infrastructure/services/polling/http/test_http_poller_service.py +40 -2
- julee/contrib/polling/tests/unit/infrastructure/temporal/__init__.py +7 -0
- julee/contrib/polling/tests/unit/infrastructure/temporal/test_manager.py +475 -0
- julee/docs/sphinx_hcd/__init__.py +4 -10
- julee/docs/sphinx_hcd/accelerators.py +277 -180
- julee/docs/sphinx_hcd/apps.py +78 -59
- julee/docs/sphinx_hcd/config.py +16 -16
- julee/docs/sphinx_hcd/epics.py +47 -42
- julee/docs/sphinx_hcd/integrations.py +53 -49
- julee/docs/sphinx_hcd/journeys.py +124 -110
- julee/docs/sphinx_hcd/personas.py +75 -53
- julee/docs/sphinx_hcd/stories.py +99 -71
- julee/docs/sphinx_hcd/utils.py +23 -18
- julee/domain/models/assembly/tests/test_assembly.py +2 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +2 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +2 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +2 -0
- julee/domain/models/document/document.py +12 -21
- julee/domain/models/document/tests/test_document.py +16 -34
- julee/domain/models/policy/tests/test_document_policy_validation.py +2 -0
- julee/domain/models/policy/tests/test_policy.py +2 -0
- julee/domain/use_cases/extract_assemble_data.py +1 -1
- julee/domain/use_cases/initialize_system_data.py +75 -21
- julee/domain/use_cases/tests/test_extract_assemble_data.py +2 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +2 -0
- julee/domain/use_cases/tests/test_validate_document.py +2 -0
- julee/fixtures/documents.yaml +4 -43
- julee/fixtures/knowledge_service_queries.yaml +9 -0
- julee/maintenance/release.py +90 -30
- julee/repositories/memory/document.py +19 -13
- julee/repositories/memory/tests/test_document.py +20 -18
- julee/repositories/memory/tests/test_document_policy_validation.py +2 -0
- julee/repositories/memory/tests/test_policy.py +2 -0
- julee/repositories/minio/document.py +25 -22
- julee/repositories/minio/tests/test_assembly.py +2 -0
- julee/repositories/minio/tests/test_assembly_specification.py +2 -0
- julee/repositories/minio/tests/test_client_protocol.py +3 -0
- julee/repositories/minio/tests/test_document.py +18 -16
- julee/repositories/minio/tests/test_document_policy_validation.py +2 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +2 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +2 -0
- julee/repositories/minio/tests/test_policy.py +2 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +2 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +2 -0
- julee/services/knowledge_service/test_factory.py +2 -0
- julee/util/tests/test_decorators.py +2 -0
- julee-0.1.5.dist-info/METADATA +103 -0
- {julee-0.1.3.dist-info → julee-0.1.5.dist-info}/RECORD +80 -74
- julee/fixtures/assembly_specifications.yaml +0 -70
- julee-0.1.3.dist-info/METADATA +0 -198
- {julee-0.1.3.dist-info → julee-0.1.5.dist-info}/WHEEL +0 -0
- {julee-0.1.3.dist-info → julee-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {julee-0.1.3.dist-info → julee-0.1.5.dist-info}/top_level.txt +0 -0
julee/__init__.py
CHANGED
|
@@ -18,6 +18,8 @@ from julee.api.routers.documents import router
|
|
|
18
18
|
from julee.domain.models.document import Document, DocumentStatus
|
|
19
19
|
from julee.repositories.memory import MemoryDocumentRepository
|
|
20
20
|
|
|
21
|
+
pytestmark = pytest.mark.unit
|
|
22
|
+
|
|
21
23
|
|
|
22
24
|
@pytest.fixture
|
|
23
25
|
def memory_repo() -> MemoryDocumentRepository:
|
|
@@ -61,7 +63,7 @@ def sample_documents() -> list[Document]:
|
|
|
61
63
|
created_at=datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc),
|
|
62
64
|
updated_at=datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc),
|
|
63
65
|
additional_metadata={"type": "test"},
|
|
64
|
-
|
|
66
|
+
content_bytes="test content",
|
|
65
67
|
),
|
|
66
68
|
Document(
|
|
67
69
|
document_id="doc-2",
|
|
@@ -73,7 +75,7 @@ def sample_documents() -> list[Document]:
|
|
|
73
75
|
created_at=datetime(2024, 1, 2, 12, 0, 0, tzinfo=timezone.utc),
|
|
74
76
|
updated_at=datetime(2024, 1, 2, 12, 0, 0, tzinfo=timezone.utc),
|
|
75
77
|
additional_metadata={"type": "report"},
|
|
76
|
-
|
|
78
|
+
content_bytes="pdf content",
|
|
77
79
|
),
|
|
78
80
|
]
|
|
79
81
|
|
|
@@ -203,7 +205,7 @@ class TestGetDocument:
|
|
|
203
205
|
assert data["additional_metadata"] == doc.additional_metadata
|
|
204
206
|
|
|
205
207
|
# Content should NOT be included in metadata endpoint
|
|
206
|
-
assert data["
|
|
208
|
+
assert data["content_bytes"] is None
|
|
207
209
|
# Content field is excluded from JSON response
|
|
208
210
|
assert "content" not in data
|
|
209
211
|
|
|
@@ -273,7 +275,7 @@ class TestGetDocumentContent:
|
|
|
273
275
|
memory_repo: MemoryDocumentRepository,
|
|
274
276
|
) -> None:
|
|
275
277
|
"""Test content retrieval when document has no content."""
|
|
276
|
-
# Create document with
|
|
278
|
+
# Create document with content_bytes first to pass validation
|
|
277
279
|
doc = Document(
|
|
278
280
|
document_id="doc-no-content",
|
|
279
281
|
original_filename="empty.txt",
|
|
@@ -282,7 +284,7 @@ class TestGetDocumentContent:
|
|
|
282
284
|
content_multihash="empty_hash",
|
|
283
285
|
status=DocumentStatus.CAPTURED,
|
|
284
286
|
additional_metadata={"type": "empty"},
|
|
285
|
-
|
|
287
|
+
content_bytes="temp",
|
|
286
288
|
)
|
|
287
289
|
|
|
288
290
|
# Save document normally, then manually remove content from storage
|
|
@@ -290,7 +292,7 @@ class TestGetDocumentContent:
|
|
|
290
292
|
stored_doc = memory_repo.storage_dict[doc.document_id]
|
|
291
293
|
# Remove content from the stored document
|
|
292
294
|
memory_repo.storage_dict[doc.document_id] = stored_doc.model_copy(
|
|
293
|
-
update={"content": None, "
|
|
295
|
+
update={"content": None, "content_bytes": None}
|
|
294
296
|
)
|
|
295
297
|
|
|
296
298
|
# Make request
|
julee/api/tests/test_app.py
CHANGED
julee/api/tests/test_requests.py
CHANGED
|
@@ -12,8 +12,10 @@ The polling module includes:
|
|
|
12
12
|
- Co-located tests and examples
|
|
13
13
|
|
|
14
14
|
Example usage:
|
|
15
|
-
from julee.contrib.polling import PollingConfig,
|
|
16
|
-
from julee.contrib.polling import
|
|
15
|
+
from julee.contrib.polling.domain.models.polling_config import PollingConfig, PollingProtocol
|
|
16
|
+
from julee.contrib.polling.infrastructure.services.polling.http import HttpPollerService
|
|
17
|
+
from julee.contrib.polling.domain.services.poller import PollerService
|
|
18
|
+
from julee.contrib.polling.domain.models.polling_config import PollingResult
|
|
17
19
|
|
|
18
20
|
# Configure polling
|
|
19
21
|
config = PollingConfig(
|
|
@@ -26,22 +28,23 @@ Example usage:
|
|
|
26
28
|
# Poll the endpoint
|
|
27
29
|
service = HttpPollerService()
|
|
28
30
|
result = await service.poll_endpoint(config)
|
|
31
|
+
|
|
32
|
+
Note: All imports must be explicit to avoid import chains that can pull
|
|
33
|
+
non-deterministic code into Temporal workflows. Import directly from
|
|
34
|
+
the specific modules you need rather than using this convenience module.
|
|
29
35
|
"""
|
|
30
36
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
"TemporalPollerService",
|
|
46
|
-
"WorkflowPollerServiceProxy",
|
|
47
|
-
]
|
|
37
|
+
# No re-exports to avoid import chains that pull non-deterministic code
|
|
38
|
+
# into Temporal workflows. Import from specific submodules instead:
|
|
39
|
+
#
|
|
40
|
+
# Domain:
|
|
41
|
+
# - from julee.contrib.polling.domain.models.polling_config import PollingConfig, PollingProtocol, PollingResult
|
|
42
|
+
# - from julee.contrib.polling.domain.services.poller import PollerService
|
|
43
|
+
#
|
|
44
|
+
# Infrastructure:
|
|
45
|
+
# - from julee.contrib.polling.infrastructure.services.polling.http import HttpPollerService
|
|
46
|
+
# - from julee.contrib.polling.infrastructure.temporal.manager import PollingManager
|
|
47
|
+
# - from julee.contrib.polling.infrastructure.temporal.proxies import WorkflowPollerServiceProxy
|
|
48
|
+
# - from julee.contrib.polling.infrastructure.temporal.activities import TemporalPollerService
|
|
49
|
+
|
|
50
|
+
__all__ = []
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Application entry points for the polling contrib module.
|
|
3
|
+
|
|
4
|
+
This module contains the application-layer components that provide entry points
|
|
5
|
+
for the polling contrib module, including worker pipelines, API routes, and
|
|
6
|
+
CLI commands.
|
|
7
|
+
|
|
8
|
+
Following the ADR contrib module structure, this layer wires together domain
|
|
9
|
+
services and infrastructure implementations into runnable applications.
|
|
10
|
+
|
|
11
|
+
No re-exports to avoid import chains that pull non-deterministic code
|
|
12
|
+
into Temporal workflows. Import directly from specific modules:
|
|
13
|
+
|
|
14
|
+
- from julee.contrib.polling.apps.worker.pipelines import NewDataDetectionPipeline
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
__all__ = []
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Worker applications for the polling contrib module.
|
|
3
|
+
|
|
4
|
+
This module contains worker-specific entry points for the polling contrib module,
|
|
5
|
+
including Temporal workflows (pipelines) that orchestrate polling operations
|
|
6
|
+
with durability guarantees.
|
|
7
|
+
|
|
8
|
+
The worker applications in this module can be registered with Temporal workers
|
|
9
|
+
to provide polling capabilities within workflow contexts.
|
|
10
|
+
|
|
11
|
+
No re-exports to avoid import chains that pull non-deterministic code
|
|
12
|
+
into Temporal workflows. Import directly from specific modules:
|
|
13
|
+
|
|
14
|
+
- from julee.contrib.polling.apps.worker.pipelines import NewDataDetectionPipeline
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
__all__ = []
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal workflows for polling operations in the Julee polling contrib module.
|
|
3
|
+
|
|
4
|
+
This module contains workflows that orchestrate polling operations with
|
|
5
|
+
Temporal's durability guarantees, providing retry logic, state management,
|
|
6
|
+
and reliable execution for endpoint polling and change detection.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from temporalio import workflow
|
|
14
|
+
|
|
15
|
+
from julee.contrib.polling.domain.models.polling_config import PollingConfig
|
|
16
|
+
from julee.contrib.polling.infrastructure.temporal.proxies import (
|
|
17
|
+
WorkflowPollerServiceProxy,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@workflow.defn
|
|
24
|
+
class NewDataDetectionPipeline:
|
|
25
|
+
"""
|
|
26
|
+
Temporal workflow for endpoint polling with new data detection.
|
|
27
|
+
|
|
28
|
+
This workflow:
|
|
29
|
+
1. Polls an endpoint using the configured polling service
|
|
30
|
+
2. Compares result with previous completion to detect changes
|
|
31
|
+
3. Triggers downstream processing when new data is detected
|
|
32
|
+
4. Returns completion result for next scheduled execution
|
|
33
|
+
|
|
34
|
+
The workflow uses Temporal's schedule last completion result feature
|
|
35
|
+
to automatically receive the previous execution's result for comparison.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(self) -> None:
|
|
39
|
+
self.current_step = "initialized"
|
|
40
|
+
self.endpoint_id: str | None = None
|
|
41
|
+
self.has_new_data: bool = False
|
|
42
|
+
|
|
43
|
+
@workflow.query
|
|
44
|
+
def get_current_step(self) -> str:
|
|
45
|
+
"""Query method to get the current workflow step."""
|
|
46
|
+
return self.current_step
|
|
47
|
+
|
|
48
|
+
@workflow.query
|
|
49
|
+
def get_endpoint_id(self) -> str | None:
|
|
50
|
+
"""Query method to get the endpoint ID being polled."""
|
|
51
|
+
return self.endpoint_id
|
|
52
|
+
|
|
53
|
+
@workflow.query
|
|
54
|
+
def get_has_new_data(self) -> bool:
|
|
55
|
+
"""Query method to check if new data was detected."""
|
|
56
|
+
return self.has_new_data
|
|
57
|
+
|
|
58
|
+
async def trigger_downstream_pipeline(
|
|
59
|
+
self,
|
|
60
|
+
downstream_pipeline: str,
|
|
61
|
+
previous_data: bytes | None,
|
|
62
|
+
new_data: bytes,
|
|
63
|
+
) -> bool:
|
|
64
|
+
"""
|
|
65
|
+
Trigger downstream pipeline workflow.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
downstream_pipeline: Name of the downstream workflow to trigger
|
|
69
|
+
previous_data: Previous content (None if first run)
|
|
70
|
+
new_data: New content that was detected
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
True if successfully triggered, False otherwise
|
|
74
|
+
"""
|
|
75
|
+
try:
|
|
76
|
+
# Start external workflow for downstream processing (fire-and-forget)
|
|
77
|
+
await workflow.start_child_workflow(
|
|
78
|
+
downstream_pipeline, # This would be the workflow class name
|
|
79
|
+
args=[previous_data, new_data],
|
|
80
|
+
id=f"downstream-{self.endpoint_id}-{workflow.info().workflow_id}",
|
|
81
|
+
task_queue="downstream-processing-queue",
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
workflow.logger.info(
|
|
85
|
+
"Downstream pipeline triggered successfully",
|
|
86
|
+
extra={
|
|
87
|
+
"endpoint_id": self.endpoint_id,
|
|
88
|
+
"downstream_pipeline": downstream_pipeline,
|
|
89
|
+
},
|
|
90
|
+
)
|
|
91
|
+
return True
|
|
92
|
+
|
|
93
|
+
except Exception as e:
|
|
94
|
+
workflow.logger.error(
|
|
95
|
+
"Failed to trigger downstream pipeline",
|
|
96
|
+
extra={
|
|
97
|
+
"endpoint_id": self.endpoint_id,
|
|
98
|
+
"downstream_pipeline": downstream_pipeline,
|
|
99
|
+
"error": str(e),
|
|
100
|
+
"error_type": type(e).__name__,
|
|
101
|
+
},
|
|
102
|
+
)
|
|
103
|
+
# Don't fail the polling workflow if downstream trigger fails
|
|
104
|
+
return False
|
|
105
|
+
|
|
106
|
+
@workflow.run
|
|
107
|
+
async def run(
|
|
108
|
+
self,
|
|
109
|
+
config: PollingConfig | dict[str, Any],
|
|
110
|
+
downstream_pipeline: str | None = None,
|
|
111
|
+
) -> dict[str, Any]:
|
|
112
|
+
"""
|
|
113
|
+
Execute the new data detection workflow.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
config: Configuration for the polling operation (PollingConfig or dict from schedule)
|
|
117
|
+
downstream_pipeline: Optional pipeline to trigger when new data detected
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
Completion result containing polling result and detection metadata
|
|
121
|
+
|
|
122
|
+
Raises:
|
|
123
|
+
RuntimeError: If polling or downstream processing fails after retries
|
|
124
|
+
"""
|
|
125
|
+
# Convert dict to PollingConfig if needed (for schedule compatibility)
|
|
126
|
+
# Temporal schedules serialize arguments as dicts, not Pydantic models
|
|
127
|
+
if isinstance(config, dict):
|
|
128
|
+
config = PollingConfig.model_validate(config)
|
|
129
|
+
|
|
130
|
+
self.endpoint_id = config.endpoint_identifier
|
|
131
|
+
|
|
132
|
+
# Fetch previous completion result from Temporal
|
|
133
|
+
previous_completion = workflow.get_last_completion_result()
|
|
134
|
+
|
|
135
|
+
workflow.logger.info(
|
|
136
|
+
"Starting new data detection pipeline",
|
|
137
|
+
extra={
|
|
138
|
+
"endpoint_id": self.endpoint_id,
|
|
139
|
+
"polling_protocol": config.polling_protocol.value,
|
|
140
|
+
"has_previous_completion": previous_completion is not None,
|
|
141
|
+
"workflow_id": workflow.info().workflow_id,
|
|
142
|
+
"run_id": workflow.info().run_id,
|
|
143
|
+
},
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
self.current_step = "polling_endpoint"
|
|
147
|
+
|
|
148
|
+
try:
|
|
149
|
+
# Step 1: Poll the endpoint
|
|
150
|
+
polling_service = WorkflowPollerServiceProxy()
|
|
151
|
+
polling_result = await polling_service.poll_endpoint(config)
|
|
152
|
+
|
|
153
|
+
# Extract the timestamp from when polling actually happened
|
|
154
|
+
polled_at = polling_result.polled_at.isoformat()
|
|
155
|
+
|
|
156
|
+
workflow.logger.debug(
|
|
157
|
+
"Polling completed",
|
|
158
|
+
extra={
|
|
159
|
+
"endpoint_id": self.endpoint_id,
|
|
160
|
+
"polling_success": polling_result.success,
|
|
161
|
+
"content_length": len(polling_result.content),
|
|
162
|
+
},
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
self.current_step = "detecting_changes"
|
|
166
|
+
|
|
167
|
+
# Step 2: Detect new data using hash comparison
|
|
168
|
+
current_content = polling_result.content
|
|
169
|
+
current_hash = hashlib.sha256(current_content).hexdigest()
|
|
170
|
+
|
|
171
|
+
previous_hash = None
|
|
172
|
+
if previous_completion and "polling_result" in previous_completion:
|
|
173
|
+
previous_hash = previous_completion["polling_result"].get(
|
|
174
|
+
"content_hash"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
has_new_data = previous_hash != current_hash
|
|
178
|
+
self.has_new_data = has_new_data
|
|
179
|
+
|
|
180
|
+
workflow.logger.info(
|
|
181
|
+
f"DEBUG: Change detection - has_new_data: {has_new_data}, "
|
|
182
|
+
f"is_first_run: {previous_hash is None}, "
|
|
183
|
+
f"current_hash: {current_hash[:8]}..., "
|
|
184
|
+
f"previous_hash: {previous_hash[:8] if previous_hash else 'None'}..."
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
# Step 3: Trigger downstream processing if new data detected
|
|
188
|
+
downstream_triggered = False
|
|
189
|
+
if has_new_data and downstream_pipeline:
|
|
190
|
+
self.current_step = "triggering_downstream"
|
|
191
|
+
|
|
192
|
+
workflow.logger.info(
|
|
193
|
+
"Triggering downstream pipeline",
|
|
194
|
+
extra={
|
|
195
|
+
"endpoint_id": self.endpoint_id,
|
|
196
|
+
"downstream_pipeline": downstream_pipeline,
|
|
197
|
+
"content_length": len(current_content),
|
|
198
|
+
},
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
# Get previous data for comparison
|
|
202
|
+
previous_data = None
|
|
203
|
+
if previous_completion and "polling_result" in previous_completion:
|
|
204
|
+
prev_content_str = previous_completion["polling_result"].get(
|
|
205
|
+
"content"
|
|
206
|
+
)
|
|
207
|
+
if prev_content_str:
|
|
208
|
+
try:
|
|
209
|
+
previous_data = prev_content_str.encode("utf-8")
|
|
210
|
+
except (UnicodeDecodeError, AttributeError) as e:
|
|
211
|
+
workflow.logger.error(
|
|
212
|
+
"Failed to decode previous content for downstream pipeline",
|
|
213
|
+
extra={
|
|
214
|
+
"endpoint_id": self.endpoint_id,
|
|
215
|
+
"error": str(e),
|
|
216
|
+
"error_type": type(e).__name__,
|
|
217
|
+
},
|
|
218
|
+
)
|
|
219
|
+
raise RuntimeError(
|
|
220
|
+
f"Previous content is corrupted or invalid: {e}"
|
|
221
|
+
)
|
|
222
|
+
elif previous_hash:
|
|
223
|
+
# We have previous run but no content - this is an error
|
|
224
|
+
workflow.logger.error(
|
|
225
|
+
"Previous content not available for downstream pipeline but previous hash exists",
|
|
226
|
+
extra={
|
|
227
|
+
"endpoint_id": self.endpoint_id,
|
|
228
|
+
"previous_hash": previous_hash,
|
|
229
|
+
},
|
|
230
|
+
)
|
|
231
|
+
raise RuntimeError(
|
|
232
|
+
"Previous content is missing from completion result but is required for downstream pipeline"
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
downstream_triggered = await self.trigger_downstream_pipeline(
|
|
236
|
+
downstream_pipeline,
|
|
237
|
+
previous_data,
|
|
238
|
+
current_content,
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
self.current_step = "completed"
|
|
242
|
+
|
|
243
|
+
# Step 4: Return completion result for next scheduled execution
|
|
244
|
+
completion_result = {
|
|
245
|
+
"polling_result": {
|
|
246
|
+
"success": polling_result.success,
|
|
247
|
+
"content_hash": current_hash,
|
|
248
|
+
"content": current_content.decode("utf-8", errors="ignore"),
|
|
249
|
+
"polled_at": polled_at,
|
|
250
|
+
"content_length": len(current_content),
|
|
251
|
+
},
|
|
252
|
+
"detection_result": {
|
|
253
|
+
"has_new_data": has_new_data,
|
|
254
|
+
"previous_hash": previous_hash,
|
|
255
|
+
"current_hash": current_hash,
|
|
256
|
+
},
|
|
257
|
+
"downstream_triggered": downstream_triggered,
|
|
258
|
+
"endpoint_id": self.endpoint_id,
|
|
259
|
+
"completed_at": workflow.now().isoformat(),
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
workflow.logger.info(
|
|
263
|
+
"New data detection pipeline completed successfully",
|
|
264
|
+
extra={
|
|
265
|
+
"endpoint_id": self.endpoint_id,
|
|
266
|
+
"has_new_data": has_new_data,
|
|
267
|
+
"downstream_triggered": downstream_triggered,
|
|
268
|
+
},
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
return completion_result
|
|
272
|
+
|
|
273
|
+
except Exception as e:
|
|
274
|
+
self.current_step = "failed"
|
|
275
|
+
|
|
276
|
+
workflow.logger.error(
|
|
277
|
+
"New data detection pipeline failed",
|
|
278
|
+
extra={
|
|
279
|
+
"endpoint_id": self.endpoint_id,
|
|
280
|
+
"error": str(e),
|
|
281
|
+
"error_type": type(e).__name__,
|
|
282
|
+
"current_step": self.current_step,
|
|
283
|
+
},
|
|
284
|
+
exc_info=True,
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
# Re-raise to let Temporal handle retry logic
|
|
288
|
+
raise
|
|
@@ -4,14 +4,12 @@ Domain layer for the polling contrib module.
|
|
|
4
4
|
This module contains the core domain models, services, and business rules
|
|
5
5
|
for the polling contrib module. It defines the fundamental concepts and
|
|
6
6
|
protocols that govern polling operations.
|
|
7
|
-
"""
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
No re-exports to avoid import chains that pull non-deterministic code
|
|
9
|
+
into Temporal workflows. Import directly from specific modules:
|
|
10
|
+
|
|
11
|
+
- from julee.contrib.polling.domain.models.polling_config import PollingConfig, PollingProtocol, PollingResult
|
|
12
|
+
- from julee.contrib.polling.domain.services.poller import PollerService
|
|
13
|
+
"""
|
|
11
14
|
|
|
12
|
-
__all__ = [
|
|
13
|
-
"PollingConfig",
|
|
14
|
-
"PollingProtocol",
|
|
15
|
-
"PollingResult",
|
|
16
|
-
"PollerService",
|
|
17
|
-
]
|
|
15
|
+
__all__ = []
|
|
@@ -2,12 +2,11 @@
|
|
|
2
2
|
Polling domain models.
|
|
3
3
|
|
|
4
4
|
This module contains the core domain models for the polling contrib module.
|
|
5
|
-
"""
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
No re-exports to avoid import chains that pull non-deterministic code
|
|
7
|
+
into Temporal workflows. Import directly from specific modules:
|
|
8
|
+
|
|
9
|
+
- from julee.contrib.polling.domain.models.polling_config import PollingConfig, PollingProtocol, PollingResult
|
|
10
|
+
"""
|
|
8
11
|
|
|
9
|
-
__all__ = [
|
|
10
|
-
"PollingConfig",
|
|
11
|
-
"PollingProtocol",
|
|
12
|
-
"PollingResult",
|
|
13
|
-
]
|
|
12
|
+
__all__ = []
|
|
@@ -9,7 +9,7 @@ from datetime import datetime, timezone
|
|
|
9
9
|
from enum import Enum
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
-
from pydantic import BaseModel, Field
|
|
12
|
+
from pydantic import BaseModel, Field, field_validator
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class PollingProtocol(str, Enum):
|
|
@@ -37,3 +37,20 @@ class PollingResult(BaseModel):
|
|
|
37
37
|
polled_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
38
38
|
content_hash: str | None = None
|
|
39
39
|
error_message: str | None = None
|
|
40
|
+
|
|
41
|
+
@field_validator("content", mode="before")
|
|
42
|
+
@classmethod
|
|
43
|
+
def validate_content(cls, v):
|
|
44
|
+
"""Convert list of integers to bytes (for Temporal serialization compatibility)."""
|
|
45
|
+
if isinstance(v, list):
|
|
46
|
+
# Temporal may serialize bytes as list of integers
|
|
47
|
+
return bytes(v)
|
|
48
|
+
elif isinstance(v, str):
|
|
49
|
+
# Handle string input
|
|
50
|
+
return v.encode("utf-8")
|
|
51
|
+
elif isinstance(v, bytes):
|
|
52
|
+
return v
|
|
53
|
+
else:
|
|
54
|
+
raise ValueError(
|
|
55
|
+
f"Content must be bytes, string, or list of integers, got {type(v)}"
|
|
56
|
+
)
|
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
Polling domain services.
|
|
3
3
|
|
|
4
4
|
This module contains the service protocols for the polling contrib module.
|
|
5
|
-
"""
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
No re-exports to avoid import chains that pull non-deterministic code
|
|
7
|
+
into Temporal workflows. Import directly from specific modules:
|
|
8
|
+
|
|
9
|
+
- from julee.contrib.polling.domain.services.poller import PollerService
|
|
10
|
+
"""
|
|
8
11
|
|
|
9
|
-
__all__ = [
|
|
10
|
-
"PollerService",
|
|
11
|
-
]
|
|
12
|
+
__all__ = []
|
|
@@ -3,13 +3,14 @@ Infrastructure layer for the polling contrib module.
|
|
|
3
3
|
|
|
4
4
|
This module contains the concrete implementations of domain protocols
|
|
5
5
|
and external system integrations for the polling contrib module.
|
|
6
|
-
"""
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
No re-exports to avoid import chains that pull non-deterministic code
|
|
8
|
+
into Temporal workflows. Import directly from specific modules:
|
|
9
|
+
|
|
10
|
+
- from julee.contrib.polling.infrastructure.services.polling.http import HttpPollerService
|
|
11
|
+
- from julee.contrib.polling.infrastructure.temporal.manager import PollingManager
|
|
12
|
+
- from julee.contrib.polling.infrastructure.temporal.proxies import WorkflowPollerServiceProxy
|
|
13
|
+
- from julee.contrib.polling.infrastructure.temporal.activities import TemporalPollerService
|
|
14
|
+
"""
|
|
10
15
|
|
|
11
|
-
__all__ = [
|
|
12
|
-
"HttpPollerService",
|
|
13
|
-
"TemporalPollerService",
|
|
14
|
-
"WorkflowPollerServiceProxy",
|
|
15
|
-
]
|
|
16
|
+
__all__ = []
|
|
@@ -3,10 +3,11 @@ Infrastructure services for the polling contrib module.
|
|
|
3
3
|
|
|
4
4
|
This module contains the concrete implementations of domain services
|
|
5
5
|
for the polling contrib module.
|
|
6
|
-
"""
|
|
7
6
|
|
|
8
|
-
|
|
7
|
+
No re-exports to avoid import chains that pull non-deterministic code
|
|
8
|
+
into Temporal workflows. Import directly from specific modules:
|
|
9
|
+
|
|
10
|
+
- from julee.contrib.polling.infrastructure.services.polling.http import HttpPollerService
|
|
11
|
+
"""
|
|
9
12
|
|
|
10
|
-
__all__ = [
|
|
11
|
-
"HttpPollerService",
|
|
12
|
-
]
|
|
13
|
+
__all__ = []
|