julee 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -203,32 +203,12 @@ class TestDocumentSerialization:
203
203
 
204
204
 
205
205
  class TestDocumentContentValidation:
206
- """Test Document content and content_string validation rules."""
206
+ """Test Document content and content_bytes validation rules."""
207
207
 
208
- def test_document_with_both_content_and_content_string_fails(
209
- self,
210
- ) -> None:
211
- """Test that both content and content_string raises error."""
212
- content_stream = ContentStreamFactory.build()
213
- content_string = '{"type": "string"}'
214
-
215
- with pytest.raises(
216
- ValueError, match="cannot have both content and content_string"
217
- ):
218
- Document(
219
- document_id="test-doc-both",
220
- original_filename="both.json",
221
- content_type="application/json",
222
- size_bytes=100,
223
- content_multihash="test_hash",
224
- content=content_stream,
225
- content_string=content_string,
226
- )
227
-
228
- def test_document_without_content_or_content_string_fails(self) -> None:
229
- """Test that no content or content_string raises error."""
208
+ def test_document_without_content_or_content_bytes_fails(self) -> None:
209
+ """Test that no content or content_bytes raises error."""
230
210
  with pytest.raises(
231
- ValueError, match="must have either content or content_string"
211
+ ValueError, match="must have one of: content, or content_bytes."
232
212
  ):
233
213
  Document(
234
214
  document_id="test-doc-no-content",
@@ -237,7 +217,7 @@ class TestDocumentContentValidation:
237
217
  size_bytes=100,
238
218
  content_multihash="test_hash",
239
219
  content=None,
240
- content_string=None,
220
+ content_bytes=None,
241
221
  )
242
222
 
243
223
  def test_document_with_content_only_succeeds(self) -> None:
@@ -251,15 +231,15 @@ class TestDocumentContentValidation:
251
231
  size_bytes=100,
252
232
  content_multihash="test_hash",
253
233
  content=content_stream,
254
- content_string=None,
234
+ content_bytes=None,
255
235
  )
256
236
 
257
237
  assert doc.content is not None
258
- assert doc.content_string is None
238
+ assert doc.content_bytes is None
259
239
 
260
- def test_document_with_content_string_only_succeeds(self) -> None:
261
- """Test that document with only content_string field succeeds."""
262
- content_string = '{"type": "string"}'
240
+ def test_document_with_content_bytes_only_succeeds(self) -> None:
241
+ """Test that document with only content_bytes field succeeds."""
242
+ content_bytes = b'{"type": "string"}'
263
243
 
264
244
  doc = Document(
265
245
  document_id="test-doc-string",
@@ -268,11 +248,11 @@ class TestDocumentContentValidation:
268
248
  size_bytes=100,
269
249
  content_multihash="test_hash",
270
250
  content=None,
271
- content_string=content_string,
251
+ content_bytes=content_bytes,
272
252
  )
273
253
 
274
254
  assert doc.content is None
275
- assert doc.content_string == content_string
255
+ assert doc.content_bytes == content_bytes
276
256
 
277
257
  def test_document_deserialization_with_empty_content_succeeds(
278
258
  self,
@@ -287,7 +267,7 @@ class TestDocumentContentValidation:
287
267
  "size_bytes": 100,
288
268
  "content_multihash": "test_hash",
289
269
  "content": None,
290
- "content_string": None,
270
+ "content_bytes": None,
291
271
  }
292
272
 
293
273
  # Should succeed with temporal_validation context
@@ -297,4 +277,4 @@ class TestDocumentContentValidation:
297
277
 
298
278
  assert doc.document_id == "test-temporal"
299
279
  assert doc.content is None
300
- assert doc.content_string is None
280
+ assert doc.content_bytes is None
@@ -594,7 +594,7 @@ text or markdown formatting."""
594
594
  size_bytes=len(content_bytes),
595
595
  content_multihash=self._calculate_multihash_from_content(content_bytes),
596
596
  status=DocumentStatus.ASSEMBLED,
597
- content_string=assembled_content, # Use content_string for small
597
+ content_bytes=assembled_content,
598
598
  created_at=self.now_fn(),
599
599
  updated_at=self.now_fn(),
600
600
  )
@@ -13,6 +13,7 @@ The use case follows clean architecture principles:
13
13
  """
14
14
 
15
15
  import hashlib
16
+ import json
16
17
  import logging
17
18
  from datetime import datetime, timezone
18
19
  from pathlib import Path
@@ -535,7 +536,7 @@ class InitializeSystemDataUseCase:
535
536
 
536
537
  def _load_fixture_assembly_specifications(self) -> list[dict[str, Any]]:
537
538
  """
538
- Load assembly specifications from the YAML fixture file.
539
+ Load assembly specifications from a YAML or JSON fixture file.
539
540
 
540
541
  Returns:
541
542
  List of specification dictionaries from the fixture file
@@ -543,23 +544,34 @@ class InitializeSystemDataUseCase:
543
544
  Raises:
544
545
  FileNotFoundError: If the fixture file doesn't exist
545
546
  yaml.YAMLError: If the fixture file is invalid YAML
547
+ json.JSONDecodeError: If the fixture file is invalid JSON
546
548
  KeyError: If required fields are missing from the fixture
549
+ ValueError: If the specification section is malformed
547
550
  """
548
- fixture_path = self._get_demo_fixture_path("assembly_specifications.yaml")
551
+ # Accept both .yaml and .json files
552
+ fixture_path = None
553
+ for ext in ("json", "yaml"):
554
+ candidate = self._get_demo_fixture_path(f"assembly_specifications.{ext}")
555
+ if candidate.exists():
556
+ fixture_path = candidate
557
+ break
558
+
559
+ if fixture_path is None:
560
+ raise FileNotFoundError(
561
+ "Assembly specifications fixture file not found (.yaml or .json)"
562
+ )
549
563
 
550
564
  self.logger.debug(
551
565
  "Loading assembly specifications fixture file",
552
566
  extra={"fixture_path": str(fixture_path)},
553
567
  )
554
568
 
555
- if not fixture_path.exists():
556
- raise FileNotFoundError(
557
- f"Assembly specifications fixture file not found: {fixture_path}"
558
- )
559
-
560
569
  try:
561
570
  with open(fixture_path, encoding="utf-8") as f:
562
- fixture_data = yaml.safe_load(f)
571
+ if fixture_path.suffix.lower() == ".json":
572
+ fixture_data = json.load(f)
573
+ else:
574
+ fixture_data = yaml.safe_load(f)
563
575
 
564
576
  if not fixture_data or "assembly_specifications" not in fixture_data:
565
577
  raise KeyError(
@@ -569,8 +581,7 @@ class InitializeSystemDataUseCase:
569
581
  specs = fixture_data["assembly_specifications"]
570
582
  if not isinstance(specs, list):
571
583
  raise ValueError(
572
- "'assembly_specifications' must be a list of "
573
- "specification configurations"
584
+ "'assembly_specifications' must be a list of specification configurations"
574
585
  )
575
586
 
576
587
  self.logger.debug(
@@ -585,6 +596,13 @@ class InitializeSystemDataUseCase:
585
596
  f"Invalid YAML in assembly specifications fixture file: {e}"
586
597
  )
587
598
 
599
+ except json.JSONDecodeError as e:
600
+ raise json.JSONDecodeError(
601
+ f"Invalid JSON in assembly specifications fixture file: {e}",
602
+ e.doc,
603
+ e.pos,
604
+ )
605
+
588
606
  def _create_assembly_spec_from_fixture_data(
589
607
  self, spec_data: dict[str, Any]
590
608
  ) -> AssemblySpecification:
@@ -782,24 +800,62 @@ class InitializeSystemDataUseCase:
782
800
  "document_id",
783
801
  "original_filename",
784
802
  "content_type",
785
- "content",
786
803
  ]
787
804
 
788
- # Validate required fields
789
805
  for field in required_fields:
790
806
  if field not in doc_data:
791
807
  raise KeyError(f"Required field '{field}' missing from document")
792
808
 
793
- # Get content and calculate hash
794
- content = doc_data["content"]
795
- content_bytes = content.encode("utf-8")
796
- size_bytes = len(content_bytes)
809
+ content_type = doc_data["content_type"]
810
+ is_text = content_type.startswith("text/") or content_type in {
811
+ "application/json",
812
+ "application/xml",
813
+ "application/javascript",
814
+ }
815
+
816
+ if "content" in doc_data:
817
+ content = doc_data["content"]
818
+
819
+ if isinstance(content, bytes):
820
+ content_bytes = content
821
+ elif isinstance(content, str):
822
+ content_bytes = content.encode("utf-8")
823
+ else:
824
+ raise TypeError(
825
+ f"Unsupported type for 'content': {type(content)!r}. Expected str or bytes."
826
+ )
827
+ else:
828
+ current_file = Path(__file__)
829
+ julee_dir = current_file.parent.parent.parent
830
+ fixture_path = julee_dir / "fixtures" / doc_data["original_filename"]
797
831
 
798
- # Create multihash (using SHA-256)
832
+ open_mode = "r" if is_text else "rb"
833
+ encoding = "utf-8" if is_text else None
834
+
835
+ try:
836
+ with fixture_path.open(open_mode, encoding=encoding) as f:
837
+ content = f.read()
838
+ except FileNotFoundError as e:
839
+ self.logger.error(
840
+ "Fixture file not found for document",
841
+ extra={
842
+ "document_id": doc_data["document_id"],
843
+ "fixture_path": str(fixture_path),
844
+ },
845
+ )
846
+ raise FileNotFoundError(
847
+ f"Fixture file '{fixture_path}' not found for document "
848
+ f"{doc_data['document_id']}"
849
+ ) from e
850
+
851
+ content_bytes = content.encode("utf-8") if is_text else content
852
+
853
+ self.logger.info(content_bytes)
854
+
855
+ size_bytes = len(content_bytes)
799
856
  sha256_hash = hashlib.sha256(content_bytes).hexdigest()
800
857
  content_multihash = f"sha256-{sha256_hash}"
801
858
 
802
- # Parse status
803
859
  status = DocumentStatus.CAPTURED
804
860
  if "status" in doc_data:
805
861
  try:
@@ -809,12 +865,10 @@ class InitializeSystemDataUseCase:
809
865
  f"Invalid status '{doc_data['status']}', using default 'captured'"
810
866
  )
811
867
 
812
- # Get optional fields
813
868
  knowledge_service_id = doc_data.get("knowledge_service_id")
814
869
  assembly_types = doc_data.get("assembly_types", [])
815
870
  additional_metadata = doc_data.get("additional_metadata", {})
816
871
 
817
- # Create document
818
872
  document = Document(
819
873
  document_id=doc_data["document_id"],
820
874
  original_filename=doc_data["original_filename"],
@@ -827,7 +881,7 @@ class InitializeSystemDataUseCase:
827
881
  created_at=datetime.now(timezone.utc),
828
882
  updated_at=datetime.now(timezone.utc),
829
883
  additional_metadata=additional_metadata,
830
- content_string=content, # Store content as string for fixtures
884
+ content_bytes=content_bytes,
831
885
  )
832
886
 
833
887
  self.logger.debug(
@@ -9,6 +9,10 @@
9
9
  # repository if they don't already exist (idempotent loading).
10
10
 
11
11
  documents:
12
+ - document_id: "product-spec-sheet"
13
+ original_filename: "Spec-Sheet-BondorPanel-v17.pdf"
14
+ content_type: "application/pdf"
15
+ status: "captured"
12
16
  - document_id: "meeting-transcript-q1-planning"
13
17
  original_filename: "q1_planning_meeting.txt"
14
18
  content_type: "text/plain"
@@ -19,49 +23,6 @@ documents:
19
23
  attendee_count: 3
20
24
  duration_minutes: 90
21
25
  department: "product"
22
- content: |
23
- Meeting Transcript - Q1 Planning Session
24
- Date: March 15, 2024
25
- Time: 2:00 PM - 3:30 PM
26
- Attendees: Sarah Chen (Product Manager), Mike Rodriguez (Engineering Lead),
27
- Lisa Wang (Designer)
28
-
29
- Sarah: Thanks everyone for joining. Let's kick off our Q1 planning. Mike,
30
- can you give us an update on the current sprint?
31
-
32
- Mike: Sure, we're about 80% through sprint 23. We've completed the user
33
- authentication module and are working on the data migration tool. Should be
34
- done by Friday.
35
-
36
- Lisa: Great! I've finished the mockups for the dashboard redesign. Sarah,
37
- have you had a chance to review them?
38
-
39
- Sarah: Yes, they look fantastic. I especially like the new navigation
40
- structure. When can we start implementation?
41
-
42
- Mike: I'd estimate 2 weeks for the frontend work, plus another week for
43
- backend API changes.
44
-
45
- Lisa: I can start on the component library updates while Mike works on the
46
- APIs.
47
-
48
- Sarah: Perfect. Let's also discuss the customer feedback integration. We had
49
- 47 responses to our survey.
50
-
51
- Mike: The main requests were for better reporting and mobile optimization.
52
-
53
- Sarah: Those should be our next priorities then. Lisa, can you start
54
- sketching mobile designs?
55
-
56
- Lisa: Absolutely. I'll have initial concepts by next Tuesday.
57
-
58
- Sarah: Excellent. Any other items?
59
-
60
- Mike: Just a heads up that we'll need to schedule downtime for the database
61
- migration, probably next weekend.
62
-
63
- Sarah: Noted. I'll coordinate with support. Meeting adjourned at 3:30 PM.
64
-
65
26
  - document_id: "customer-feedback-survey-q4"
66
27
  original_filename: "customer_survey_results_q4_2023.txt"
67
28
  content_type: "text/plain"
@@ -1,4 +1,13 @@
1
1
  knowledge_service_queries:
2
+ - query_id: "generate-dpp"
3
+ name: "Generate Digital Product Passport"
4
+ knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
5
+ prompt: "From this product specification sheet, extract the product information to generate a Digital Product Passport, that conforms to the provided schema, including the issuer, the credential subject and the validation dates. Please make sure that the DPP conforms to the provided schema and types and that you don't add any other fields."
6
+ assistant_prompt: "Looking at the product specification sheet, here's the digital product passport that conforms to the provided schema, without surrounding ```json ... ``` markers:"
7
+ query_metadata:
8
+ max_tokens: 3000
9
+ temperature: 0.1
10
+
2
11
  - query_id: "extract-meeting-info-query"
3
12
  name: "Extract Meeting Information"
4
13
  knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
@@ -3,17 +3,21 @@
3
3
  Release preparation and tagging script.
4
4
 
5
5
  Usage:
6
- release.py prepare X.Y.Z # Create release branch and PR
7
- release.py tag X.Y.Z # Tag after PR is merged
6
+ release.py prepare X.Y.Z [--message-file FILE] # Create release branch and PR
7
+ release.py tag X.Y.Z # Tag after PR is merged
8
8
  """
9
9
 
10
+ import argparse
10
11
  import re
11
12
  import subprocess
12
13
  import sys
14
+ import tempfile
13
15
  from pathlib import Path
14
16
 
15
17
 
16
- def run(cmd: str, check: bool = True, capture: bool = True) -> subprocess.CompletedProcess:
18
+ def run(
19
+ cmd: str, check: bool = True, capture: bool = True
20
+ ) -> subprocess.CompletedProcess:
17
21
  """Run a shell command."""
18
22
  result = subprocess.run(cmd, shell=True, capture_output=capture, text=True)
19
23
  if check and result.returncode != 0:
@@ -35,7 +39,9 @@ def get_package_init(repo_root: Path) -> Path | None:
35
39
  src_dir = repo_root / "src"
36
40
  if not src_dir.exists():
37
41
  return None
38
- packages = [p for p in src_dir.iterdir() if p.is_dir() and not p.name.startswith("_")]
42
+ packages = [
43
+ p for p in src_dir.iterdir() if p.is_dir() and not p.name.startswith("_")
44
+ ]
39
45
  if len(packages) != 1:
40
46
  # Multiple packages (bounded contexts) - no single __init__.py to update
41
47
  return None
@@ -48,7 +54,10 @@ def get_package_init(repo_root: Path) -> Path | None:
48
54
def validate_version(version: str) -> None:
    """Validate that *version* is a strict X.Y.Z version string.

    Prints an error to stderr and exits the process with status 1 when
    the format is invalid; returns None on success.

    Args:
        version: Candidate version string, e.g. "1.2.3".

    Raises:
        SystemExit: If the version does not match X.Y.Z exactly.
    """
    # Use fullmatch: with re.match + a "$" anchor, "$" also matches just
    # before a trailing newline, so "1.2.3\n" would be silently accepted.
    if not re.fullmatch(r"\d+\.\d+\.\d+", version):
        print(
            f"ERROR: Invalid version format '{version}'. Expected X.Y.Z",
            file=sys.stderr,
        )
        sys.exit(1)
53
62
 
54
63
 
@@ -65,18 +74,26 @@ def validate_git_state(require_master: bool = True) -> None:
65
74
  result = run("git branch --show-current")
66
75
  branch = result.stdout.strip()
67
76
  if branch not in ("master", "main"):
68
- print(f"ERROR: Must be on master or main branch, currently on '{branch}'", file=sys.stderr)
77
+ print(
78
+ f"ERROR: Must be on master or main branch, currently on '{branch}'",
79
+ file=sys.stderr,
80
+ )
69
81
  sys.exit(1)
70
82
 
71
83
  # Check we're up to date with remote
72
84
  run("git fetch origin")
73
- result = run("git rev-list HEAD...origin/master --count 2>/dev/null || git rev-list HEAD...origin/main --count", check=False)
85
+ result = run(
86
+ "git rev-list HEAD...origin/master --count 2>/dev/null || git rev-list HEAD...origin/main --count",
87
+ check=False,
88
+ )
74
89
  if result.stdout.strip() != "0":
75
90
  print("ERROR: Branch is not up to date with remote", file=sys.stderr)
76
91
  sys.exit(1)
77
92
 
78
93
 
79
- def update_version_in_file(file_path: Path, version: str, pattern: str, replacement: str) -> None:
94
+ def update_version_in_file(
95
+ file_path: Path, version: str, pattern: str, replacement: str
96
+ ) -> None:
80
97
  """Update version string in a file."""
81
98
  content = file_path.read_text()
82
99
  new_content = re.sub(pattern, replacement, content, flags=re.MULTILINE)
@@ -85,11 +102,19 @@ def update_version_in_file(file_path: Path, version: str, pattern: str, replacem
85
102
  file_path.write_text(new_content)
86
103
 
87
104
 
88
- def prepare(version: str) -> None:
105
+ def prepare(version: str, message_file: Path | None = None) -> None:
89
106
  """Prepare a release: create branch, update versions, push, create PR."""
90
107
  validate_version(version)
91
108
  validate_git_state(require_master=True)
92
109
 
110
+ # Read release notes if provided
111
+ release_notes = None
112
+ if message_file:
113
+ if not message_file.exists():
114
+ print(f"ERROR: Message file not found: {message_file}", file=sys.stderr)
115
+ sys.exit(1)
116
+ release_notes = message_file.read_text().strip()
117
+
93
118
  repo_root = get_repo_root()
94
119
  branch_name = f"release/v{version}"
95
120
 
@@ -118,24 +143,47 @@ def prepare(version: str) -> None:
118
143
  f'__version__ = "{version}"',
119
144
  )
120
145
 
121
- # Commit
146
+ # Commit with release notes or default message
122
147
  print("Committing version bump...")
123
- run(f'git add -A && git commit -m "release: bump version to {version}"')
148
+ if release_notes:
149
+ commit_msg = f"release: v{version}\n\n{release_notes}"
150
+ else:
151
+ commit_msg = f"release: bump version to {version}"
152
+
153
+ # Use a temp file for the commit message to handle multiline properly
154
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
155
+ f.write(commit_msg)
156
+ commit_msg_file = f.name
157
+ try:
158
+ run(f'git add -A && git commit -F "{commit_msg_file}"')
159
+ finally:
160
+ Path(commit_msg_file).unlink()
124
161
 
125
162
  # Push
126
163
  print(f"Pushing {branch_name}...")
127
164
  run(f"git push -u origin {branch_name}")
128
165
 
129
- # Create PR
166
+ # Create PR with release notes as body
130
167
  print("Creating pull request...")
131
- result = run(
132
- f'gh pr create --title "Release v{version}" --body "Bump version to {version}"',
133
- check=False,
134
- )
168
+ pr_body = release_notes if release_notes else f"Bump version to {version}"
169
+
170
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
171
+ f.write(pr_body)
172
+ pr_body_file = f.name
173
+ try:
174
+ result = run(
175
+ f'gh pr create --title "Release v{version}" --body-file "{pr_body_file}"',
176
+ check=False,
177
+ )
178
+ finally:
179
+ Path(pr_body_file).unlink()
180
+
135
181
  if result.returncode != 0:
136
182
  print(f"\nTo create PR manually:\n gh pr create --title 'Release v{version}'")
137
183
 
138
- print(f"\nRelease branch ready. After PR is merged, run:\n ./maintenance/release.py tag {version}")
184
+ print(
185
+ f"\nRelease branch ready. After PR is merged, run:\n ./maintenance/release.py tag {version}"
186
+ )
139
187
 
140
188
 
141
189
  def tag(version: str) -> None:
@@ -167,21 +215,28 @@ def tag(version: str) -> None:
167
215
 
168
216
 
169
217
def main() -> None:
    """Entry point: parse the CLI arguments and run the chosen subcommand."""
    parser = argparse.ArgumentParser(
        description="Release preparation and tagging script"
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # "prepare": create the release branch and open a PR.
    cmd_prepare = commands.add_parser(
        "prepare", help="Create release branch and PR"
    )
    cmd_prepare.add_argument("version", help="Version number (X.Y.Z)")
    cmd_prepare.add_argument(
        "--message-file",
        "-m",
        type=Path,
        help="File containing release notes for commit message and PR body",
    )

    # "tag": tag the release once the PR has been merged.
    cmd_tag = commands.add_parser("tag", help="Tag after PR is merged")
    cmd_tag.add_argument("version", help="Version number (X.Y.Z)")

    args = parser.parse_args()

    if args.command == "prepare":
        prepare(args.version, args.message_file)
    elif args.command == "tag":
        tag(args.version)
185
240
 
186
241
 
187
242
  if __name__ == "__main__":
@@ -60,46 +60,52 @@ class MemoryDocumentRepository(DocumentRepository, MemoryRepositoryMixin[Documen
60
60
    async def save(self, document: Document) -> None:
        """Save a document with its content and metadata.

        If the document has content_bytes, it will be normalized to bytes
        (encoding str as UTF-8), converted to a ContentStream and the
        content hash will be calculated automatically.

        Args:
            document: Document object to save

        Raises:
            ValueError: If document has no content or content_bytes
            TypeError: If content_bytes is not bytes or str
        """
        # Handle content_bytes conversion (only if no content stream provided)
        if document.content_bytes is not None:
            # Normalize to bytes: str is encoded as UTF-8, bytes pass through.
            if isinstance(document.content_bytes, str):
                raw_bytes = document.content_bytes.encode("utf-8")
            elif isinstance(document.content_bytes, bytes):
                raw_bytes = document.content_bytes
            else:
                raise TypeError("content_bytes must be of type 'bytes' or 'str'.")

            content_stream = ContentStream(io.BytesIO(raw_bytes))

            # Calculate content hash.
            # NOTE(review): this stores a bare sha256 hexdigest in
            # content_multihash, while the fixture loader builds
            # "sha256-{hex}" — confirm which format consumers expect.
            content_hash = hashlib.sha256(raw_bytes).hexdigest()

            # Create new document with ContentStream and calculated hash;
            # size_bytes reflects the normalized byte length, not the str length.
            document = document.model_copy(
                update={
                    "content": content_stream,
                    "content_multihash": content_hash,
                    "size_bytes": len(raw_bytes),
                }
            )

            self.logger.debug(
                "Converted content_bytes to ContentStream for document save",
                extra={
                    "document_id": document.document_id,
                    "content_hash": content_hash,
                    "content_length": len(raw_bytes),
                },
            )

        # Create a copy without content_bytes (content saved
        # in separate content-addressable storage)
        document_for_storage = document.model_copy(update={"content_bytes": None})
        self.save_entity(document_for_storage, "document_id")
104
110
 
105
111
  async def generate_id(self) -> str: