julee 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -203,32 +203,12 @@ class TestDocumentSerialization:
203
203
 
204
204
 
205
205
  class TestDocumentContentValidation:
206
- """Test Document content and content_string validation rules."""
206
+ """Test Document content and content_bytes validation rules."""
207
207
 
208
- def test_document_with_both_content_and_content_string_fails(
209
- self,
210
- ) -> None:
211
- """Test that both content and content_string raises error."""
212
- content_stream = ContentStreamFactory.build()
213
- content_string = '{"type": "string"}'
214
-
215
- with pytest.raises(
216
- ValueError, match="cannot have both content and content_string"
217
- ):
218
- Document(
219
- document_id="test-doc-both",
220
- original_filename="both.json",
221
- content_type="application/json",
222
- size_bytes=100,
223
- content_multihash="test_hash",
224
- content=content_stream,
225
- content_string=content_string,
226
- )
227
-
228
- def test_document_without_content_or_content_string_fails(self) -> None:
229
- """Test that no content or content_string raises error."""
208
+ def test_document_without_content_or_content_bytes_fails(self) -> None:
209
+ """Test that no content or content_bytes raises error."""
230
210
  with pytest.raises(
231
- ValueError, match="must have either content or content_string"
211
+ ValueError, match="must have one of: content, or content_bytes."
232
212
  ):
233
213
  Document(
234
214
  document_id="test-doc-no-content",
@@ -237,7 +217,7 @@ class TestDocumentContentValidation:
237
217
  size_bytes=100,
238
218
  content_multihash="test_hash",
239
219
  content=None,
240
- content_string=None,
220
+ content_bytes=None,
241
221
  )
242
222
 
243
223
  def test_document_with_content_only_succeeds(self) -> None:
@@ -251,15 +231,15 @@ class TestDocumentContentValidation:
251
231
  size_bytes=100,
252
232
  content_multihash="test_hash",
253
233
  content=content_stream,
254
- content_string=None,
234
+ content_bytes=None,
255
235
  )
256
236
 
257
237
  assert doc.content is not None
258
- assert doc.content_string is None
238
+ assert doc.content_bytes is None
259
239
 
260
- def test_document_with_content_string_only_succeeds(self) -> None:
261
- """Test that document with only content_string field succeeds."""
262
- content_string = '{"type": "string"}'
240
+ def test_document_with_content_bytes_only_succeeds(self) -> None:
241
+ """Test that document with only content_bytes field succeeds."""
242
+ content_bytes = b'{"type": "string"}'
263
243
 
264
244
  doc = Document(
265
245
  document_id="test-doc-string",
@@ -268,11 +248,11 @@ class TestDocumentContentValidation:
268
248
  size_bytes=100,
269
249
  content_multihash="test_hash",
270
250
  content=None,
271
- content_string=content_string,
251
+ content_bytes=content_bytes,
272
252
  )
273
253
 
274
254
  assert doc.content is None
275
- assert doc.content_string == content_string
255
+ assert doc.content_bytes == content_bytes
276
256
 
277
257
  def test_document_deserialization_with_empty_content_succeeds(
278
258
  self,
@@ -287,7 +267,7 @@ class TestDocumentContentValidation:
287
267
  "size_bytes": 100,
288
268
  "content_multihash": "test_hash",
289
269
  "content": None,
290
- "content_string": None,
270
+ "content_bytes": None,
291
271
  }
292
272
 
293
273
  # Should succeed with temporal_validation context
@@ -297,4 +277,4 @@ class TestDocumentContentValidation:
297
277
 
298
278
  assert doc.document_id == "test-temporal"
299
279
  assert doc.content is None
300
- assert doc.content_string is None
280
+ assert doc.content_bytes is None
@@ -594,7 +594,7 @@ text or markdown formatting."""
594
594
  size_bytes=len(content_bytes),
595
595
  content_multihash=self._calculate_multihash_from_content(content_bytes),
596
596
  status=DocumentStatus.ASSEMBLED,
597
- content_string=assembled_content, # Use content_string for small
597
+ content_bytes=assembled_content,
598
598
  created_at=self.now_fn(),
599
599
  updated_at=self.now_fn(),
600
600
  )
@@ -13,6 +13,7 @@ The use case follows clean architecture principles:
13
13
  """
14
14
 
15
15
  import hashlib
16
+ import json
16
17
  import logging
17
18
  from datetime import datetime, timezone
18
19
  from pathlib import Path
@@ -535,7 +536,7 @@ class InitializeSystemDataUseCase:
535
536
 
536
537
  def _load_fixture_assembly_specifications(self) -> list[dict[str, Any]]:
537
538
  """
538
- Load assembly specifications from the YAML fixture file.
539
+ Load assembly specifications from a YAML or JSON fixture file.
539
540
 
540
541
  Returns:
541
542
  List of specification dictionaries from the fixture file
@@ -543,23 +544,34 @@ class InitializeSystemDataUseCase:
543
544
  Raises:
544
545
  FileNotFoundError: If the fixture file doesn't exist
545
546
  yaml.YAMLError: If the fixture file is invalid YAML
547
+ json.JSONDecodeError: If the fixture file is invalid JSON
546
548
  KeyError: If required fields are missing from the fixture
549
+ ValueError: If the specification section is malformed
547
550
  """
548
- fixture_path = self._get_demo_fixture_path("assembly_specifications.yaml")
551
+ # Accept both .yaml and .json files
552
+ fixture_path = None
553
+ for ext in ("json", "yaml"):
554
+ candidate = self._get_demo_fixture_path(f"assembly_specifications.{ext}")
555
+ if candidate.exists():
556
+ fixture_path = candidate
557
+ break
558
+
559
+ if fixture_path is None:
560
+ raise FileNotFoundError(
561
+ "Assembly specifications fixture file not found (.yaml or .json)"
562
+ )
549
563
 
550
564
  self.logger.debug(
551
565
  "Loading assembly specifications fixture file",
552
566
  extra={"fixture_path": str(fixture_path)},
553
567
  )
554
568
 
555
- if not fixture_path.exists():
556
- raise FileNotFoundError(
557
- f"Assembly specifications fixture file not found: {fixture_path}"
558
- )
559
-
560
569
  try:
561
570
  with open(fixture_path, encoding="utf-8") as f:
562
- fixture_data = yaml.safe_load(f)
571
+ if fixture_path.suffix.lower() == ".json":
572
+ fixture_data = json.load(f)
573
+ else:
574
+ fixture_data = yaml.safe_load(f)
563
575
 
564
576
  if not fixture_data or "assembly_specifications" not in fixture_data:
565
577
  raise KeyError(
@@ -569,8 +581,7 @@ class InitializeSystemDataUseCase:
569
581
  specs = fixture_data["assembly_specifications"]
570
582
  if not isinstance(specs, list):
571
583
  raise ValueError(
572
- "'assembly_specifications' must be a list of "
573
- "specification configurations"
584
+ "'assembly_specifications' must be a list of specification configurations"
574
585
  )
575
586
 
576
587
  self.logger.debug(
@@ -585,6 +596,13 @@ class InitializeSystemDataUseCase:
585
596
  f"Invalid YAML in assembly specifications fixture file: {e}"
586
597
  )
587
598
 
599
+ except json.JSONDecodeError as e:
600
+ raise json.JSONDecodeError(
601
+ f"Invalid JSON in assembly specifications fixture file: {e}",
602
+ e.doc,
603
+ e.pos,
604
+ )
605
+
588
606
  def _create_assembly_spec_from_fixture_data(
589
607
  self, spec_data: dict[str, Any]
590
608
  ) -> AssemblySpecification:
@@ -782,24 +800,62 @@ class InitializeSystemDataUseCase:
782
800
  "document_id",
783
801
  "original_filename",
784
802
  "content_type",
785
- "content",
786
803
  ]
787
804
 
788
- # Validate required fields
789
805
  for field in required_fields:
790
806
  if field not in doc_data:
791
807
  raise KeyError(f"Required field '{field}' missing from document")
792
808
 
793
- # Get content and calculate hash
794
- content = doc_data["content"]
795
- content_bytes = content.encode("utf-8")
796
- size_bytes = len(content_bytes)
809
+ content_type = doc_data["content_type"]
810
+ is_text = content_type.startswith("text/") or content_type in {
811
+ "application/json",
812
+ "application/xml",
813
+ "application/javascript",
814
+ }
815
+
816
+ if "content" in doc_data:
817
+ content = doc_data["content"]
818
+
819
+ if isinstance(content, bytes):
820
+ content_bytes = content
821
+ elif isinstance(content, str):
822
+ content_bytes = content.encode("utf-8")
823
+ else:
824
+ raise TypeError(
825
+ f"Unsupported type for 'content': {type(content)!r}. Expected str or bytes."
826
+ )
827
+ else:
828
+ current_file = Path(__file__)
829
+ julee_dir = current_file.parent.parent.parent
830
+ fixture_path = julee_dir / "fixtures" / doc_data["original_filename"]
797
831
 
798
- # Create multihash (using SHA-256)
832
+ open_mode = "r" if is_text else "rb"
833
+ encoding = "utf-8" if is_text else None
834
+
835
+ try:
836
+ with fixture_path.open(open_mode, encoding=encoding) as f:
837
+ content = f.read()
838
+ except FileNotFoundError as e:
839
+ self.logger.error(
840
+ "Fixture file not found for document",
841
+ extra={
842
+ "document_id": doc_data["document_id"],
843
+ "fixture_path": str(fixture_path),
844
+ },
845
+ )
846
+ raise FileNotFoundError(
847
+ f"Fixture file '{fixture_path}' not found for document "
848
+ f"{doc_data['document_id']}"
849
+ ) from e
850
+
851
+ content_bytes = content.encode("utf-8") if is_text else content
852
+
853
+ self.logger.info(content_bytes)
854
+
855
+ size_bytes = len(content_bytes)
799
856
  sha256_hash = hashlib.sha256(content_bytes).hexdigest()
800
857
  content_multihash = f"sha256-{sha256_hash}"
801
858
 
802
- # Parse status
803
859
  status = DocumentStatus.CAPTURED
804
860
  if "status" in doc_data:
805
861
  try:
@@ -809,12 +865,10 @@ class InitializeSystemDataUseCase:
809
865
  f"Invalid status '{doc_data['status']}', using default 'captured'"
810
866
  )
811
867
 
812
- # Get optional fields
813
868
  knowledge_service_id = doc_data.get("knowledge_service_id")
814
869
  assembly_types = doc_data.get("assembly_types", [])
815
870
  additional_metadata = doc_data.get("additional_metadata", {})
816
871
 
817
- # Create document
818
872
  document = Document(
819
873
  document_id=doc_data["document_id"],
820
874
  original_filename=doc_data["original_filename"],
@@ -827,7 +881,7 @@ class InitializeSystemDataUseCase:
827
881
  created_at=datetime.now(timezone.utc),
828
882
  updated_at=datetime.now(timezone.utc),
829
883
  additional_metadata=additional_metadata,
830
- content_string=content, # Store content as string for fixtures
884
+ content_bytes=content_bytes,
831
885
  )
832
886
 
833
887
  self.logger.debug(
@@ -9,6 +9,10 @@
9
9
  # repository if they don't already exist (idempotent loading).
10
10
 
11
11
  documents:
12
+ - document_id: "product-spec-sheet"
13
+ original_filename: "Spec-Sheet-BondorPanel-v17.pdf"
14
+ content_type: "application/pdf"
15
+ status: "captured"
12
16
  - document_id: "meeting-transcript-q1-planning"
13
17
  original_filename: "q1_planning_meeting.txt"
14
18
  content_type: "text/plain"
@@ -19,49 +23,6 @@ documents:
19
23
  attendee_count: 3
20
24
  duration_minutes: 90
21
25
  department: "product"
22
- content: |
23
- Meeting Transcript - Q1 Planning Session
24
- Date: March 15, 2024
25
- Time: 2:00 PM - 3:30 PM
26
- Attendees: Sarah Chen (Product Manager), Mike Rodriguez (Engineering Lead),
27
- Lisa Wang (Designer)
28
-
29
- Sarah: Thanks everyone for joining. Let's kick off our Q1 planning. Mike,
30
- can you give us an update on the current sprint?
31
-
32
- Mike: Sure, we're about 80% through sprint 23. We've completed the user
33
- authentication module and are working on the data migration tool. Should be
34
- done by Friday.
35
-
36
- Lisa: Great! I've finished the mockups for the dashboard redesign. Sarah,
37
- have you had a chance to review them?
38
-
39
- Sarah: Yes, they look fantastic. I especially like the new navigation
40
- structure. When can we start implementation?
41
-
42
- Mike: I'd estimate 2 weeks for the frontend work, plus another week for
43
- backend API changes.
44
-
45
- Lisa: I can start on the component library updates while Mike works on the
46
- APIs.
47
-
48
- Sarah: Perfect. Let's also discuss the customer feedback integration. We had
49
- 47 responses to our survey.
50
-
51
- Mike: The main requests were for better reporting and mobile optimization.
52
-
53
- Sarah: Those should be our next priorities then. Lisa, can you start
54
- sketching mobile designs?
55
-
56
- Lisa: Absolutely. I'll have initial concepts by next Tuesday.
57
-
58
- Sarah: Excellent. Any other items?
59
-
60
- Mike: Just a heads up that we'll need to schedule downtime for the database
61
- migration, probably next weekend.
62
-
63
- Sarah: Noted. I'll coordinate with support. Meeting adjourned at 3:30 PM.
64
-
65
26
  - document_id: "customer-feedback-survey-q4"
66
27
  original_filename: "customer_survey_results_q4_2023.txt"
67
28
  content_type: "text/plain"
@@ -1,4 +1,13 @@
1
1
  knowledge_service_queries:
2
+ - query_id: "generate-dpp"
3
+ name: "Generate Digital Product Passport"
4
+ knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
5
+ prompt: "From this product specification sheet, extract the product information to generate a Digital Product Passport, that conforms to the provided schema, including the issuer, the credential subject and the validation dates. Please make sure that the DPP conforms to the provided schema and types and that you don't add any other fields."
6
+ assistant_prompt: "Looking at the product specification sheet, here's the digital product passport that conforms to the provided schema, without surrounding ```json ... ``` markers:"
7
+ query_metadata:
8
+ max_tokens: 3000
9
+ temperature: 0.1
10
+
2
11
  - query_id: "extract-meeting-info-query"
3
12
  name: "Extract Meeting Information"
4
13
  knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
@@ -3,17 +3,21 @@
3
3
  Release preparation and tagging script.
4
4
 
5
5
  Usage:
6
- release.py prepare X.Y.Z # Create release branch and PR
7
- release.py tag X.Y.Z # Tag after PR is merged
6
+ release.py prepare X.Y.Z [--message-file FILE] # Create release branch and PR
7
+ release.py tag X.Y.Z # Tag after PR is merged
8
8
  """
9
9
 
10
+ import argparse
10
11
  import re
11
12
  import subprocess
12
13
  import sys
14
+ import tempfile
13
15
  from pathlib import Path
14
16
 
15
17
 
16
- def run(cmd: str, check: bool = True, capture: bool = True) -> subprocess.CompletedProcess:
18
+ def run(
19
+ cmd: str, check: bool = True, capture: bool = True
20
+ ) -> subprocess.CompletedProcess:
17
21
  """Run a shell command."""
18
22
  result = subprocess.run(cmd, shell=True, capture_output=capture, text=True)
19
23
  if check and result.returncode != 0:
@@ -35,7 +39,9 @@ def get_package_init(repo_root: Path) -> Path | None:
35
39
  src_dir = repo_root / "src"
36
40
  if not src_dir.exists():
37
41
  return None
38
- packages = [p for p in src_dir.iterdir() if p.is_dir() and not p.name.startswith("_")]
42
+ packages = [
43
+ p for p in src_dir.iterdir() if p.is_dir() and not p.name.startswith("_")
44
+ ]
39
45
  if len(packages) != 1:
40
46
  # Multiple packages (bounded contexts) - no single __init__.py to update
41
47
  return None
@@ -48,7 +54,10 @@ def get_package_init(repo_root: Path) -> Path | None:
48
54
def validate_version(version: str) -> None:
    """Validate that *version* is a strict X.Y.Z version string.

    Prints an error to stderr and exits the process with status 1 when
    the format is invalid; returns None on success.

    Args:
        version: Candidate version string, e.g. "1.2.3".

    Raises:
        SystemExit: If the version does not match X.Y.Z exactly.
    """
    # Use fullmatch: with re.match + a "$" anchor, "$" also matches just
    # before a trailing newline, so "1.2.3\n" would be silently accepted.
    if not re.fullmatch(r"\d+\.\d+\.\d+", version):
        print(
            f"ERROR: Invalid version format '{version}'. Expected X.Y.Z",
            file=sys.stderr,
        )
        sys.exit(1)
53
62
 
54
63
 
@@ -65,18 +74,26 @@ def validate_git_state(require_master: bool = True) -> None:
65
74
  result = run("git branch --show-current")
66
75
  branch = result.stdout.strip()
67
76
  if branch not in ("master", "main"):
68
- print(f"ERROR: Must be on master or main branch, currently on '{branch}'", file=sys.stderr)
77
+ print(
78
+ f"ERROR: Must be on master or main branch, currently on '{branch}'",
79
+ file=sys.stderr,
80
+ )
69
81
  sys.exit(1)
70
82
 
71
83
  # Check we're up to date with remote
72
84
  run("git fetch origin")
73
- result = run("git rev-list HEAD...origin/master --count 2>/dev/null || git rev-list HEAD...origin/main --count", check=False)
85
+ result = run(
86
+ "git rev-list HEAD...origin/master --count 2>/dev/null || git rev-list HEAD...origin/main --count",
87
+ check=False,
88
+ )
74
89
  if result.stdout.strip() != "0":
75
90
  print("ERROR: Branch is not up to date with remote", file=sys.stderr)
76
91
  sys.exit(1)
77
92
 
78
93
 
79
- def update_version_in_file(file_path: Path, version: str, pattern: str, replacement: str) -> None:
94
+ def update_version_in_file(
95
+ file_path: Path, version: str, pattern: str, replacement: str
96
+ ) -> None:
80
97
  """Update version string in a file."""
81
98
  content = file_path.read_text()
82
99
  new_content = re.sub(pattern, replacement, content, flags=re.MULTILINE)
@@ -85,11 +102,19 @@ def update_version_in_file(file_path: Path, version: str, pattern: str, replacem
85
102
  file_path.write_text(new_content)
86
103
 
87
104
 
88
- def prepare(version: str) -> None:
105
+ def prepare(version: str, message_file: Path | None = None) -> None:
89
106
  """Prepare a release: create branch, update versions, push, create PR."""
90
107
  validate_version(version)
91
108
  validate_git_state(require_master=True)
92
109
 
110
+ # Read release notes if provided
111
+ release_notes = None
112
+ if message_file:
113
+ if not message_file.exists():
114
+ print(f"ERROR: Message file not found: {message_file}", file=sys.stderr)
115
+ sys.exit(1)
116
+ release_notes = message_file.read_text().strip()
117
+
93
118
  repo_root = get_repo_root()
94
119
  branch_name = f"release/v{version}"
95
120
 
@@ -118,24 +143,47 @@ def prepare(version: str) -> None:
118
143
  f'__version__ = "{version}"',
119
144
  )
120
145
 
121
- # Commit
146
+ # Commit with release notes or default message
122
147
  print("Committing version bump...")
123
- run(f'git add -A && git commit -m "release: bump version to {version}"')
148
+ if release_notes:
149
+ commit_msg = f"release: v{version}\n\n{release_notes}"
150
+ else:
151
+ commit_msg = f"release: bump version to {version}"
152
+
153
+ # Use a temp file for the commit message to handle multiline properly
154
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
155
+ f.write(commit_msg)
156
+ commit_msg_file = f.name
157
+ try:
158
+ run(f'git add -A && git commit -F "{commit_msg_file}"')
159
+ finally:
160
+ Path(commit_msg_file).unlink()
124
161
 
125
162
  # Push
126
163
  print(f"Pushing {branch_name}...")
127
164
  run(f"git push -u origin {branch_name}")
128
165
 
129
- # Create PR
166
+ # Create PR with release notes as body
130
167
  print("Creating pull request...")
131
- result = run(
132
- f'gh pr create --title "Release v{version}" --body "Bump version to {version}"',
133
- check=False,
134
- )
168
+ pr_body = release_notes if release_notes else f"Bump version to {version}"
169
+
170
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
171
+ f.write(pr_body)
172
+ pr_body_file = f.name
173
+ try:
174
+ result = run(
175
+ f'gh pr create --title "Release v{version}" --body-file "{pr_body_file}"',
176
+ check=False,
177
+ )
178
+ finally:
179
+ Path(pr_body_file).unlink()
180
+
135
181
  if result.returncode != 0:
136
182
  print(f"\nTo create PR manually:\n gh pr create --title 'Release v{version}'")
137
183
 
138
- print(f"\nRelease branch ready. After PR is merged, run:\n ./maintenance/release.py tag {version}")
184
+ print(
185
+ f"\nRelease branch ready. After PR is merged, run:\n ./maintenance/release.py tag {version}"
186
+ )
139
187
 
140
188
 
141
189
  def tag(version: str) -> None:
@@ -167,21 +215,28 @@ def tag(version: str) -> None:
167
215
 
168
216
 
169
217
def main() -> None:
    """Entry point: parse the CLI arguments and run the chosen subcommand."""
    parser = argparse.ArgumentParser(
        description="Release preparation and tagging script"
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # "prepare": create the release branch and open a PR.
    cmd_prepare = commands.add_parser(
        "prepare", help="Create release branch and PR"
    )
    cmd_prepare.add_argument("version", help="Version number (X.Y.Z)")
    cmd_prepare.add_argument(
        "--message-file",
        "-m",
        type=Path,
        help="File containing release notes for commit message and PR body",
    )

    # "tag": tag the release once the PR has been merged.
    cmd_tag = commands.add_parser("tag", help="Tag after PR is merged")
    cmd_tag.add_argument("version", help="Version number (X.Y.Z)")

    args = parser.parse_args()

    if args.command == "prepare":
        prepare(args.version, args.message_file)
    elif args.command == "tag":
        tag(args.version)
185
240
 
186
241
 
187
242
  if __name__ == "__main__":
@@ -60,46 +60,52 @@ class MemoryDocumentRepository(DocumentRepository, MemoryRepositoryMixin[Documen
60
60
    async def save(self, document: Document) -> None:
        """Save a document with its content and metadata.

        If the document has content_bytes, it will be normalized to bytes
        (encoding str as UTF-8), converted to a ContentStream and the
        content hash will be calculated automatically.

        Args:
            document: Document object to save

        Raises:
            ValueError: If document has no content or content_bytes
            TypeError: If content_bytes is not bytes or str
        """
        # Handle content_bytes conversion (only if no content stream provided)
        if document.content_bytes is not None:
            # Normalize to bytes: str is encoded as UTF-8, bytes pass through.
            if isinstance(document.content_bytes, str):
                raw_bytes = document.content_bytes.encode("utf-8")
            elif isinstance(document.content_bytes, bytes):
                raw_bytes = document.content_bytes
            else:
                raise TypeError("content_bytes must be of type 'bytes' or 'str'.")

            content_stream = ContentStream(io.BytesIO(raw_bytes))

            # Calculate content hash.
            # NOTE(review): this stores a bare sha256 hexdigest in
            # content_multihash, while the fixture loader builds
            # "sha256-{hex}" — confirm which format consumers expect.
            content_hash = hashlib.sha256(raw_bytes).hexdigest()

            # Create new document with ContentStream and calculated hash;
            # size_bytes reflects the normalized byte length, not the str length.
            document = document.model_copy(
                update={
                    "content": content_stream,
                    "content_multihash": content_hash,
                    "size_bytes": len(raw_bytes),
                }
            )

            self.logger.debug(
                "Converted content_bytes to ContentStream for document save",
                extra={
                    "document_id": document.document_id,
                    "content_hash": content_hash,
                    "content_length": len(raw_bytes),
                },
            )

        # Create a copy without content_bytes (content saved
        # in separate content-addressable storage)
        document_for_storage = document.model_copy(update={"content_bytes": None})
        self.save_entity(document_for_storage, "document_id")
104
110
 
105
111
  async def generate_id(self) -> str: