biblicus 0.16.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biblicus/corpus.py CHANGED
@@ -11,6 +11,7 @@ import shutil
11
11
  import uuid
12
12
  from pathlib import Path
13
13
  from typing import Any, Dict, List, Optional, Sequence
14
+ from urllib.parse import quote, unquote, urlparse
14
15
 
15
16
  import yaml
16
17
  from pydantic import ValidationError
@@ -24,6 +25,7 @@ from .constants import (
24
25
  SCHEMA_VERSION,
25
26
  SIDECAR_SUFFIX,
26
27
  )
28
+ from .errors import IngestCollisionError
27
29
  from .frontmatter import parse_front_matter, render_front_matter
28
30
  from .hook_manager import HookManager
29
31
  from .hooks import HookPoint
@@ -110,7 +112,10 @@ def _preferred_extension_for_media_type(media_type: str) -> Optional[str]:
110
112
  """
111
113
  media_type_overrides = {
112
114
  "image/jpeg": ".jpg",
115
+ "audio/mpeg": ".mp3",
113
116
  "audio/ogg": ".ogg",
117
+ "audio/wav": ".wav",
118
+ "audio/x-wav": ".wav",
114
119
  }
115
120
  if media_type in media_type_overrides:
116
121
  return media_type_overrides[media_type]
@@ -136,7 +141,16 @@ def _ensure_filename_extension(filename: str, *, media_type: str) -> str:
136
141
  return raw_name + ".md"
137
142
 
138
143
  if Path(raw_name).suffix:
139
- return raw_name
144
+ if "%2F" in raw_name or "%3A" in raw_name:
145
+ decoded = unquote(raw_name)
146
+ parsed = urlparse(decoded)
147
+ decoded_path = parsed.path if parsed.scheme else decoded
148
+ if not Path(decoded_path).suffix:
149
+ pass
150
+ else:
151
+ return raw_name
152
+ else:
153
+ return raw_name
140
154
 
141
155
  ext = _preferred_extension_for_media_type(media_type)
142
156
  if not ext:
@@ -144,6 +158,55 @@ def _ensure_filename_extension(filename: str, *, media_type: str) -> str:
144
158
  return raw_name + ext
145
159
 
146
160
 
161
+ def _encode_source_uri_for_filename(source_uri: str) -> str:
162
+ """
163
+ Percent-encode a source uniform resource identifier for filename use.
164
+
165
+ :param source_uri: Source uniform resource identifier to encode.
166
+ :type source_uri: str
167
+ :return: Percent-encoded uniform resource identifier safe for filenames.
168
+ :rtype: str
169
+ """
170
+ return quote(source_uri, safe="")
171
+
172
+
173
+ def _storage_filename_for_ingest(
174
+ *, filename: Optional[str], media_type: str, source_uri: Optional[str]
175
+ ) -> str:
176
+ """
177
+ Derive a collision-safe filename for corpus storage.
178
+
179
+ If a source uniform resource identifier is provided, the full uniform resource identifier is
180
+ percent-encoded to namespace the stored file, preventing collisions between identical basenames
181
+ from different sources. When no uniform resource identifier is available, fall back to a
182
+ sanitized filename.
183
+
184
+ :param filename: Optional filename hint from the caller.
185
+ :type filename: str or None
186
+ :param media_type: Media type of the payload.
187
+ :type media_type: str
188
+ :param source_uri: Optional source uniform resource identifier for provenance.
189
+ :type source_uri: str or None
190
+ :return: Storage filename with an appropriate extension, or an empty string when no hint exists.
191
+ :rtype: str
192
+ """
193
+ base_name = ""
194
+ if source_uri:
195
+ base_name = _encode_source_uri_for_filename(source_uri)
196
+ if filename and not source_uri.startswith("file:"):
197
+ sanitized = _sanitize_filename(filename)
198
+ if sanitized:
199
+ base_name = f"{base_name}--{sanitized}"
200
+ if not base_name and filename:
201
+ base_name = _sanitize_filename(filename)
202
+ if not base_name:
203
+ return ""
204
+ if len(base_name) > 180:
205
+ digest = hashlib.sha256(base_name.encode("utf-8")).hexdigest()
206
+ base_name = f"hash-{digest}"
207
+ return _ensure_filename_extension(base_name, media_type=media_type)
208
+
209
+
147
210
  def _merge_tags(explicit: Sequence[str], from_frontmatter: Any) -> List[str]:
148
211
  """
149
212
  Merge tags from explicit input and front matter values.
@@ -520,6 +583,24 @@ class Corpus:
520
583
  temp_path.write_text(catalog.model_dump_json(indent=2) + "\n", encoding="utf-8")
521
584
  temp_path.replace(self.catalog_path)
522
585
 
586
+ def _find_item_by_source_uri(self, source_uri: str) -> Optional[CatalogItem]:
587
+ """
588
+ Locate an existing catalog item by source uniform resource identifier.
589
+
590
+ :param source_uri: Source uniform resource identifier to search for.
591
+ :type source_uri: str
592
+ :return: Matching catalog item or None.
593
+ :rtype: CatalogItem or None
594
+ """
595
+ if not source_uri:
596
+ return None
597
+ self._init_catalog()
598
+ catalog = self._load_catalog()
599
+ for item in catalog.items.values():
600
+ if item.source_uri == source_uri:
601
+ return item
602
+ return None
603
+
523
604
  @property
524
605
  def runs_dir(self) -> Path:
525
606
  """
@@ -817,18 +898,26 @@ class Corpus:
817
898
  :return: Ingestion result summary.
818
899
  :rtype: IngestResult
819
900
  :raises ValueError: If markdown is not Unicode Transformation Format 8.
901
+ :raises IngestCollisionError: If a source uniform resource identifier is already ingested.
820
902
  """
821
- item_id = str(uuid.uuid4())
822
- safe_filename = _sanitize_filename(filename) if filename else ""
903
+ existing_item = self._find_item_by_source_uri(source_uri)
904
+ if existing_item is not None:
905
+ raise IngestCollisionError(
906
+ source_uri=source_uri,
907
+ existing_item_id=existing_item.id,
908
+ existing_relpath=existing_item.relpath,
909
+ )
823
910
 
824
- if safe_filename:
825
- safe_filename = _ensure_filename_extension(safe_filename, media_type=media_type)
911
+ item_id = str(uuid.uuid4())
912
+ storage_filename = _storage_filename_for_ingest(
913
+ filename=filename, media_type=media_type, source_uri=source_uri
914
+ )
826
915
 
827
916
  if media_type == "text/markdown":
828
- output_name = f"{item_id}--{safe_filename}" if safe_filename else f"{item_id}.md"
917
+ output_name = f"{item_id}--{storage_filename}" if storage_filename else f"{item_id}.md"
829
918
  else:
830
- if safe_filename:
831
- output_name = f"{item_id}--{safe_filename}"
919
+ if storage_filename:
920
+ output_name = f"{item_id}--{storage_filename}"
832
921
  else:
833
922
  extension = _preferred_extension_for_media_type(media_type) or ""
834
923
  output_name = f"{item_id}{extension}" if extension else f"{item_id}"
@@ -991,13 +1080,21 @@ class Corpus:
991
1080
  if media_type == "text/markdown":
992
1081
  raise ValueError("Stream ingestion is not supported for Markdown")
993
1082
 
1083
+ existing_item = self._find_item_by_source_uri(source_uri)
1084
+ if existing_item is not None:
1085
+ raise IngestCollisionError(
1086
+ source_uri=source_uri,
1087
+ existing_item_id=existing_item.id,
1088
+ existing_relpath=existing_item.relpath,
1089
+ )
1090
+
994
1091
  item_id = str(uuid.uuid4())
995
- safe_filename = _sanitize_filename(filename) if filename else ""
996
- if safe_filename:
997
- safe_filename = _ensure_filename_extension(safe_filename, media_type=media_type)
1092
+ storage_filename = _storage_filename_for_ingest(
1093
+ filename=filename, media_type=media_type, source_uri=source_uri
1094
+ )
998
1095
 
999
- if safe_filename:
1000
- output_name = f"{item_id}--{safe_filename}"
1096
+ if storage_filename:
1097
+ output_name = f"{item_id}--{storage_filename}"
1001
1098
  else:
1002
1099
  extension = _preferred_extension_for_media_type(media_type) or ""
1003
1100
  output_name = f"{item_id}{extension}" if extension else f"{item_id}"
@@ -1085,7 +1182,7 @@ class Corpus:
1085
1182
  *,
1086
1183
  title: Optional[str] = None,
1087
1184
  tags: Sequence[str] = (),
1088
- source_uri: str = "text",
1185
+ source_uri: Optional[str] = None,
1089
1186
  ) -> IngestResult:
1090
1187
  """
1091
1188
  Ingest a text note as Markdown.
@@ -1096,11 +1193,15 @@ class Corpus:
1096
1193
  :type title: str or None
1097
1194
  :param tags: Tags to associate with the note.
1098
1195
  :type tags: Sequence[str]
1099
- :param source_uri: Source uniform resource identifier for provenance.
1100
- :type source_uri: str
1196
+ :param source_uri: Optional source uniform resource identifier for provenance.
1197
+ :type source_uri: str or None
1101
1198
  :return: Ingestion result summary.
1102
1199
  :rtype: IngestResult
1103
1200
  """
1201
+ if source_uri is None:
1202
+ digest_source = (title or "") + "\n" + text
1203
+ digest = hashlib.sha256(digest_source.encode("utf-8")).hexdigest()
1204
+ source_uri = f"text:{digest}"
1104
1205
  data = text.encode("utf-8")
1105
1206
  return self.ingest_item(
1106
1207
  data,
biblicus/errors.py CHANGED
@@ -13,3 +13,27 @@ class ExtractionRunFatalError(RuntimeError):
13
13
  rather than a per-item extraction failure. For example, a selection extractor that depends
14
14
  on referenced extraction run manifests treats missing manifests as fatal.
15
15
  """
16
+
17
+
18
+ class IngestCollisionError(RuntimeError):
19
+ """
20
+ Ingest collision for an already ingested source.
21
+
22
+ :param source_uri: Source uniform resource identifier that caused the collision.
23
+ :type source_uri: str
24
+ :param existing_item_id: Identifier of the existing catalog item.
25
+ :type existing_item_id: str
26
+ :param existing_relpath: Raw storage relpath of the existing item.
27
+ :type existing_relpath: str
28
+ """
29
+
30
+ def __init__(self, *, source_uri: str, existing_item_id: str, existing_relpath: str) -> None:
31
+ self.source_uri = source_uri
32
+ self.existing_item_id = existing_item_id
33
+ self.existing_relpath = existing_relpath
34
+ message = (
35
+ "Source already ingested"
36
+ f": source_uri={source_uri} existing_item_id={existing_item_id}"
37
+ f" existing_relpath={existing_relpath}"
38
+ )
39
+ super().__init__(message)
@@ -44,7 +44,7 @@ class KnowledgeBaseDefaults(BaseModel):
44
44
  query_budget: QueryBudget = Field(
45
45
  default_factory=lambda: QueryBudget(
46
46
  max_total_items=5,
47
- max_total_characters=2000,
47
+ maximum_total_characters=2000,
48
48
  max_items_per_source=None,
49
49
  )
50
50
  )
biblicus/models.py CHANGED
@@ -234,8 +234,8 @@ class QueryBudget(BaseModel):
234
234
  This enables simple pagination by re-running the same query with a
235
235
  higher offset.
236
236
  :vartype offset: int
237
- :ivar max_total_characters: Optional maximum total characters across evidence text.
238
- :vartype max_total_characters: int or None
237
+ :ivar maximum_total_characters: Optional maximum total characters across evidence text.
238
+ :vartype maximum_total_characters: int or None
239
239
  :ivar max_items_per_source: Optional cap per source uniform resource identifier.
240
240
  :vartype max_items_per_source: int or None
241
241
  """
@@ -244,7 +244,7 @@ class QueryBudget(BaseModel):
244
244
 
245
245
  max_total_items: int = Field(ge=1)
246
246
  offset: int = Field(default=0, ge=0)
247
- max_total_characters: Optional[int] = Field(default=None, ge=1)
247
+ maximum_total_characters: Optional[int] = Field(default=None, ge=1)
248
248
  max_items_per_source: Optional[int] = Field(default=None, ge=1)
249
249
 
250
250
 
@@ -278,6 +278,8 @@ class Evidence(BaseModel):
278
278
  :vartype recipe_id: str
279
279
  :ivar run_id: Retrieval run identifier.
280
280
  :vartype run_id: str
281
+ :ivar metadata: Optional metadata payload from the catalog item.
282
+ :vartype metadata: dict[str, Any]
281
283
  :ivar hash: Optional content hash for provenance.
282
284
  :vartype hash: str or None
283
285
  """
@@ -297,6 +299,7 @@ class Evidence(BaseModel):
297
299
  stage_scores: Optional[Dict[str, float]] = None
298
300
  recipe_id: str
299
301
  run_id: str
302
+ metadata: Dict[str, Any] = Field(default_factory=dict)
300
303
  hash: Optional[str] = None
301
304
 
302
305
  @model_validator(mode="after")
biblicus/retrieval.py CHANGED
@@ -124,8 +124,8 @@ def apply_budget(evidence: Iterable[Evidence], budget: QueryBudget) -> List[Evid
124
124
  continue
125
125
 
126
126
  text_character_count = len(candidate_evidence.text or "")
127
- if budget.max_total_characters is not None:
128
- if total_characters + text_character_count > budget.max_total_characters:
127
+ if budget.maximum_total_characters is not None:
128
+ if total_characters + text_character_count > budget.maximum_total_characters:
129
129
  continue
130
130
 
131
131
  selected_evidence.append(candidate_evidence)
biblicus/sources.py CHANGED
@@ -8,7 +8,7 @@ import mimetypes
8
8
  from dataclasses import dataclass
9
9
  from pathlib import Path
10
10
  from typing import Optional
11
- from urllib.parse import unquote, urlparse
11
+ from urllib.parse import quote, unquote, urlparse
12
12
  from urllib.request import Request, urlopen
13
13
 
14
14
 
@@ -37,6 +37,27 @@ def _filename_from_url_path(path: str) -> str:
37
37
  return filename or "download"
38
38
 
39
39
 
40
+ def _sanitize_filename_component(name: str) -> str:
41
+ allowed_characters = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._() ")
42
+ sanitized_name = "".join(
43
+ (character if character in allowed_characters else "_") for character in name
44
+ ).strip()
45
+ return sanitized_name or "file"
46
+
47
+
48
+ def _namespaced_filename(
49
+ *, source_uri: Optional[str], fallback_name: Optional[str], media_type: str
50
+ ) -> str:
51
+ base_name = ""
52
+ if source_uri:
53
+ base_name = quote(source_uri, safe="")
54
+ if not base_name and fallback_name:
55
+ base_name = _sanitize_filename_component(fallback_name)
56
+ if not base_name:
57
+ base_name = "file"
58
+ return _ensure_extension_for_media_type(base_name, media_type)
59
+
60
+
40
61
  def _media_type_from_filename(name: str) -> str:
41
62
  """
42
63
  Guess media type from a filename.
@@ -119,8 +140,16 @@ def _ensure_extension_for_media_type(filename: str, media_type: str) -> str:
119
140
  """
120
141
  if Path(filename).suffix:
121
142
  return filename
122
- if media_type == "audio/ogg":
123
- ext = ".ogg"
143
+ media_type_overrides = {
144
+ "audio/mpeg": ".mp3",
145
+ "audio/ogg": ".ogg",
146
+ "audio/wav": ".wav",
147
+ "audio/x-wav": ".wav",
148
+ "image/jpeg": ".jpg",
149
+ "text/html": ".html",
150
+ }
151
+ if media_type in media_type_overrides:
152
+ ext = media_type_overrides[media_type]
124
153
  else:
125
154
  ext = mimetypes.guess_extension(media_type) or ""
126
155
  return filename + ext if ext else filename
@@ -165,11 +194,12 @@ def load_source(source: str | Path, *, source_uri: Optional[str] = None) -> Sour
165
194
  media_type = _media_type_from_filename(path.name)
166
195
  if path.suffix.lower() in {".md", ".markdown"}:
167
196
  media_type = "text/markdown"
197
+ resolved_source_uri = source_uri or path.as_uri()
168
198
  return SourcePayload(
169
199
  data=path.read_bytes(),
170
200
  filename=path.name,
171
201
  media_type=media_type,
172
- source_uri=source_uri or path.as_uri(),
202
+ source_uri=resolved_source_uri,
173
203
  )
174
204
 
175
205
  if _looks_like_uri(source):
@@ -187,21 +217,26 @@ def load_source(source: str | Path, *, source_uri: Optional[str] = None) -> Sour
187
217
  with urlopen(request, timeout=30) as response:
188
218
  response_bytes = response.read()
189
219
  content_type = response.headers.get("Content-Type", "").split(";", 1)[0].strip()
190
- filename = _filename_from_url_path(parsed.path)
191
- media_type = content_type or _media_type_from_filename(filename)
220
+ fallback_filename = _filename_from_url_path(parsed.path)
221
+ media_type = content_type or _media_type_from_filename(fallback_filename)
192
222
  if media_type == "application/octet-stream":
193
223
  sniffed = _sniff_media_type_from_bytes(response_bytes)
194
224
  if sniffed:
195
225
  media_type = sniffed
196
- filename = _ensure_extension_for_media_type(filename, media_type)
197
- media_type = _normalize_media_type(filename=filename, media_type=media_type)
198
- if Path(filename).suffix.lower() in {".md", ".markdown"}:
226
+ fallback_filename = _ensure_extension_for_media_type(
227
+ fallback_filename, media_type
228
+ )
229
+ media_type = _normalize_media_type(
230
+ filename=fallback_filename, media_type=media_type
231
+ )
232
+ if Path(fallback_filename).suffix.lower() in {".md", ".markdown"}:
199
233
  media_type = "text/markdown"
234
+ resolved_source_uri = source_uri or source
200
235
  return SourcePayload(
201
236
  data=response_bytes,
202
- filename=filename,
237
+ filename=fallback_filename,
203
238
  media_type=media_type,
204
- source_uri=source_uri or source,
239
+ source_uri=resolved_source_uri,
205
240
  )
206
241
 
207
242
  raise NotImplementedError(
biblicus/text/link.py CHANGED
@@ -159,6 +159,8 @@ def _apply_link_replace(text: str, old_str: str, new_str: str) -> str:
159
159
 
160
160
 
161
161
  def _validate_replace_text(old_str: str, new_str: str) -> None:
162
+ if "<span" in old_str or "</span>" in old_str:
163
+ raise ValueError("Text link replacements must target plain text without span tags")
162
164
  if strip_span_tags(old_str) != strip_span_tags(new_str):
163
165
  raise ValueError("Text link replacements may only insert span tags")
164
166
 
@@ -460,12 +462,16 @@ def _build_retry_message(errors: Sequence[str], current_text: str, id_prefix: st
460
462
  error_lines = "\n".join(f"- {error}" for error in errors)
461
463
  context_section = build_span_context_section(current_text, errors)
462
464
  coverage_guidance = _build_coverage_guidance(errors)
465
+ nested_guidance = ""
466
+ if any("nested span" in error for error in errors):
467
+ nested_guidance = "Do not create nested or overlapping spans. Remove nested spans and wrap only bare text.\n"
463
468
  return (
464
469
  "Your last edit did not validate.\n"
465
470
  "Issues:\n"
466
471
  f"{error_lines}\n\n"
467
472
  f"{context_section}"
468
473
  f"{coverage_guidance}"
474
+ f"{nested_guidance}"
469
475
  "Please fix the markup using str_replace. Use id for first mentions and ref for repeats. "
470
476
  "Reuse the same id for identical names and do not assign multiple ids to the same name. "
471
477
  f"Ids must start with '{id_prefix}'. Try again.\n"
biblicus/text/prompts.py CHANGED
@@ -57,6 +57,8 @@ DEFAULT_ANNOTATE_SYSTEM_PROMPT = (
57
57
  "- new_str must be identical to old_str with only <span ...> and </span> inserted.\n"
58
58
  "- Do not include <span or </span> inside old_str or new_str.\n"
59
59
  "- Do not insert nested spans.\n"
60
+ "- Do not wrap text that is already inside a span; spans must never overlap.\n"
61
+ "- If a name appears inside an existing span, leave it alone and wrap only bare text.\n"
60
62
  "- If a tool call fails due to non-unique old_str, retry with a longer unique old_str.\n"
61
63
  "- If a tool call fails, read the error and keep editing. Do not call done until spans are inserted.\n"
62
64
  "- Do not delete, reorder, paraphrase, or label text beyond the span attributes.\n\n"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: biblicus
3
- Version: 0.16.0
3
+ Version: 1.0.0
4
4
  Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
5
5
  License: MIT
6
6
  Requires-Python: >=3.9
@@ -293,7 +293,7 @@ for note_title, note_text in notes:
293
293
 
294
294
  backend = get_backend("scan")
295
295
  run = backend.build_run(corpus, recipe_name="Story demo", config={})
296
- budget = QueryBudget(max_total_items=5, max_total_characters=2000, max_items_per_source=None)
296
+ budget = QueryBudget(max_total_items=5, maximum_total_characters=2000, max_items_per_source=None)
297
297
  result = backend.query(
298
298
  corpus,
299
299
  run=run,
@@ -333,7 +333,7 @@ Example output:
333
333
  "query_text": "Primary button style preference",
334
334
  "budget": {
335
335
  "max_total_items": 5,
336
- "max_total_characters": 2000,
336
+ "maximum_total_characters": 2000,
337
337
  "max_items_per_source": null
338
338
  },
339
339
  "run_id": "RUN_ID",
@@ -1,13 +1,13 @@
1
- biblicus/__init__.py,sha256=VK1nvxxf1NI1u5Ad94yyMpC5Xc_HJ7-3F_TnJdhItbA,496
1
+ biblicus/__init__.py,sha256=z9Wif5-ZzIrptsUS8OELW5zG5_R3-4ZcSuVUkfqKbaA,989
2
2
  biblicus/__main__.py,sha256=ipfkUoTlocVnrQDM69C7TeBqQxmHVeiWMRaT3G9rtnk,117
3
3
  biblicus/chunking.py,sha256=GdJr0skAAI0Su99mr7dXqCgR7eJ0sJu8n2XesVGyddY,13206
4
- biblicus/cli.py,sha256=x3bbtg_nzvIZlHmiPp-4L2EtV6wugTMueFTkXQy9y1s,43372
4
+ biblicus/cli.py,sha256=DdEL8Uvl38Zn2w4egCxQ4zWNelrI3QDs4qh4tGWGuAI,43793
5
5
  biblicus/constants.py,sha256=gAlEVJhxdFj-eWWJrlYbP7H1X3c5gwhrIBq9NQ1Vq_E,371
6
- biblicus/context.py,sha256=iXRFGpf_5YDPsDsm_iTK6nCvtUWDoYVI7op-l2QU3uA,10189
7
- biblicus/corpus.py,sha256=qSDnYJXhWlF2p_BbFLl6xtI53lIIPxwyKLLGLC432Sg,55612
6
+ biblicus/context.py,sha256=I7L86ag2AbNr_QgiP5YSt1uwwULGx1cH73eR2nE9T3g,10842
7
+ biblicus/corpus.py,sha256=LySjqBpTF_B19nMyGBoeB8AMDlqohcgsBfmJILm3P5c,59546
8
8
  biblicus/crawl.py,sha256=n8rXBMnziBK9vtKQQCXYOpBzqsPCswj2PzVJUb370KY,6250
9
9
  biblicus/embedding_providers.py,sha256=phWEsq1vryyTFRRs6uZ0sx9FhrqWIkDsS3I52I64zqM,3839
10
- biblicus/errors.py,sha256=uMajd5DvgnJ_-jq5sbeom1GV8DPUc-kojBaECFi6CsY,467
10
+ biblicus/errors.py,sha256=g5TRPdO2XGi-7Wi1C4CXMJ6dTQKYAyP--EWKCv6FGKs,1362
11
11
  biblicus/evaluation.py,sha256=5xWpb-8f49Osh9aHzo1ab3AXOmls3Imc5rdnEC0pN-8,8143
12
12
  biblicus/evidence_processing.py,sha256=sJe6T1nLxvU0xs9yMH8JZZS19zHXMR-Fpr5lWi5ndUM,6120
13
13
  biblicus/extraction.py,sha256=qvrsq6zSz2Kg-cap-18HPHC9pQlqEGo7pyID2uKCyBo,19760
@@ -18,11 +18,11 @@ biblicus/hook_manager.py,sha256=ZCAkE5wLvn4lnQz8jho_o0HGEC9KdQd9qitkAEUQRcw,6997
18
18
  biblicus/hooks.py,sha256=OHQOmOi7rUcQqYWVeod4oPe8nVLepD7F_SlN7O_-BsE,7863
19
19
  biblicus/ignore.py,sha256=fyjt34E6tWNNrm1FseOhgH2MgryyVBQVzxhKL5s4aio,1800
20
20
  biblicus/inference.py,sha256=_k00AIPoXD2lruiTB-JUagtY4f_WKcdzA3axwiq1tck,3512
21
- biblicus/knowledge_base.py,sha256=JmlJw8WD_fgstuq1PyWVzU9kzvVzyv7_xOvhS70xwUw,6654
22
- biblicus/models.py,sha256=5AQ6oXK_KJyU0Kyv5ff8yD8nevNKb_6Hjr2_vlRSlK0,16297
21
+ biblicus/knowledge_base.py,sha256=jpFEvo8gbEuwRUVYRRgQFvRTJZQml0WCHWSeY-CS4ag,6658
22
+ biblicus/models.py,sha256=nvuq5Y96hHvuhMCuHff38wNITyQJam6zFrgFxH5Kh7g,16475
23
23
  biblicus/recipes.py,sha256=rqU66QnjOup6O8Y9Yq7XszmpoM0Pyrjw3RrfdnlVqgE,4210
24
- biblicus/retrieval.py,sha256=GXYT_3RPdqZEYdBQ4F4lIXDOhWw0nfL9bd781bgrn_4,4279
25
- biblicus/sources.py,sha256=EFy8-rQNLsyzz-98mH-z8gEHMYbqigcNFKLaR92KfDE,7241
24
+ biblicus/retrieval.py,sha256=qAauHbnQcxtWZzonyOuwgSsffPyZ--0Z8wW-dEYk0z4,4287
25
+ biblicus/sources.py,sha256=FNwW1FWts0jxWIL3AHon7D6c5ZatyG9AGFqzn1Id5mE,8504
26
26
  biblicus/time.py,sha256=3BSKOSo7R10K-0Dzrbdtl3fh5_yShTYqfdlKvvdkx7M,485
27
27
  biblicus/uris.py,sha256=xXD77lqsT9NxbyzI1spX9Y5a3-U6sLYMnpeSAV7g-nM,2013
28
28
  biblicus/user_config.py,sha256=UXUYBNUN4FR37ggZGJG1wv3K8XzsMR8pXW1T18lrivw,6495
@@ -43,13 +43,18 @@ biblicus/analysis/schema.py,sha256=MCiAQJmijVk8iM8rOUYbzyaDwsMR-Oo86iZU5NCbDMM,4
43
43
  biblicus/analysis/topic_modeling.py,sha256=mNBiRMpY5Jtyz8Aj-WXYY8guEghx9jozTfgveinJLoc,22135
44
44
  biblicus/backends/__init__.py,sha256=WJSvXc6boEj8PeFr__AC6l_0lfBPJpaVgMbVq30vtZU,1669
45
45
  biblicus/backends/base.py,sha256=Erfj9dXg0nkRKnEcNjHR9_0Ddb2B1NvbmRksVm_g1dU,1776
46
- biblicus/backends/embedding_index_common.py,sha256=WdmnOr8-QaLZH9b7TNmiWg_5auIqDYviXc-twOd7WsM,10216
47
- biblicus/backends/embedding_index_file.py,sha256=igwlWHuqQD72qyJ3OZ9uyBGeha7AMg6uA3Uu9QD_2_M,9552
48
- biblicus/backends/embedding_index_inmemory.py,sha256=oZCNS1kMajEj8u9C5iOjs_dAwhZ8ajU_br0F_8onRLY,9457
49
- biblicus/backends/hybrid.py,sha256=_kqpvD9V6oFdFSXE7K_rTN8qqH-vDIigPDX0uQxFhaM,10828
50
- biblicus/backends/scan.py,sha256=hdNnQWqi5IH6j95w30BZHxLJ0W9PTaOkqfWJuxCCEMI,12478
46
+ biblicus/backends/embedding_index_common.py,sha256=wwvp6DjcaAwq-cp2jaO9TvnxDM7JDi-kpgT9uQG9Cxs,11552
47
+ biblicus/backends/embedding_index_file.py,sha256=vibYEWa12Gx-Pm8WnuBnMfBaKiwlAvVW1dEzWJc6JO4,9856
48
+ biblicus/backends/embedding_index_inmemory.py,sha256=LYiNBRmnh4DB8hmlBxMrm_uNmWi46Jt2EvjCuJGm2DI,9711
49
+ biblicus/backends/hybrid.py,sha256=vlsN9N6FZ5A3dQtGXy0W89L4qNQX5EYJNvUuj2-Uqaw,10897
50
+ biblicus/backends/scan.py,sha256=NBlfFHkDS3vdv70bgggK-jHykQC3W_i-RDaa97LEwKE,12548
51
51
  biblicus/backends/sqlite_full_text_search.py,sha256=tkFYdKwH6WvAF3En1fvGN_03Ud0_Z1igGxhUW4meCbA,24496
52
- biblicus/backends/tf_vector.py,sha256=HFV2aj1i4uxFYBd4GOv2M665ZcaQNgHWA4qmLbpIc4k,15220
52
+ biblicus/backends/tf_vector.py,sha256=Z5MiEpbZ7A4UtRLYPEU1g8ubjWV5vuyPG40FpElEVzA,15119
53
+ biblicus/context_engine/__init__.py,sha256=cIJWTUwOewW1x13a2n0YKfr4-XU0IwlVdAH_0pckfKk,1337
54
+ biblicus/context_engine/assembler.py,sha256=ot5mdGJTA1nO8uUP_J_yGXgfVqQhFuEQJ3BH-HF4ZaY,42336
55
+ biblicus/context_engine/compaction.py,sha256=2bLaCpT48d1TL7vt9rrcRCgfdHeWWp9LX85Cgij12o0,2921
56
+ biblicus/context_engine/models.py,sha256=jesVd83ZQcatO-7yNlzwKkactSQ-e1znYuWof4rxVFg,12762
57
+ biblicus/context_engine/retrieval.py,sha256=au_mN8VYc_MhIlbMGHfDf2IK0UWAigj7R5NFXFZ0Kz8,4143
53
58
  biblicus/extractors/__init__.py,sha256=ci3oldbdQZ8meAfHccM48CqQtZsPSRg3HkPrBSZF15M,2673
54
59
  biblicus/extractors/base.py,sha256=ka-nz_1zHPr4TS9sU4JfOoY-PJh7lbHPBOEBrbQFGSc,2171
55
60
  biblicus/extractors/deepgram_stt.py,sha256=VI71i4lbE-EFHcvpNcCPRpT8z7A5IuaSrT1UaPyZ8UY,6323
@@ -71,16 +76,16 @@ biblicus/extractors/unstructured_text.py,sha256=l2S_wD_htu7ZHoJQNQtP-kGlEgOeKV_w
71
76
  biblicus/text/__init__.py,sha256=MiaGAY7xWlUCeBzDzNz6pJnSMiU_Ge5EmlSiEzhqTRo,947
72
77
  biblicus/text/annotate.py,sha256=asmpj3_s_t8hl6stEg99apmqxAhDTkoPzHhZNggYE3Y,8355
73
78
  biblicus/text/extract.py,sha256=pdnUiZWtfCUj7kZK5zhd-tjqokgmhYYheWhyN3iShRU,7669
74
- biblicus/text/link.py,sha256=Xl0yxD1rvbRJRVdWG_ZP6zgmbpgWSJYcUcNM06-OUWU,20077
79
+ biblicus/text/link.py,sha256=2IdOi3WgyBKPFau0bpS1eToV1q2v_6wq5RK5_P_qUDg,20448
75
80
  biblicus/text/markup.py,sha256=8jj9aX03HiZTOWdPs_VC4JLpQ7TlPHgGuXj_QUQIHVw,6265
76
81
  biblicus/text/models.py,sha256=REp6RowUWFdV-6y437JENP7XtGKt57BOvVtF91KmUqI,10853
77
- biblicus/text/prompts.py,sha256=Z5fSsy1Xzr0rCI0WZ3djiEQlbRDncyNBQ7_ZoWVPL4g,6704
82
+ biblicus/text/prompts.py,sha256=-M-8sQ7Dfm1k4j6Kn4ekAuiYe_TkIwLu2VSgpas9rUU,6881
78
83
  biblicus/text/redact.py,sha256=tkDRmA0VvOZwMryEmBPLEHf3Z6VHJkkaWjBaNIMyGZ0,8415
79
84
  biblicus/text/slice.py,sha256=dlHxGO8c5P8BszXGwlNQoQ-cyWjJf6PfS1LUBJXXGEE,5762
80
85
  biblicus/text/tool_loop.py,sha256=w1PGLBvIemOdi6l0ArdYDVL7zgx-RC76bBOO0PKqpt0,11831
81
- biblicus-0.16.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
82
- biblicus-0.16.0.dist-info/METADATA,sha256=D_qXRDdM_7LnOD-GBtnqwldEWOKjt_uV-TfqvnL8MAU,30933
83
- biblicus-0.16.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
84
- biblicus-0.16.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
85
- biblicus-0.16.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
86
- biblicus-0.16.0.dist-info/RECORD,,
86
+ biblicus-1.0.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
87
+ biblicus-1.0.0.dist-info/METADATA,sha256=oyWd6igX6I3o46-VjOAUVskj1pLzZ8DovsTV1mqpPoY,30940
88
+ biblicus-1.0.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
89
+ biblicus-1.0.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
90
+ biblicus-1.0.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
91
+ biblicus-1.0.0.dist-info/RECORD,,