hecvec 0.4.4__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {hecvec-0.4.4 → hecvec-0.4.6}/PKG-INFO +1 -1
  2. {hecvec-0.4.4 → hecvec-0.4.6}/pyproject.toml +1 -1
  3. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/__init__.py +1 -1
  4. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/pipeline.py +6 -4
  5. {hecvec-0.4.4 → hecvec-0.4.6}/.gitignore +0 -0
  6. {hecvec-0.4.4 → hecvec-0.4.6}/README.md +0 -0
  7. {hecvec-0.4.4 → hecvec-0.4.6}/scripts/test_slice.py +0 -0
  8. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/_recursive_chunking.py +0 -0
  9. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/chroma_client.py +0 -0
  10. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/chroma_list.py +0 -0
  11. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/chunkers.py +0 -0
  12. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/chunking.py +0 -0
  13. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/cli.py +0 -0
  14. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/embeddings.py +0 -0
  15. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/env.py +0 -0
  16. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/hecvec.py +0 -0
  17. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/listdir.py +0 -0
  18. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/reading.py +0 -0
  19. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/run_llm_chunk.py +0 -0
  20. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/run_semantic_chunk.py +0 -0
  21. {hecvec-0.4.4 → hecvec-0.4.6}/src/hecvec/token_splitter.py +0 -0
  22. {hecvec-0.4.4 → hecvec-0.4.6}/tests/conftest.py +0 -0
  23. {hecvec-0.4.4 → hecvec-0.4.6}/tests/test_env.py +0 -0
  24. {hecvec-0.4.4 → hecvec-0.4.6}/tests/test_listdir.py +0 -0
  25. {hecvec-0.4.4 → hecvec-0.4.6}/tests/test_reading.py +0 -0
  26. {hecvec-0.4.4 → hecvec-0.4.6}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hecvec
3
- Version: 0.4.4
3
+ Version: 0.4.6
4
4
  Summary: List directories (safe root), filter .txt/.md files, read as text, chunk, embed, and push to Chroma.
5
5
  License-Expression: MIT
6
6
  Keywords: chunking,document-pipeline,listdir,text-files
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hecvec"
7
- version = "0.4.4"
7
+ version = "0.4.6"
8
8
  description = "List directories (safe root), filter .txt/.md files, read as text, chunk, embed, and push to Chroma."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9,<3.14"
@@ -35,4 +35,4 @@ __all__ = [
35
35
  "__version__",
36
36
  ]
37
37
 
38
- __version__ = "0.4.4"
38
+ __version__ = "0.4.6"
@@ -57,6 +57,8 @@ class Slicer:
57
57
  collection_name: str = "hecvec",
58
58
  chroma_host: str = "localhost",
59
59
  chroma_port: int = 8000,
60
+ chroma_mode: ChromaMode = "auto",
61
+ chroma_persist_path: str | Path | None = None,
60
62
  embedding_model: str = "text-embedding-3-small",
61
63
  chunk_size: int = 200,
62
64
  chunk_overlap: int = 0,
@@ -215,7 +217,7 @@ class Slicer:
215
217
 
216
218
  # 5/5 Push to Chroma
217
219
  stage_start = perf_counter()
218
- logger.info("[5/5] Writing to Chroma | host=%s | port=%s | collection=%s", chroma_host, chroma_port, collection_name)
220
+ logger.info("[5/5] Chroma | host=%s | port=%s | collection=%s", chroma_host, chroma_port, collection_name)
219
221
  client, chroma_mode_used = get_client(
220
222
  host=chroma_host,
221
223
  port=chroma_port,
@@ -223,11 +225,11 @@ class Slicer:
223
225
  mode=chroma_mode,
224
226
  )
225
227
  if chroma_mode_used == "server":
226
- logger.info("[5/5] Using Chroma server at %s:%s", chroma_host, chroma_port)
228
+ logger.info("[5/5] Chroma: connected to server at %s:%s (data can persist)", chroma_host, chroma_port)
227
229
  elif chroma_mode_used == "persistent":
228
- logger.info("[5/5] Using Chroma persistent storage at %s", chroma_persist_path)
230
+ logger.info("[5/5] Chroma: using persistent storage at %s (data persists on disk)", chroma_persist_path)
229
231
  else:
230
- logger.info("[5/5] Using Chroma in-memory (ephemeral) client")
232
+ logger.info("[5/5] Chroma: not connected to server; using in-memory (ephemeral). Data is lost when this process exits.")
231
233
  logger.info("[5/5] Adding %d document chunk(s) to collection...", len(documents))
232
234
  add_result = add_documents(client, collection_name, ids, embeddings, documents)
233
235
  if add_result["collection_existed"]:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes