haiku.rag 0.13.1__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

haiku/rag/app.py CHANGED
@@ -474,9 +474,7 @@ class HaikuRAGApp:
474
474
 
475
475
  # Start file monitor if enabled
476
476
  if enable_monitor:
477
- monitor = FileWatcher(
478
- paths=Config.storage.monitor_directories, client=client
479
- )
477
+ monitor = FileWatcher(client=client)
480
478
  monitor_task = asyncio.create_task(monitor.observe())
481
479
  tasks.append(monitor_task)
482
480
 
haiku/rag/config/__init__.py CHANGED
@@ -11,6 +11,7 @@ from haiku.rag.config.models import (
11
11
  AppConfig,
12
12
  EmbeddingsConfig,
13
13
  LanceDBConfig,
14
+ MonitorConfig,
14
15
  OllamaConfig,
15
16
  ProcessingConfig,
16
17
  ProvidersConfig,
@@ -25,6 +26,7 @@ __all__ = [
25
26
  "Config",
26
27
  "AppConfig",
27
28
  "StorageConfig",
29
+ "MonitorConfig",
28
30
  "LanceDBConfig",
29
31
  "EmbeddingsConfig",
30
32
  "RerankingConfig",
haiku/rag/config/loader.py CHANGED
@@ -10,7 +10,7 @@ def find_config_file(cli_path: Path | None = None) -> Path | None:
10
10
  Search order:
11
11
  1. CLI-provided path (via HAIKU_RAG_CONFIG_PATH env var or parameter)
12
12
  2. ./haiku.rag.yaml (current directory)
13
- 3. ~/.config/haiku.rag/config.yaml (user config)
13
+ 3. Platform-specific user config directory
14
14
 
15
15
  Returns None if no config file is found.
16
16
  """
@@ -29,8 +29,10 @@ def find_config_file(cli_path: Path | None = None) -> Path | None:
29
29
  if cwd_config.exists():
30
30
  return cwd_config
31
31
 
32
- user_config_dir = Path.home() / ".config" / "haiku.rag"
33
- user_config = user_config_dir / "config.yaml"
32
+ # Use same directory as data storage for config
33
+ from haiku.rag.utils import get_default_data_dir
34
+
35
+ user_config = get_default_data_dir() / "config.yaml"
34
36
  if user_config.exists():
35
37
  return user_config
36
38
 
@@ -50,10 +52,14 @@ def generate_default_config() -> dict:
50
52
  "environment": "production",
51
53
  "storage": {
52
54
  "data_dir": "",
53
- "monitor_directories": [],
54
55
  "disable_autocreate": False,
55
56
  "vacuum_retention_seconds": 60,
56
57
  },
58
+ "monitor": {
59
+ "directories": [],
60
+ "ignore_patterns": [],
61
+ "include_patterns": [],
62
+ },
57
63
  "lancedb": {"uri": "", "api_key": "", "region": ""},
58
64
  "embeddings": {
59
65
  "provider": "ollama",
@@ -88,7 +94,7 @@ def load_config_from_env() -> dict:
88
94
  env_mappings = {
89
95
  "ENV": "environment",
90
96
  "DEFAULT_DATA_DIR": ("storage", "data_dir"),
91
- "MONITOR_DIRECTORIES": ("storage", "monitor_directories"),
97
+ "MONITOR_DIRECTORIES": ("monitor", "directories"),
92
98
  "DISABLE_DB_AUTOCREATE": ("storage", "disable_autocreate"),
93
99
  "VACUUM_RETENTION_SECONDS": ("storage", "vacuum_retention_seconds"),
94
100
  "LANCEDB_URI": ("lancedb", "uri"),
haiku/rag/config/models.py CHANGED
@@ -7,11 +7,16 @@ from haiku.rag.utils import get_default_data_dir
7
7
 
8
8
  class StorageConfig(BaseModel):
9
9
  data_dir: Path = Field(default_factory=get_default_data_dir)
10
- monitor_directories: list[Path] = []
11
10
  disable_autocreate: bool = False
12
11
  vacuum_retention_seconds: int = 60
13
12
 
14
13
 
14
+ class MonitorConfig(BaseModel):
15
+ directories: list[Path] = []
16
+ ignore_patterns: list[str] = []
17
+ include_patterns: list[str] = []
18
+
19
+
15
20
  class LanceDBConfig(BaseModel):
16
21
  uri: str = ""
17
22
  api_key: str = ""
@@ -72,6 +77,7 @@ class A2AConfig(BaseModel):
72
77
  class AppConfig(BaseModel):
73
78
  environment: str = "production"
74
79
  storage: StorageConfig = Field(default_factory=StorageConfig)
80
+ monitor: MonitorConfig = Field(default_factory=MonitorConfig)
75
81
  lancedb: LanceDBConfig = Field(default_factory=LanceDBConfig)
76
82
  embeddings: EmbeddingsConfig = Field(default_factory=EmbeddingsConfig)
77
83
  reranking: RerankingConfig = Field(default_factory=RerankingConfig)
haiku/rag/monitor.py CHANGED
@@ -2,9 +2,12 @@ import logging
2
2
  from pathlib import Path
3
3
  from typing import TYPE_CHECKING
4
4
 
5
+ import pathspec
6
+ from pathspec.patterns.gitwildmatch import GitWildMatchPattern
5
7
  from watchfiles import Change, DefaultFilter, awatch
6
8
 
7
9
  from haiku.rag.client import HaikuRAG
10
+ from haiku.rag.config import AppConfig, Config
8
11
  from haiku.rag.store.models.document import Document
9
12
 
10
13
  if TYPE_CHECKING:
@@ -14,25 +17,63 @@ logger = logging.getLogger(__name__)
14
17
 
15
18
 
16
19
  class FileFilter(DefaultFilter):
17
- def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
20
+ def __init__(
21
+ self,
22
+ *,
23
+ ignore_patterns: list[str] | None = None,
24
+ include_patterns: list[str] | None = None,
25
+ ) -> None:
18
26
  # Lazy import to avoid loading docling
19
27
  from haiku.rag.reader import FileReader
20
28
 
21
29
  self.extensions = tuple(FileReader.extensions)
22
- super().__init__(ignore_paths=ignore_paths)
30
+ self.ignore_spec = (
31
+ pathspec.PathSpec.from_lines(GitWildMatchPattern, ignore_patterns)
32
+ if ignore_patterns
33
+ else None
34
+ )
35
+ self.include_spec = (
36
+ pathspec.PathSpec.from_lines(GitWildMatchPattern, include_patterns)
37
+ if include_patterns
38
+ else None
39
+ )
40
+ super().__init__()
23
41
 
24
42
  def __call__(self, change: Change, path: str) -> bool:
25
- return path.endswith(self.extensions) and super().__call__(change, path)
43
+ # Check extension filter
44
+ if not path.endswith(self.extensions):
45
+ return False
46
+
47
+ # Apply include patterns if specified (whitelist mode)
48
+ if self.include_spec:
49
+ if not self.include_spec.match_file(path):
50
+ return False
51
+
52
+ # Apply ignore patterns (blacklist mode)
53
+ if self.ignore_spec:
54
+ if self.ignore_spec.match_file(path):
55
+ return False
56
+
57
+ # Apply default watchfiles filter
58
+ return super().__call__(change, path)
26
59
 
27
60
 
28
61
  class FileWatcher:
29
- def __init__(self, paths: list[Path], client: HaikuRAG):
30
- self.paths = paths
62
+ def __init__(
63
+ self,
64
+ client: HaikuRAG,
65
+ config: AppConfig = Config,
66
+ ):
67
+ self.paths = config.monitor.directories
31
68
  self.client = client
69
+ self.ignore_patterns = config.monitor.ignore_patterns or None
70
+ self.include_patterns = config.monitor.include_patterns or None
32
71
 
33
72
  async def observe(self):
34
73
  logger.info(f"Watching files in {self.paths}")
35
- filter = FileFilter()
74
+ filter = FileFilter(
75
+ ignore_patterns=self.ignore_patterns, include_patterns=self.include_patterns
76
+ )
36
77
  await self.refresh()
37
78
 
38
79
  async for changes in awatch(*self.paths, watch_filter=filter):
@@ -49,10 +90,17 @@ class FileWatcher:
49
90
  # Lazy import to avoid loading docling
50
91
  from haiku.rag.reader import FileReader
51
92
 
93
+ # Create filter to apply same logic as observe()
94
+ filter = FileFilter(
95
+ ignore_patterns=self.ignore_patterns, include_patterns=self.include_patterns
96
+ )
97
+
52
98
  for path in self.paths:
53
99
  for f in Path(path).rglob("**/*"):
54
100
  if f.is_file() and f.suffix in FileReader.extensions:
55
- await self._upsert_document(f)
101
+ # Apply pattern filters
102
+ if filter(Change.added, str(f)):
103
+ await self._upsert_document(f)
56
104
 
57
105
  async def _upsert_document(self, file: Path) -> Document | None:
58
106
  try:
haiku_rag-0.13.1.dist-info/METADATA → haiku_rag-0.13.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.13.1
3
+ Version: 0.13.2
4
4
  Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -17,19 +17,20 @@ Classifier: Programming Language :: Python :: 3.12
17
17
  Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Typing :: Typed
19
19
  Requires-Python: >=3.12
20
- Requires-Dist: docling>=2.56.1
21
- Requires-Dist: fastmcp>=2.12.4
20
+ Requires-Dist: docling>=2.58.0
21
+ Requires-Dist: fastmcp>=2.13.0.2
22
22
  Requires-Dist: httpx>=0.28.1
23
23
  Requires-Dist: lancedb>=0.25.2
24
- Requires-Dist: pydantic-ai>=1.0.18
25
- Requires-Dist: pydantic-graph>=1.0.18
26
- Requires-Dist: pydantic>=2.12.2
27
- Requires-Dist: python-dotenv>=1.1.1
28
- Requires-Dist: pyyaml>=6.0.1
24
+ Requires-Dist: pathspec>=0.12.1
25
+ Requires-Dist: pydantic-ai>=1.7.0
26
+ Requires-Dist: pydantic-graph>=1.7.0
27
+ Requires-Dist: pydantic>=2.12.3
28
+ Requires-Dist: python-dotenv>=1.2.1
29
+ Requires-Dist: pyyaml>=6.0.3
29
30
  Requires-Dist: rich>=14.2.0
30
31
  Requires-Dist: tiktoken>=0.12.0
31
- Requires-Dist: typer>=0.19.2
32
- Requires-Dist: watchfiles>=1.1.0
32
+ Requires-Dist: typer<0.20.0,>=0.19.2
33
+ Requires-Dist: watchfiles>=1.1.1
33
34
  Provides-Extra: a2a
34
35
  Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
35
36
  Provides-Extra: mxbai
haiku_rag-0.13.1.dist-info/RECORD → haiku_rag-0.13.2.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
1
1
  haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- haiku/rag/app.py,sha256=lNPYYWQLOPlOLYiwIR4yQuwzl-LrRNQn7n2nacgdq_k,21594
2
+ haiku/rag/app.py,sha256=hjHZoXfMvb2Pvd4sSdzQVq759zO7UGR9YuyXXA6ZbDI,21514
3
3
  haiku/rag/chunker.py,sha256=pA0S0fFKAuvzGm2dGyp7FAkeFZA0YTCm_ata83Pnflw,1566
4
4
  haiku/rag/cli.py,sha256=Y42tnlVFGvCZVjBcLWrIVgM0A7KjNYX9MAuk9-zQvvE,14523
5
5
  haiku/rag/client.py,sha256=cG6DAhzJJ4vdo8QFn9p8iA6YTa0arMrTtIswoZc7sY0,26816
6
6
  haiku/rag/logging.py,sha256=dm65AwADpcQsH5OAPtRA-4hsw0w5DK-sGOvzYkj6jzw,1720
7
7
  haiku/rag/mcp.py,sha256=txuEnrUMWvs_shQBk15gEkJD7xNdSYzp3z75UUWaHFM,9328
8
- haiku/rag/monitor.py,sha256=d92oRufhI8oYXH7oF6oYVf1_AcpFUafjM6tl4VhAupI,3322
8
+ haiku/rag/monitor.py,sha256=EGQ-sKH1uKBSHtglehuW1COytx-4zKCKAT198tTr0Fo,4889
9
9
  haiku/rag/reader.py,sha256=aW8LG0X31kVWS7kU2tKVpe8RqP3Ne_oIidd_X3UDLH0,3307
10
10
  haiku/rag/utils.py,sha256=47ehVYJlLz6Of_Ua89qj94JclO5ZPBFU9eyonifvnVg,6131
11
11
  haiku/rag/a2a/__init__.py,sha256=tY_jLSUM0zKzyBctMkjpqmDWpxWc9QVEK1qAsb-plGs,5933
@@ -16,9 +16,9 @@ haiku/rag/a2a/prompts.py,sha256=yCla8x0hbOhKrkuaqVrF1upn-YjQM3-2NsE2TSnet0M,3030
16
16
  haiku/rag/a2a/skills.py,sha256=dwyD2Bn493eL3Vf4uQzmyxj_9IUSb66kQ-085FBAuCs,2701
17
17
  haiku/rag/a2a/storage.py,sha256=c8vmGCiZ3nuV9wUuTnwpoRD2HVVvK2JPySQOc5PVMvg,2759
18
18
  haiku/rag/a2a/worker.py,sha256=S9hiA1ncpJPdtN0eEmMjsvr5LQ4wMVN5R8CjYkTeohU,12367
19
- haiku/rag/config/__init__.py,sha256=PSHsc7gXjvRxpzN4rxR083-WYU-pocqm0hf2uhkr9Vw,1019
20
- haiku/rag/config/loader.py,sha256=eWkD8uVTa19nf7d7yyZImk7t5k0-SagYH4RSBqfkPxQ,4848
21
- haiku/rag/config/models.py,sha256=vkq2WyJfuY1cm8YEFlox0Cd8sVyXb4l1XX2fkBjI6I4,2169
19
+ haiku/rag/config/__init__.py,sha256=1AiYIX9fiVvi4nKlRnCf18ywFHQECZ-mXXvOIRYfGSU,1059
20
+ haiku/rag/config/loader.py,sha256=P2vKmk1fi9O2fbQAyWAq5SqLaFBAq_sbVr8lqi8YtlY,4982
21
+ haiku/rag/config/models.py,sha256=yySklxjYODF5Tj6xTQogG8LTjNaKnO8SLwqdeP6buKY,2334
22
22
  haiku/rag/embeddings/__init__.py,sha256=zwWRU9S5YGEJxlgPv5haHBgj3LUJMe-dEwr3LKLa9RY,1731
23
23
  haiku/rag/embeddings/base.py,sha256=kzca54e2HGzS_0YKt9OLESM9lrFKpBm_97V07jx0aas,783
24
24
  haiku/rag/embeddings/ollama.py,sha256=_uIIObbZX9QVU1lcgWQFooA3b-AeZRNncM7yQ2TxlEU,825
@@ -70,8 +70,8 @@ haiku/rag/store/repositories/settings.py,sha256=15gS7Xj7cG4qetv_ioxZO_r31by7GuSq
70
70
  haiku/rag/store/upgrades/__init__.py,sha256=RQ8A6rEXBASLb5PD9vdDnEas_m_GgRzzdVu4B88Snqc,1975
71
71
  haiku/rag/store/upgrades/v0_10_1.py,sha256=qNGnxj6hoHaHJ1rKTiALfw0c9NQOi0KAK-VZCD_073A,1959
72
72
  haiku/rag/store/upgrades/v0_9_3.py,sha256=NrjNilQSgDtFWRbL3ZUtzQzJ8tf9u0dDRJtnDFwwbdw,3322
73
- haiku_rag-0.13.1.dist-info/METADATA,sha256=xoojNWUahlMw6gWdujYr_VNti4ss4We0mL0rkTOkxgo,8139
74
- haiku_rag-0.13.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
75
- haiku_rag-0.13.1.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
76
- haiku_rag-0.13.1.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
77
- haiku_rag-0.13.1.dist-info/RECORD,,
73
+ haiku_rag-0.13.2.dist-info/METADATA,sha256=YyvjWsHy7Y8DX8-spZcc7wzUsb7yn2okaMS3SiWw2Nk,8179
74
+ haiku_rag-0.13.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
75
+ haiku_rag-0.13.2.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
76
+ haiku_rag-0.13.2.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
77
+ haiku_rag-0.13.2.dist-info/RECORD,,