haiku.rag 0.13.1__tar.gz → 0.13.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/PKG-INFO +11 -10
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/pyproject.toml +18 -17
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/app.py +1 -3
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/config/__init__.py +2 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/config/loader.py +11 -5
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/config/models.py +7 -1
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/monitor.py +55 -7
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/uv.lock +366 -307
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/.dockerignore +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/.gitignore +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/.python-version +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/LICENSE +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/README.md +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/mkdocs.yml +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/server.json +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/benchmark.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/config.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/datasets/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/datasets/repliqa.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/datasets/wix.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/llm_judge.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/evaluations/prompts.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/client.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/context.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/models.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/prompts.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/skills.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/storage.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/a2a/worker.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/cli.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/client.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/base.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/common.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/models.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/nodes/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/nodes/analysis.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/nodes/plan.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/nodes/search.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/graph/prompts.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/agent.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/deep/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/deep/dependencies.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/deep/graph.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/deep/models.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/deep/nodes.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/deep/prompts.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/deep/state.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/common.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/dependencies.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/graph.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/models.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/prompts.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/state.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/research/stream.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
- {haiku_rag-0.13.1 → haiku_rag-0.13.2}/src/haiku/rag/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: haiku.rag
|
|
3
|
-
Version: 0.13.1
|
|
3
|
+
Version: 0.13.2
|
|
4
4
|
Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
|
|
5
5
|
Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -17,19 +17,20 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
18
|
Classifier: Typing :: Typed
|
|
19
19
|
Requires-Python: >=3.12
|
|
20
|
-
Requires-Dist: docling>=2.
|
|
21
|
-
Requires-Dist: fastmcp>=2.
|
|
20
|
+
Requires-Dist: docling>=2.58.0
|
|
21
|
+
Requires-Dist: fastmcp>=2.13.0.2
|
|
22
22
|
Requires-Dist: httpx>=0.28.1
|
|
23
23
|
Requires-Dist: lancedb>=0.25.2
|
|
24
|
-
Requires-Dist:
|
|
25
|
-
Requires-Dist: pydantic-
|
|
26
|
-
Requires-Dist: pydantic>=
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: pathspec>=0.12.1
|
|
25
|
+
Requires-Dist: pydantic-ai>=1.7.0
|
|
26
|
+
Requires-Dist: pydantic-graph>=1.7.0
|
|
27
|
+
Requires-Dist: pydantic>=2.12.3
|
|
28
|
+
Requires-Dist: python-dotenv>=1.2.1
|
|
29
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
29
30
|
Requires-Dist: rich>=14.2.0
|
|
30
31
|
Requires-Dist: tiktoken>=0.12.0
|
|
31
|
-
Requires-Dist: typer>=0.19.2
|
|
32
|
-
Requires-Dist: watchfiles>=1.1.
|
|
32
|
+
Requires-Dist: typer<0.20.0,>=0.19.2
|
|
33
|
+
Requires-Dist: watchfiles>=1.1.1
|
|
33
34
|
Provides-Extra: a2a
|
|
34
35
|
Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
|
|
35
36
|
Provides-Extra: mxbai
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
name = "haiku.rag"
|
|
4
4
|
description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
|
|
5
|
-
version = "0.13.1"
|
|
5
|
+
version = "0.13.2"
|
|
6
6
|
authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
|
|
7
7
|
license = { text = "MIT" }
|
|
8
8
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -22,19 +22,20 @@ classifiers = [
|
|
|
22
22
|
]
|
|
23
23
|
|
|
24
24
|
dependencies = [
|
|
25
|
-
"docling>=2.
|
|
26
|
-
"fastmcp>=2.
|
|
25
|
+
"docling>=2.58.0",
|
|
26
|
+
"fastmcp>=2.13.0.2",
|
|
27
27
|
"httpx>=0.28.1",
|
|
28
28
|
"lancedb>=0.25.2",
|
|
29
|
-
"
|
|
30
|
-
"pydantic
|
|
31
|
-
"pydantic-
|
|
32
|
-
"
|
|
33
|
-
"
|
|
29
|
+
"pathspec>=0.12.1",
|
|
30
|
+
"pydantic>=2.12.3",
|
|
31
|
+
"pydantic-ai>=1.7.0",
|
|
32
|
+
"pydantic-graph>=1.7.0",
|
|
33
|
+
"python-dotenv>=1.2.1",
|
|
34
|
+
"pyyaml>=6.0.3",
|
|
34
35
|
"rich>=14.2.0",
|
|
35
36
|
"tiktoken>=0.12.0",
|
|
36
|
-
"typer>=0.19.2",
|
|
37
|
-
"watchfiles>=1.1.
|
|
37
|
+
"typer>=0.19.2,<0.20.0",
|
|
38
|
+
"watchfiles>=1.1.1",
|
|
38
39
|
]
|
|
39
40
|
|
|
40
41
|
[project.optional-dependencies]
|
|
@@ -57,17 +58,17 @@ packages = ["src/haiku"]
|
|
|
57
58
|
|
|
58
59
|
[dependency-groups]
|
|
59
60
|
dev = [
|
|
60
|
-
"datasets>=4.
|
|
61
|
-
"logfire>=4.
|
|
61
|
+
"datasets>=4.3.0",
|
|
62
|
+
"logfire>=4.14.2",
|
|
62
63
|
"mkdocs>=1.6.1",
|
|
63
|
-
"mkdocs-material>=9.6.
|
|
64
|
-
"pydantic-evals>=1.0
|
|
65
|
-
"pre-commit>=4.
|
|
66
|
-
"pyright>=1.1.
|
|
64
|
+
"mkdocs-material>=9.6.22",
|
|
65
|
+
"pydantic-evals>=1.7.0",
|
|
66
|
+
"pre-commit>=4.3.0",
|
|
67
|
+
"pyright>=1.1.407",
|
|
67
68
|
"pytest>=8.4.2",
|
|
68
69
|
"pytest-asyncio>=1.2.0",
|
|
69
70
|
"pytest-cov>=7.0.0",
|
|
70
|
-
"ruff>=0.
|
|
71
|
+
"ruff>=0.14.2",
|
|
71
72
|
]
|
|
72
73
|
|
|
73
74
|
[tool.ruff]
|
|
@@ -474,9 +474,7 @@ class HaikuRAGApp:
|
|
|
474
474
|
|
|
475
475
|
# Start file monitor if enabled
|
|
476
476
|
if enable_monitor:
|
|
477
|
-
monitor = FileWatcher(
|
|
478
|
-
paths=Config.storage.monitor_directories, client=client
|
|
479
|
-
)
|
|
477
|
+
monitor = FileWatcher(client=client)
|
|
480
478
|
monitor_task = asyncio.create_task(monitor.observe())
|
|
481
479
|
tasks.append(monitor_task)
|
|
482
480
|
|
|
@@ -11,6 +11,7 @@ from haiku.rag.config.models import (
|
|
|
11
11
|
AppConfig,
|
|
12
12
|
EmbeddingsConfig,
|
|
13
13
|
LanceDBConfig,
|
|
14
|
+
MonitorConfig,
|
|
14
15
|
OllamaConfig,
|
|
15
16
|
ProcessingConfig,
|
|
16
17
|
ProvidersConfig,
|
|
@@ -25,6 +26,7 @@ __all__ = [
|
|
|
25
26
|
"Config",
|
|
26
27
|
"AppConfig",
|
|
27
28
|
"StorageConfig",
|
|
29
|
+
"MonitorConfig",
|
|
28
30
|
"LanceDBConfig",
|
|
29
31
|
"EmbeddingsConfig",
|
|
30
32
|
"RerankingConfig",
|
|
@@ -10,7 +10,7 @@ def find_config_file(cli_path: Path | None = None) -> Path | None:
|
|
|
10
10
|
Search order:
|
|
11
11
|
1. CLI-provided path (via HAIKU_RAG_CONFIG_PATH env var or parameter)
|
|
12
12
|
2. ./haiku.rag.yaml (current directory)
|
|
13
|
-
3.
|
|
13
|
+
3. Platform-specific user config directory
|
|
14
14
|
|
|
15
15
|
Returns None if no config file is found.
|
|
16
16
|
"""
|
|
@@ -29,8 +29,10 @@ def find_config_file(cli_path: Path | None = None) -> Path | None:
|
|
|
29
29
|
if cwd_config.exists():
|
|
30
30
|
return cwd_config
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
# Use same directory as data storage for config
|
|
33
|
+
from haiku.rag.utils import get_default_data_dir
|
|
34
|
+
|
|
35
|
+
user_config = get_default_data_dir() / "config.yaml"
|
|
34
36
|
if user_config.exists():
|
|
35
37
|
return user_config
|
|
36
38
|
|
|
@@ -50,10 +52,14 @@ def generate_default_config() -> dict:
|
|
|
50
52
|
"environment": "production",
|
|
51
53
|
"storage": {
|
|
52
54
|
"data_dir": "",
|
|
53
|
-
"monitor_directories": [],
|
|
54
55
|
"disable_autocreate": False,
|
|
55
56
|
"vacuum_retention_seconds": 60,
|
|
56
57
|
},
|
|
58
|
+
"monitor": {
|
|
59
|
+
"directories": [],
|
|
60
|
+
"ignore_patterns": [],
|
|
61
|
+
"include_patterns": [],
|
|
62
|
+
},
|
|
57
63
|
"lancedb": {"uri": "", "api_key": "", "region": ""},
|
|
58
64
|
"embeddings": {
|
|
59
65
|
"provider": "ollama",
|
|
@@ -88,7 +94,7 @@ def load_config_from_env() -> dict:
|
|
|
88
94
|
env_mappings = {
|
|
89
95
|
"ENV": "environment",
|
|
90
96
|
"DEFAULT_DATA_DIR": ("storage", "data_dir"),
|
|
91
|
-
"MONITOR_DIRECTORIES": ("storage", "monitor_directories"),
|
|
97
|
+
"MONITOR_DIRECTORIES": ("monitor", "directories"),
|
|
92
98
|
"DISABLE_DB_AUTOCREATE": ("storage", "disable_autocreate"),
|
|
93
99
|
"VACUUM_RETENTION_SECONDS": ("storage", "vacuum_retention_seconds"),
|
|
94
100
|
"LANCEDB_URI": ("lancedb", "uri"),
|
|
@@ -7,11 +7,16 @@ from haiku.rag.utils import get_default_data_dir
|
|
|
7
7
|
|
|
8
8
|
class StorageConfig(BaseModel):
|
|
9
9
|
data_dir: Path = Field(default_factory=get_default_data_dir)
|
|
10
|
-
monitor_directories: list[Path] = []
|
|
11
10
|
disable_autocreate: bool = False
|
|
12
11
|
vacuum_retention_seconds: int = 60
|
|
13
12
|
|
|
14
13
|
|
|
14
|
+
class MonitorConfig(BaseModel):
|
|
15
|
+
directories: list[Path] = []
|
|
16
|
+
ignore_patterns: list[str] = []
|
|
17
|
+
include_patterns: list[str] = []
|
|
18
|
+
|
|
19
|
+
|
|
15
20
|
class LanceDBConfig(BaseModel):
|
|
16
21
|
uri: str = ""
|
|
17
22
|
api_key: str = ""
|
|
@@ -72,6 +77,7 @@ class A2AConfig(BaseModel):
|
|
|
72
77
|
class AppConfig(BaseModel):
|
|
73
78
|
environment: str = "production"
|
|
74
79
|
storage: StorageConfig = Field(default_factory=StorageConfig)
|
|
80
|
+
monitor: MonitorConfig = Field(default_factory=MonitorConfig)
|
|
75
81
|
lancedb: LanceDBConfig = Field(default_factory=LanceDBConfig)
|
|
76
82
|
embeddings: EmbeddingsConfig = Field(default_factory=EmbeddingsConfig)
|
|
77
83
|
reranking: RerankingConfig = Field(default_factory=RerankingConfig)
|
|
@@ -2,9 +2,12 @@ import logging
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
|
+
import pathspec
|
|
6
|
+
from pathspec.patterns.gitwildmatch import GitWildMatchPattern
|
|
5
7
|
from watchfiles import Change, DefaultFilter, awatch
|
|
6
8
|
|
|
7
9
|
from haiku.rag.client import HaikuRAG
|
|
10
|
+
from haiku.rag.config import AppConfig, Config
|
|
8
11
|
from haiku.rag.store.models.document import Document
|
|
9
12
|
|
|
10
13
|
if TYPE_CHECKING:
|
|
@@ -14,25 +17,63 @@ logger = logging.getLogger(__name__)
|
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
class FileFilter(DefaultFilter):
|
|
17
|
-
def __init__(
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
*,
|
|
23
|
+
ignore_patterns: list[str] | None = None,
|
|
24
|
+
include_patterns: list[str] | None = None,
|
|
25
|
+
) -> None:
|
|
18
26
|
# Lazy import to avoid loading docling
|
|
19
27
|
from haiku.rag.reader import FileReader
|
|
20
28
|
|
|
21
29
|
self.extensions = tuple(FileReader.extensions)
|
|
22
|
-
|
|
30
|
+
self.ignore_spec = (
|
|
31
|
+
pathspec.PathSpec.from_lines(GitWildMatchPattern, ignore_patterns)
|
|
32
|
+
if ignore_patterns
|
|
33
|
+
else None
|
|
34
|
+
)
|
|
35
|
+
self.include_spec = (
|
|
36
|
+
pathspec.PathSpec.from_lines(GitWildMatchPattern, include_patterns)
|
|
37
|
+
if include_patterns
|
|
38
|
+
else None
|
|
39
|
+
)
|
|
40
|
+
super().__init__()
|
|
23
41
|
|
|
24
42
|
def __call__(self, change: Change, path: str) -> bool:
|
|
25
|
-
|
|
43
|
+
# Check extension filter
|
|
44
|
+
if not path.endswith(self.extensions):
|
|
45
|
+
return False
|
|
46
|
+
|
|
47
|
+
# Apply include patterns if specified (whitelist mode)
|
|
48
|
+
if self.include_spec:
|
|
49
|
+
if not self.include_spec.match_file(path):
|
|
50
|
+
return False
|
|
51
|
+
|
|
52
|
+
# Apply ignore patterns (blacklist mode)
|
|
53
|
+
if self.ignore_spec:
|
|
54
|
+
if self.ignore_spec.match_file(path):
|
|
55
|
+
return False
|
|
56
|
+
|
|
57
|
+
# Apply default watchfiles filter
|
|
58
|
+
return super().__call__(change, path)
|
|
26
59
|
|
|
27
60
|
|
|
28
61
|
class FileWatcher:
|
|
29
|
-
def __init__(
|
|
30
|
-
self
|
|
62
|
+
def __init__(
|
|
63
|
+
self,
|
|
64
|
+
client: HaikuRAG,
|
|
65
|
+
config: AppConfig = Config,
|
|
66
|
+
):
|
|
67
|
+
self.paths = config.monitor.directories
|
|
31
68
|
self.client = client
|
|
69
|
+
self.ignore_patterns = config.monitor.ignore_patterns or None
|
|
70
|
+
self.include_patterns = config.monitor.include_patterns or None
|
|
32
71
|
|
|
33
72
|
async def observe(self):
|
|
34
73
|
logger.info(f"Watching files in {self.paths}")
|
|
35
|
-
filter = FileFilter()
|
|
74
|
+
filter = FileFilter(
|
|
75
|
+
ignore_patterns=self.ignore_patterns, include_patterns=self.include_patterns
|
|
76
|
+
)
|
|
36
77
|
await self.refresh()
|
|
37
78
|
|
|
38
79
|
async for changes in awatch(*self.paths, watch_filter=filter):
|
|
@@ -49,10 +90,17 @@ class FileWatcher:
|
|
|
49
90
|
# Lazy import to avoid loading docling
|
|
50
91
|
from haiku.rag.reader import FileReader
|
|
51
92
|
|
|
93
|
+
# Create filter to apply same logic as observe()
|
|
94
|
+
filter = FileFilter(
|
|
95
|
+
ignore_patterns=self.ignore_patterns, include_patterns=self.include_patterns
|
|
96
|
+
)
|
|
97
|
+
|
|
52
98
|
for path in self.paths:
|
|
53
99
|
for f in Path(path).rglob("**/*"):
|
|
54
100
|
if f.is_file() and f.suffix in FileReader.extensions:
|
|
55
|
-
|
|
101
|
+
# Apply pattern filters
|
|
102
|
+
if filter(Change.added, str(f)):
|
|
103
|
+
await self._upsert_document(f)
|
|
56
104
|
|
|
57
105
|
async def _upsert_document(self, file: Path) -> Document | None:
|
|
58
106
|
try:
|