chunksilo 2.1.0__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chunksilo might be problematic. Click here for more details.
- {chunksilo-2.1.0/src/chunksilo.egg-info → chunksilo-2.1.2}/PKG-INFO +2 -3
- {chunksilo-2.1.0 → chunksilo-2.1.2}/README.md +1 -1
- {chunksilo-2.1.0 → chunksilo-2.1.2}/pyproject.toml +1 -1
- {chunksilo-2.1.0 → chunksilo-2.1.2}/requirements.txt +0 -1
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/__init__.py +1 -1
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/search.py +54 -27
- {chunksilo-2.1.0 → chunksilo-2.1.2/src/chunksilo.egg-info}/PKG-INFO +2 -3
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/requires.txt +0 -1
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_rag_metrics.py +0 -3
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_retrieval_only.py +0 -3
- {chunksilo-2.1.0 → chunksilo-2.1.2}/LICENSE +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/NOTICE +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/setup.cfg +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/__main__.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/cfgload.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/cli.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/confluence_html_formatter.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/index.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/server.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/SOURCES.txt +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/dependency_links.txt +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/entry_points.txt +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/top_level.txt +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_chunk_location.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_confluence_html_formatter.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_error_handling.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_heading_path_integration.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_incremental_ingest.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_jira_integration.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_system.py +0 -0
- {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chunksilo
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: Local RAG-based semantic document search with MCP server interface
|
|
5
5
|
Author: Fredrik Reveny
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -26,7 +26,6 @@ Requires-Dist: pillow<11,>=10.3.0
|
|
|
26
26
|
Requires-Dist: pypdf<7,>=5.1.0
|
|
27
27
|
Requires-Dist: python-docx<2,>=1.1.0
|
|
28
28
|
Requires-Dist: mcp<2,>=1.0.0
|
|
29
|
-
Requires-Dist: python-dotenv<2,>=1.0.0
|
|
30
29
|
Requires-Dist: huggingface-hub<2,>=0.22.0
|
|
31
30
|
Requires-Dist: flashrank<1,>=0.1.0
|
|
32
31
|
Requires-Dist: fastembed<1,>=0.5.0
|
|
@@ -41,7 +40,7 @@ Requires-Dist: requests<3,>=2.31.0; extra == "test"
|
|
|
41
40
|
Dynamic: license-file
|
|
42
41
|
|
|
43
42
|
<p align="center">
|
|
44
|
-
<img src="chunksilo.png" alt="ChunkSilo Logo" width="500">
|
|
43
|
+
<img src="https://raw.githubusercontent.com/Chetic/chunksilo/main/chunksilo.png" alt="ChunkSilo Logo" width="500">
|
|
45
44
|
</p>
|
|
46
45
|
|
|
47
46
|
# ChunkSilo MCP Server
|
|
@@ -738,8 +738,8 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
|
|
|
738
738
|
|
|
739
739
|
Configuration Requirements:
|
|
740
740
|
config["jira"]["url"]: Jira base URL (empty = disabled)
|
|
741
|
-
config["jira"]["username"]: Jira username
|
|
742
|
-
config["jira"]["api_token"]:
|
|
741
|
+
config["jira"]["username"]: Jira username/email (required for Cloud, optional for Server PAT)
|
|
742
|
+
config["jira"]["api_token"]: API token (Cloud) or Personal Access Token (Server/Data Center)
|
|
743
743
|
config["jira"]["max_results"]: Maximum issues to return
|
|
744
744
|
config["jira"]["projects"]: List of project keys (empty = all)
|
|
745
745
|
config["jira"]["include_comments"]: Include issue comments
|
|
@@ -763,8 +763,12 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
|
|
|
763
763
|
- Automatically configured through jira_options["verify"]
|
|
764
764
|
|
|
765
765
|
Authentication:
|
|
766
|
-
-
|
|
767
|
-
|
|
766
|
+
- Jira Cloud: Set both username (email) and api_token (API token)
|
|
767
|
+
Uses basic auth internally
|
|
768
|
+
- Jira Server/Data Center with PAT: Set only api_token (Personal Access Token)
|
|
769
|
+
Leave username empty; uses bearer token auth internally
|
|
770
|
+
- Jira Server/Data Center with password: Set username and api_token (password)
|
|
771
|
+
Uses basic auth internally (if basic auth enabled on server)
|
|
768
772
|
|
|
769
773
|
References:
|
|
770
774
|
- Jira REST API: https://developer.atlassian.com/cloud/jira/platform/rest/v3/
|
|
@@ -790,13 +794,10 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
|
|
|
790
794
|
ca_bundle_path = config["ssl"]["ca_bundle_path"] or None
|
|
791
795
|
|
|
792
796
|
# Validate required credentials are present
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
if not api_token:
|
|
798
|
-
missing.append("jira.api_token")
|
|
799
|
-
logger.warning(f"Jira search skipped: missing {', '.join(missing)} in config")
|
|
797
|
+
# For Jira Cloud: both username and api_token required (basic auth)
|
|
798
|
+
# For Jira Server/Data Center with PAT: only api_token required (token auth)
|
|
799
|
+
if not api_token:
|
|
800
|
+
logger.warning("Jira search skipped: missing jira.api_token in config")
|
|
800
801
|
return []
|
|
801
802
|
|
|
802
803
|
try:
|
|
@@ -806,12 +807,23 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
|
|
|
806
807
|
if ca_bundle_path:
|
|
807
808
|
jira_options["verify"] = ca_bundle_path
|
|
808
809
|
|
|
809
|
-
#
|
|
810
|
-
#
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
810
|
+
# Choose authentication method based on credentials provided:
|
|
811
|
+
# - Username + API token: Use basic auth (Jira Cloud, or Server with password)
|
|
812
|
+
# - API token only: Use token auth (Jira Server/Data Center with PAT)
|
|
813
|
+
if username:
|
|
814
|
+
# Basic auth for Jira Cloud (username + API token)
|
|
815
|
+
# Also works for Jira Server with username + password
|
|
816
|
+
jira_client = JIRA(
|
|
817
|
+
options=jira_options,
|
|
818
|
+
basic_auth=(username, api_token)
|
|
819
|
+
)
|
|
820
|
+
else:
|
|
821
|
+
# Token auth for Jira Server/Data Center Personal Access Tokens (PAT)
|
|
822
|
+
# PATs are used alone without a username
|
|
823
|
+
jira_client = JIRA(
|
|
824
|
+
options=jira_options,
|
|
825
|
+
token_auth=api_token
|
|
826
|
+
)
|
|
815
827
|
|
|
816
828
|
# Construct JQL with text search and project filtering
|
|
817
829
|
jql = _prepare_jira_jql_query(query, config)
|
|
@@ -1116,6 +1128,7 @@ def run_search(
|
|
|
1116
1128
|
rerank_request = RerankRequest(query=enhanced_query, passages=passages)
|
|
1117
1129
|
reranked_results = reranker.rerank(rerank_request)
|
|
1118
1130
|
|
|
1131
|
+
# Build text-to-node mapping for fallback text matching
|
|
1119
1132
|
text_to_indices: dict[str, list[tuple[int, NodeWithScore]]] = {}
|
|
1120
1133
|
for idx, node in enumerate(nodes):
|
|
1121
1134
|
node_text = node.node.get_content() or ""
|
|
@@ -1126,20 +1139,34 @@ def run_search(
|
|
|
1126
1139
|
reranked_nodes = []
|
|
1127
1140
|
seen_indices: set[int] = set()
|
|
1128
1141
|
for result in reranked_results:
|
|
1129
|
-
doc_text = result.get("text", "")
|
|
1130
1142
|
score = result.get("score", 0.0)
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1143
|
+
result_idx = result.get("id")
|
|
1144
|
+
|
|
1145
|
+
# Primary: match by index (flashrank returns original passage index)
|
|
1146
|
+
if result_idx is not None and 0 <= result_idx < len(nodes):
|
|
1147
|
+
if result_idx not in seen_indices:
|
|
1148
|
+
node = nodes[result_idx]
|
|
1149
|
+
reranked_nodes.append(node)
|
|
1150
|
+
rerank_scores[id(node)] = float(score)
|
|
1151
|
+
seen_indices.add(result_idx)
|
|
1152
|
+
else:
|
|
1153
|
+
# Fallback: match by text content
|
|
1154
|
+
doc_text = result.get("text", "")
|
|
1155
|
+
if doc_text in text_to_indices:
|
|
1156
|
+
for idx, node in text_to_indices[doc_text]:
|
|
1157
|
+
if idx not in seen_indices:
|
|
1158
|
+
reranked_nodes.append(node)
|
|
1159
|
+
rerank_scores[id(node)] = float(score)
|
|
1160
|
+
seen_indices.add(idx)
|
|
1161
|
+
break
|
|
1162
|
+
|
|
1163
|
+
# Add remaining unmatched nodes with minimum matched score
|
|
1164
|
+
# This ensures Jira/Confluence results aren't dropped due to text mismatch
|
|
1165
|
+
min_score = min(rerank_scores.values()) if rerank_scores else 0.0
|
|
1140
1166
|
for idx, node in enumerate(nodes):
|
|
1141
1167
|
if idx not in seen_indices:
|
|
1142
1168
|
reranked_nodes.append(node)
|
|
1169
|
+
rerank_scores[id(node)] = min_score
|
|
1143
1170
|
|
|
1144
1171
|
nodes = reranked_nodes[:rerank_limit]
|
|
1145
1172
|
except Exception as e:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chunksilo
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: Local RAG-based semantic document search with MCP server interface
|
|
5
5
|
Author: Fredrik Reveny
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -26,7 +26,6 @@ Requires-Dist: pillow<11,>=10.3.0
|
|
|
26
26
|
Requires-Dist: pypdf<7,>=5.1.0
|
|
27
27
|
Requires-Dist: python-docx<2,>=1.1.0
|
|
28
28
|
Requires-Dist: mcp<2,>=1.0.0
|
|
29
|
-
Requires-Dist: python-dotenv<2,>=1.0.0
|
|
30
29
|
Requires-Dist: huggingface-hub<2,>=0.22.0
|
|
31
30
|
Requires-Dist: flashrank<1,>=0.1.0
|
|
32
31
|
Requires-Dist: fastembed<1,>=0.5.0
|
|
@@ -41,7 +40,7 @@ Requires-Dist: requests<3,>=2.31.0; extra == "test"
|
|
|
41
40
|
Dynamic: license-file
|
|
42
41
|
|
|
43
42
|
<p align="center">
|
|
44
|
-
<img src="chunksilo.png" alt="ChunkSilo Logo" width="500">
|
|
43
|
+
<img src="https://raw.githubusercontent.com/Chetic/chunksilo/main/chunksilo.png" alt="ChunkSilo Logo" width="500">
|
|
45
44
|
</p>
|
|
46
45
|
|
|
47
46
|
# ChunkSilo MCP Server
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
"""Test the RAG system in retrieval-only mode (no LLM in the MCP server)."""
|
|
3
3
|
import traceback
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from dotenv import load_dotenv
|
|
6
5
|
import pytest
|
|
7
6
|
|
|
8
7
|
from chunksilo.index import load_index_config, build_index
|
|
@@ -11,8 +10,6 @@ from chunksilo.cfgload import load_config
|
|
|
11
10
|
|
|
12
11
|
STORAGE_DIR = Path(load_config()["storage"]["storage_dir"])
|
|
13
12
|
|
|
14
|
-
load_dotenv()
|
|
15
|
-
|
|
16
13
|
|
|
17
14
|
def test_ingestion():
|
|
18
15
|
"""Test the ingestion pipeline."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|