contextinator 1.2.4__tar.gz → 1.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {contextinator-1.2.4/src/contextinator.egg-info → contextinator-1.2.5}/PKG-INFO +2 -1
- {contextinator-1.2.4 → contextinator-1.2.5}/README.md +1 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/_version.py +3 -3
- contextinator-1.2.5/src/contextinator/tools/cat_file.py +110 -0
- {contextinator-1.2.4 → contextinator-1.2.5/src/contextinator.egg-info}/PKG-INFO +2 -1
- contextinator-1.2.4/src/contextinator/tools/cat_file.py +0 -85
- {contextinator-1.2.4 → contextinator-1.2.5}/.env.example +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/.github/RELEASE_PROCESS.md +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/.github/RELEASING.md +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/.github/workflows/release.yml +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/.gitignore +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/CODE_OF_CONDUCT.md +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/CONTRIBUTING.md +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/LICENSE +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/MANIFEST.in +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/USAGE.md +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/docker-compose.yml +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/docs/0banner.png +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/docs/banner.webp +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/pyproject.toml +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/setup.cfg +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/__main__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/ast_parser.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/ast_visualizer.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/chunk_service.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/context_builder.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/file_discovery.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/node_collector.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/notebook_parser.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/chunking/splitter.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/cli.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/config/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/config/settings.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/embedding/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/embedding/embedding_service.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/ingestion/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/ingestion/async_service.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/tools/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/tools/grep_search.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/tools/repo_structure.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/tools/semantic_search.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/tools/symbol_search.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/exceptions.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/hash_utils.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/logger.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/output_formatter.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/progress.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/repo_utils.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/rich_help.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/token_counter.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/utils/toon_encoder.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/vectorstore/__init__.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/vectorstore/async_chroma.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/vectorstore/chroma_store.py +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator.egg-info/SOURCES.txt +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator.egg-info/dependency_links.txt +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator.egg-info/entry_points.txt +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator.egg-info/requires.txt +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator.egg-info/top_level.txt +0 -0
- {contextinator-1.2.4 → contextinator-1.2.5}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: contextinator
|
|
3
|
-
Version: 1.2.4
|
|
3
|
+
Version: 1.2.5
|
|
4
4
|
Summary: Intelligent Codebase Understanding for AI Agents - Transform any codebase into semantically-aware, searchable knowledge
|
|
5
5
|
Author-email: STARTHACK Team <founders@starthack.io>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -85,6 +85,7 @@ Dynamic: license-file
|
|
|
85
85
|
Turn any codebase into semantically-aware, searchable knowledge for AI-powered workflows.
|
|
86
86
|
</p>
|
|
87
87
|
|
|
88
|
+
|
|
88
89
|
### Key Features
|
|
89
90
|
|
|
90
91
|
- **AST-Powered Chunking** - Extract functions, classes, and methods from 23+ programming languages
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '1.2.4'
|
|
32
|
-
__version_tuple__ = version_tuple = (1, 2, 4)
|
|
31
|
+
__version__ = version = '1.2.5'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 2, 5)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g8ab564fe4'
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Cat file with TRUE async."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import hashlib
|
|
5
|
+
from typing import Dict, List, Optional
|
|
6
|
+
from ..utils.logger import logger
|
|
7
|
+
from ..config import USE_CHROMA_SERVER
|
|
8
|
+
|
|
9
|
+
_async_chroma_client = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def _get_async_chroma():
|
|
13
|
+
global _async_chroma_client
|
|
14
|
+
if _async_chroma_client is None:
|
|
15
|
+
if USE_CHROMA_SERVER:
|
|
16
|
+
from ..vectorstore.async_chroma import get_async_client
|
|
17
|
+
from ..config import CHROMA_SERVER_URL
|
|
18
|
+
from urllib.parse import urlparse
|
|
19
|
+
|
|
20
|
+
parsed = urlparse(CHROMA_SERVER_URL)
|
|
21
|
+
_async_chroma_client = await get_async_client(
|
|
22
|
+
host=parsed.hostname or "localhost", port=parsed.port or 8000
|
|
23
|
+
)
|
|
24
|
+
else:
|
|
25
|
+
return None
|
|
26
|
+
return _async_chroma_client
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def cat_file(
|
|
30
|
+
collection_name: str, file_path: str, chromadb_dir: Optional[str] = None
|
|
31
|
+
) -> str:
|
|
32
|
+
"""TRUE async cat file."""
|
|
33
|
+
if not collection_name or not file_path:
|
|
34
|
+
raise ValueError("Collection name and file path required")
|
|
35
|
+
|
|
36
|
+
if file_path.startswith("/"):
|
|
37
|
+
file_path = file_path[1:]
|
|
38
|
+
|
|
39
|
+
client = await _get_async_chroma()
|
|
40
|
+
from ..config import sanitize_collection_name
|
|
41
|
+
|
|
42
|
+
collection = await client.get_collection(sanitize_collection_name(collection_name))
|
|
43
|
+
|
|
44
|
+
results = await collection.get(
|
|
45
|
+
where={"file_path": file_path}, include=["documents", "metadatas"]
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
if not results["ids"]:
|
|
49
|
+
raise ValueError(f"File not found: {file_path}")
|
|
50
|
+
|
|
51
|
+
logger.debug(f"Found {len(results['ids'])} chunks for file: {file_path}")
|
|
52
|
+
|
|
53
|
+
# Parse and enrich chunk metadata
|
|
54
|
+
chunks = []
|
|
55
|
+
for i, (doc, meta) in enumerate(zip(results["documents"], results["metadatas"])):
|
|
56
|
+
chunk = {
|
|
57
|
+
"content": doc,
|
|
58
|
+
"start_line": meta.get("start_line", 0),
|
|
59
|
+
"end_line": meta.get("end_line", 0),
|
|
60
|
+
"split_index": meta.get("split_index", 0),
|
|
61
|
+
"parent_id": meta.get("parent_id"),
|
|
62
|
+
"is_split": meta.get("is_split", False),
|
|
63
|
+
"original_id": meta.get("original_id"),
|
|
64
|
+
"node_type": meta.get("node_type", "unknown"),
|
|
65
|
+
"chunk_id": results["ids"][i],
|
|
66
|
+
}
|
|
67
|
+
chunks.append(chunk)
|
|
68
|
+
|
|
69
|
+
# Sort chunks by start_line first, then by split_index, then by end_line
|
|
70
|
+
chunks.sort(key=lambda x: (x["start_line"], x["split_index"], x["end_line"]))
|
|
71
|
+
|
|
72
|
+
logger.debug(f"Chunk details: {[(c['start_line'], c['end_line'], c['is_split'], c['split_index'], len(c['content'])) for c in chunks]}")
|
|
73
|
+
|
|
74
|
+
# Reconstruct file
|
|
75
|
+
return _reconstruct_file(chunks)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _reconstruct_file(chunks: List[Dict]) -> str:
|
|
79
|
+
"""Deduplicate and concatenate chunks, removing nested duplicates."""
|
|
80
|
+
if not chunks:
|
|
81
|
+
return ""
|
|
82
|
+
|
|
83
|
+
# Sort by start_line, then by length (longer first)
|
|
84
|
+
chunks.sort(key=lambda x: (int(x.get('start_line', 0)), -int(x.get('end_line', 0))))
|
|
85
|
+
|
|
86
|
+
# Remove chunks that are completely contained in other chunks
|
|
87
|
+
unique = []
|
|
88
|
+
for c in chunks:
|
|
89
|
+
c_start = int(c.get('start_line', 0))
|
|
90
|
+
c_end = int(c.get('end_line', 0))
|
|
91
|
+
|
|
92
|
+
# Check if this chunk is contained in any already-kept chunk
|
|
93
|
+
is_contained = False
|
|
94
|
+
for kept in unique:
|
|
95
|
+
k_start = int(kept.get('start_line', 0))
|
|
96
|
+
k_end = int(kept.get('end_line', 0))
|
|
97
|
+
|
|
98
|
+
# If c is completely inside kept, skip it
|
|
99
|
+
if k_start <= c_start and c_end <= k_end and (k_start != c_start or k_end != c_end):
|
|
100
|
+
is_contained = True
|
|
101
|
+
break
|
|
102
|
+
|
|
103
|
+
if not is_contained:
|
|
104
|
+
unique.append(c)
|
|
105
|
+
|
|
106
|
+
# Concatenate with double newline between chunks
|
|
107
|
+
return '\n\n'.join(c.get('content', '').strip() for c in unique if c.get('content', '').strip())
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
__all__ = ["cat_file"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: contextinator
|
|
3
|
-
Version: 1.2.4
|
|
3
|
+
Version: 1.2.5
|
|
4
4
|
Summary: Intelligent Codebase Understanding for AI Agents - Transform any codebase into semantically-aware, searchable knowledge
|
|
5
5
|
Author-email: STARTHACK Team <founders@starthack.io>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -85,6 +85,7 @@ Dynamic: license-file
|
|
|
85
85
|
Turn any codebase into semantically-aware, searchable knowledge for AI-powered workflows.
|
|
86
86
|
</p>
|
|
87
87
|
|
|
88
|
+
|
|
88
89
|
### Key Features
|
|
89
90
|
|
|
90
91
|
- **AST-Powered Chunking** - Extract functions, classes, and methods from 23+ programming languages
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
"""Cat file with TRUE async."""
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
from typing import Optional
|
|
5
|
-
from ..utils.logger import logger
|
|
6
|
-
from ..config import USE_CHROMA_SERVER
|
|
7
|
-
|
|
8
|
-
_async_chroma_client = None
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
async def _get_async_chroma():
|
|
12
|
-
global _async_chroma_client
|
|
13
|
-
if _async_chroma_client is None:
|
|
14
|
-
if USE_CHROMA_SERVER:
|
|
15
|
-
from ..vectorstore.async_chroma import get_async_client
|
|
16
|
-
from ..config import CHROMA_SERVER_URL
|
|
17
|
-
from urllib.parse import urlparse
|
|
18
|
-
|
|
19
|
-
parsed = urlparse(CHROMA_SERVER_URL)
|
|
20
|
-
_async_chroma_client = await get_async_client(
|
|
21
|
-
host=parsed.hostname or "localhost", port=parsed.port or 8000
|
|
22
|
-
)
|
|
23
|
-
else:
|
|
24
|
-
return None
|
|
25
|
-
return _async_chroma_client
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
async def cat_file(
|
|
29
|
-
collection_name: str, file_path: str, chromadb_dir: Optional[str] = None
|
|
30
|
-
) -> str:
|
|
31
|
-
"""TRUE async cat file."""
|
|
32
|
-
if not collection_name or not file_path:
|
|
33
|
-
raise ValueError("Collection name and file path required")
|
|
34
|
-
|
|
35
|
-
if file_path.startswith("/"):
|
|
36
|
-
file_path = file_path[1:]
|
|
37
|
-
|
|
38
|
-
client = await _get_async_chroma()
|
|
39
|
-
from ..config import sanitize_collection_name
|
|
40
|
-
|
|
41
|
-
collection = await client.get_collection(sanitize_collection_name(collection_name))
|
|
42
|
-
|
|
43
|
-
results = await collection.get(
|
|
44
|
-
where={"file_path": file_path}, include=["documents", "metadatas"]
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
if not results["ids"]:
|
|
48
|
-
raise ValueError(f"File not found: {file_path}")
|
|
49
|
-
|
|
50
|
-
chunks = sorted(
|
|
51
|
-
[
|
|
52
|
-
{
|
|
53
|
-
"content": doc,
|
|
54
|
-
"start_line": meta.get("start_line", 0),
|
|
55
|
-
"end_line": meta.get("end_line", 0),
|
|
56
|
-
"split_index": meta.get("split_index", 0),
|
|
57
|
-
"parent_id": meta.get("parent_id"),
|
|
58
|
-
}
|
|
59
|
-
for doc, meta in zip(results["documents"], results["metadatas"])
|
|
60
|
-
],
|
|
61
|
-
key=lambda x: (x["start_line"], x["split_index"]),
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
# Deduplicate overlapping chunks and filter out nested children
|
|
65
|
-
seen_ranges = set()
|
|
66
|
-
unique_chunks = []
|
|
67
|
-
for chunk in chunks:
|
|
68
|
-
range_key = (chunk["start_line"], chunk["end_line"])
|
|
69
|
-
# Skip if same range already seen OR if this chunk is nested inside another
|
|
70
|
-
if range_key in seen_ranges:
|
|
71
|
-
continue
|
|
72
|
-
# Check if this chunk is contained within any already added chunk
|
|
73
|
-
is_nested = any(
|
|
74
|
-
uc["start_line"] <= chunk["start_line"] and uc["end_line"] >= chunk["end_line"]
|
|
75
|
-
and (uc["start_line"], uc["end_line"]) != range_key
|
|
76
|
-
for uc in unique_chunks
|
|
77
|
-
)
|
|
78
|
-
if not is_nested:
|
|
79
|
-
seen_ranges.add(range_key)
|
|
80
|
-
unique_chunks.append(chunk)
|
|
81
|
-
|
|
82
|
-
return "\n".join(c["content"] for c in unique_chunks)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
__all__ = ["cat_file"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{contextinator-1.2.4 → contextinator-1.2.5}/src/contextinator/embedding/embedding_service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|