code-explore 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_explore/__init__.py +3 -0
- code_explore/analyzer/__init__.py +13 -0
- code_explore/analyzer/dependencies.py +328 -0
- code_explore/analyzer/language.py +240 -0
- code_explore/analyzer/metrics.py +144 -0
- code_explore/analyzer/patterns.py +371 -0
- code_explore/api/__init__.py +1 -0
- code_explore/api/main.py +197 -0
- code_explore/cli/__init__.py +1 -0
- code_explore/cli/main.py +557 -0
- code_explore/database.py +207 -0
- code_explore/indexer/__init__.py +1 -0
- code_explore/indexer/embeddings.py +181 -0
- code_explore/models.py +106 -0
- code_explore/scanner/__init__.py +1 -0
- code_explore/scanner/git_info.py +94 -0
- code_explore/scanner/local.py +70 -0
- code_explore/scanner/readme.py +70 -0
- code_explore/search/__init__.py +1 -0
- code_explore/search/fulltext.py +137 -0
- code_explore/search/hybrid.py +92 -0
- code_explore/search/semantic.py +76 -0
- code_explore/summarizer/__init__.py +1 -0
- code_explore/summarizer/ollama.py +130 -0
- code_explore-0.1.0.dist-info/METADATA +67 -0
- code_explore-0.1.0.dist-info/RECORD +28 -0
- code_explore-0.1.0.dist-info/WHEEL +4 -0
- code_explore-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Read README files and list key files from repositories."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
README_CANDIDATES = ("README.md", "readme.md", "README.rst", "README", "README.txt", "Readme.md")
|
|
9
|
+
|
|
10
|
+
SKIP_ENTRIES = frozenset({
|
|
11
|
+
".git",
|
|
12
|
+
"node_modules",
|
|
13
|
+
"__pycache__",
|
|
14
|
+
".DS_Store",
|
|
15
|
+
".gitignore",
|
|
16
|
+
".editorconfig",
|
|
17
|
+
".vscode",
|
|
18
|
+
".idea",
|
|
19
|
+
".mypy_cache",
|
|
20
|
+
".pytest_cache",
|
|
21
|
+
".ruff_cache",
|
|
22
|
+
".tox",
|
|
23
|
+
".eggs",
|
|
24
|
+
".env",
|
|
25
|
+
})
|
|
26
|
+
|
|
27
|
+
MAX_KEY_FILES = 50
|
|
28
|
+
MAX_README_CHARS = 1500
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def read_readme(repo_path: Path) -> str | None:
|
|
32
|
+
"""Read the README file from a repository and return its first 1500 characters.
|
|
33
|
+
|
|
34
|
+
Tries several common README filenames. Returns None if no README is found
|
|
35
|
+
or if the file cannot be read.
|
|
36
|
+
"""
|
|
37
|
+
for candidate in README_CANDIDATES:
|
|
38
|
+
readme_path = repo_path / candidate
|
|
39
|
+
try:
|
|
40
|
+
if readme_path.is_file():
|
|
41
|
+
text = readme_path.read_text(encoding="utf-8", errors="replace")
|
|
42
|
+
return text[:MAX_README_CHARS] if text else None
|
|
43
|
+
except (OSError, PermissionError) as e:
|
|
44
|
+
logger.debug("Could not read %s: %s", readme_path, e)
|
|
45
|
+
continue
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def list_key_files(repo_path: Path) -> list[str]:
    """List important top-level files and directories in a repository.

    Skips common non-informative entries like .git, node_modules, etc.
    Directories get a trailing "/" (symlinks are left bare). Returns up to
    MAX_KEY_FILES names (not full paths); unreadable directories yield [].
    """
    try:
        children = sorted(repo_path.iterdir())
    except (OSError, PermissionError) as exc:
        logger.debug("Could not list directory %s: %s", repo_path, exc)
        return []

    collected: list[str] = []
    for child in children:
        if child.name in SKIP_ENTRIES:
            continue
        if child.is_dir() and not child.is_symlink():
            collected.append(child.name + "/")
        else:
            collected.append(child.name)
        if len(collected) >= MAX_KEY_FILES:
            break

    return collected
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Search engine - fulltext and semantic search."""
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""SQLite FTS5 fulltext search."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sqlite3
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from code_explore.database import get_connection, get_db_path, get_project
|
|
8
|
+
from code_explore.database import search_fulltext as db_search_fulltext
|
|
9
|
+
from code_explore.models import SearchResult
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
# Minimum word length to try as an individual search term
|
|
14
|
+
_MIN_WORD_LEN = 3
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _extract_snippets(
    query: str, project_id: str, db_path: Path | None = None
) -> list[str]:
    """Return highlighted FTS5 snippets for one project matching *query*.

    Only snippets that actually contain a highlight marker ("**") are kept.
    Returns [] when the query is invalid FTS5 syntax or nothing matched.
    """
    conn = get_connection(db_path)
    try:
        rows = conn.execute(
            """
            SELECT
                snippet(projects_fts, 0, '**', '**', '...', 32) as name_snip,
                snippet(projects_fts, 1, '**', '**', '...', 64) as summary_snip,
                snippet(projects_fts, 2, '**', '**', '...', 32) as tags_snip,
                snippet(projects_fts, 3, '**', '**', '...', 64) as readme_snip
            FROM projects_fts fts
            JOIN projects p ON p.rowid = fts.rowid
            WHERE projects_fts MATCH ? AND p.id = ?
            LIMIT 1
            """,
            (query, project_id),
        ).fetchall()
    except sqlite3.OperationalError:
        # Malformed FTS5 query (e.g. stray quotes/operators) — no snippets.
        return []
    finally:
        conn.close()

    if not rows:
        return []

    # NOTE(review): string-keyed access assumes get_connection sets
    # row_factory to sqlite3.Row — confirm in database.py.
    first = rows[0]
    columns = ("name_snip", "summary_snip", "tags_snip", "readme_snip")
    return [first[col] for col in columns if first[col] and "**" in first[col]]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _or_query(query: str) -> str:
    """Convert a multi-word query to OR-separated FTS5 query."""
    words = query.split()
    if len(words) < 2:
        # Nothing to join — return the query untouched.
        return query
    return " OR ".join(words)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _significant_words(query: str) -> list[str]:
    """Return words from the query that are long enough to search individually."""
    words: list[str] = []
    for word in query.split():
        if len(word) >= _MIN_WORD_LEN:
            words.append(word)
    return words
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def search(
    query: str, limit: int = 20, db_path: Path | None = None
) -> list[SearchResult]:
    """Fulltext search with progressive query relaxation.

    Tries three strategies in order until one yields results:
      1. The original query (FTS5 implicit AND between terms).
      2. The words joined with OR.
      3. Each significant word individually, merged with later words
         penalized so matches for earlier words rank higher.

    Highlights are extracted with the same fallback chain. Returns
    SearchResult objects with match_type="fulltext".
    """
    # Strategy 1: Try the original query (AND logic)
    raw_results = _safe_fts_search(query, limit=limit, db_path=db_path)

    # Strategy 2: If no results, try OR between words
    if not raw_results:
        or_query = _or_query(query)
        if or_query != query:
            raw_results = _safe_fts_search(or_query, limit=limit, db_path=db_path)

    # Strategy 3: If still no results, try each significant word individually
    if not raw_results:
        seen_ids: set[str] = set()
        merged: list[tuple[str, float]] = []
        score_penalty = 0.0

        for word in _significant_words(query):
            word_results = _safe_fts_search(word, limit=limit, db_path=db_path)
            for project_id, rank in word_results:
                if project_id not in seen_ids:
                    seen_ids.add(project_id)
                    # FTS5 bm25 ranks are negative (more negative == better
                    # match). Adding the penalty always shifts the rank toward
                    # "worse", so matches for earlier words rank higher.
                    # (Fix: the old code *subtracted* the penalty from
                    # negative ranks, which boosted later matches instead.)
                    merged.append((project_id, rank + score_penalty))
                    score_penalty += 1.0

        raw_results = merged[:limit]

    # Build SearchResult objects
    results = []
    for project_id, rank in raw_results:
        project = get_project(project_id, db_path=db_path)
        if project is None:
            # Stale FTS row whose project was deleted — skip it.
            continue

        # bm25 ranks are negative; flip the sign so higher score == better.
        score = -rank if rank < 0 else rank

        # For snippets, try the original query first, then fall back to the
        # OR query, then to each significant word.
        highlights = _extract_snippets(query, project_id, db_path=db_path)
        if not highlights:
            or_q = _or_query(query)
            if or_q != query:
                highlights = _extract_snippets(or_q, project_id, db_path=db_path)
        if not highlights:
            for word in _significant_words(query):
                highlights = _extract_snippets(word, project_id, db_path=db_path)
                if highlights:
                    break

        results.append(
            SearchResult(
                project=project,
                score=score,
                match_type="fulltext",
                highlights=highlights,
            )
        )

    return results
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _safe_fts_search(
    query: str, limit: int = 20, db_path: Path | None = None
) -> list[tuple[str, float]]:
    """Run FTS5 search, returning empty list on any error."""
    try:
        hits = db_search_fulltext(query, limit=limit, db_path=db_path)
    except sqlite3.OperationalError as exc:
        # Invalid FTS5 syntax or a missing table — treat as "no matches".
        logger.debug("FTS5 search failed for query %r: %s", query, exc)
        return []
    return hits
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Hybrid search combining fulltext and semantic search with reciprocal rank fusion."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from code_explore.models import SearchResult
|
|
8
|
+
from code_explore.search.fulltext import search as fulltext_search
|
|
9
|
+
from code_explore.search.semantic import search as semantic_search
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
RRF_K = 60
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _reciprocal_rank_fusion(
    fulltext_results: list[SearchResult],
    semantic_results: list[SearchResult],
) -> list[SearchResult]:
    """Merge two ranked result lists with reciprocal rank fusion (RRF).

    Each project accumulates 1 / (RRF_K + rank + 1) from every list it
    appears in; highlights from both lists are combined without duplicates.
    Returns new SearchResult objects (match_type="hybrid") ordered by fused
    score, highest first.
    """
    scores: dict[str, float] = {}
    results_map: dict[str, SearchResult] = {}
    highlights_map: dict[str, list[str]] = {}

    # Both lists contribute identically, so fold them with one loop instead
    # of two duplicated ones.
    for result_list in (fulltext_results, semantic_results):
        for rank, result in enumerate(result_list):
            pid = result.project.id
            scores[pid] = scores.get(pid, 0.0) + 1.0 / (RRF_K + rank + 1)
            if pid not in results_map:
                results_map[pid] = result
                highlights_map[pid] = list(result.highlights)
            else:
                for h in result.highlights:
                    if h not in highlights_map[pid]:
                        highlights_map[pid].append(h)

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)

    return [
        SearchResult(
            project=results_map[pid].project,
            score=score,
            match_type="hybrid",
            highlights=highlights_map.get(pid, []),
        )
        for pid, score in ranked
    ]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def search(
    query: str, limit: int = 20, db_path: Path | None = None
) -> list[SearchResult]:
    """Run fulltext and semantic search in parallel and fuse the rankings.

    If one component fails or returns nothing, the other's results are
    returned directly; otherwise the two lists are merged with reciprocal
    rank fusion. At most *limit* results are returned.
    """
    fulltext_results: list[SearchResult] = []
    semantic_results: list[SearchResult] = []

    with ThreadPoolExecutor(max_workers=2) as executor:
        ft_future = executor.submit(fulltext_search, query, limit=limit, db_path=db_path)
        sem_future = executor.submit(semantic_search, query, limit=limit, db_path=db_path)

        for future in as_completed([ft_future, sem_future]):
            try:
                outcome = future.result()
            except Exception as e:
                # A failed component degrades to the other one below.
                logger.error("Search component failed: %s", e)
                continue
            if future is ft_future:
                fulltext_results = outcome
            else:
                semantic_results = outcome

    # Degenerate cases: one or both components produced nothing.
    if not fulltext_results and not semantic_results:
        return []
    if not semantic_results:
        return fulltext_results[:limit]
    if not fulltext_results:
        return semantic_results[:limit]

    return _reciprocal_rank_fusion(fulltext_results, semantic_results)[:limit]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""LanceDB vector similarity search."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import lancedb
|
|
7
|
+
|
|
8
|
+
from code_explore.database import get_project
|
|
9
|
+
from code_explore.indexer.embeddings import (
|
|
10
|
+
VECTOR_DB_PATH,
|
|
11
|
+
TABLE_NAME,
|
|
12
|
+
generate_embedding,
|
|
13
|
+
_ollama_available,
|
|
14
|
+
)
|
|
15
|
+
from code_explore.models import SearchResult
|
|
16
|
+
from code_explore.search.fulltext import search as fulltext_search
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def search(
    query: str, limit: int = 20, db_path: Path | None = None
) -> list[SearchResult]:
    """Semantic (vector) similarity search over project embeddings.

    Falls back to fulltext search whenever any prerequisite is missing:
    Ollama is unreachable, the query embedding cannot be generated, the
    LanceDB store or its table does not exist, or the vector query raises.
    Returns SearchResult objects with match_type="semantic".
    """
    if not _ollama_available():
        logger.warning("Ollama unavailable. Falling back to fulltext search.")
        return fulltext_search(query, limit=limit, db_path=db_path)

    query_vector = generate_embedding(query)
    if query_vector is None:
        logger.warning("Failed to generate query embedding. Falling back to fulltext search.")
        return fulltext_search(query, limit=limit, db_path=db_path)

    if not VECTOR_DB_PATH.exists():
        logger.warning("Vector store not found. Falling back to fulltext search.")
        return fulltext_search(query, limit=limit, db_path=db_path)

    try:
        db = lancedb.connect(str(VECTOR_DB_PATH))
        if TABLE_NAME not in db.table_names():
            logger.warning("Embeddings table not found. Falling back to fulltext search.")
            return fulltext_search(query, limit=limit, db_path=db_path)

        table = db.open_table(TABLE_NAME)
        # Cosine-distance nearest-neighbor query, materialized as dicts.
        raw_results = (
            table.search(query_vector)
            .metric("cosine")
            .limit(limit)
            .to_list()
        )
    except Exception as e:
        # Broad catch is deliberate: any LanceDB failure degrades to fulltext.
        logger.error("Vector search failed: %s. Falling back to fulltext search.", e)
        return fulltext_search(query, limit=limit, db_path=db_path)

    results = []
    for row in raw_results:
        project_id = row["id"]
        project = get_project(project_id, db_path=db_path)
        if project is None:
            # Embedding exists but the project row was removed — skip it.
            continue

        # Convert cosine distance to a similarity-like score, clamped at 0
        # (cosine distance can exceed 1.0, which would go negative).
        distance = row.get("_distance", 0.0)
        score = max(0.0, 1.0 - distance)

        # The embedded text doubles as the highlight for this match.
        text_snippet = row.get("text", "")
        highlights = [text_snippet] if text_snippet else []

        results.append(
            SearchResult(
                project=project,
                score=score,
                match_type="semantic",
                highlights=highlights,
            )
        )

    return results
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""AI-powered project summarization via Ollama."""
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Generate AI summaries of projects using local Ollama."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from code_explore.models import Project
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
OLLAMA_BASE_URL = "http://localhost:11434"
|
|
12
|
+
DEFAULT_MODEL = "llama3.2:3b"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _build_prompt(project: Project) -> str:
    """Build the Ollama prompt for *project*.

    Assembles a context block from whichever project facts are present
    (README excerpt, key files, languages, frameworks, dependencies,
    patterns, size metrics), then appends strict formatting instructions
    for a SUMMARY/TAGS/CONCEPTS reply.
    """
    parts = [f"Project: {project.name}"]

    if project.git.remote_url:
        parts.append(f"Repository: {project.git.remote_url}")

    if project.readme_snippet:
        parts.append(f"\nREADME (excerpt):\n{project.readme_snippet}")

    if project.key_files:
        # Cap at 40 filenames to keep the prompt size bounded.
        parts.append(f"\nKey files: {', '.join(project.key_files[:40])}")

    if project.primary_language:
        parts.append(f"\nPrimary language: {project.primary_language}")

    languages = [lang.name for lang in project.languages]
    if languages:
        parts.append(f"Languages: {', '.join(languages)}")

    if project.frameworks:
        parts.append(f"Frameworks: {', '.join(project.frameworks)}")

    # Cap at 30 dependencies to keep the prompt size bounded.
    deps = [d.name for d in project.dependencies[:30]]
    if deps:
        parts.append(f"Dependencies: {', '.join(deps)}")

    patterns = [p.name for p in project.patterns]
    if patterns:
        parts.append(f"Detected patterns: {', '.join(patterns)}")

    if project.quality.total_files:
        parts.append(f"Total files: {project.quality.total_files}, Total lines: {project.quality.total_lines}")

    if project.path:
        parts.append(f"Path: {project.path}")

    context = "\n".join(parts)

    # The response format demanded below is what _parse_response expects;
    # keep the two in sync.
    return f"""You are analyzing a software project. Your job is to figure out WHAT this project actually does — its concrete purpose and functionality.

RULES:
- Focus on WHAT the project does, not what languages or technologies it uses.
- Be SPECIFIC: mention the actual domain, functionality, or problem it solves.
- NEVER say generic things like "utilizes various programming languages" or "a software project that uses modern technologies".
- The summary must be exactly 2 sentences.
- Tags should be domain-specific (e.g. "youtube-api", "video-download", "data-pipeline", "markdown-parser"), NOT generic (e.g. "javascript", "web", "coding").

Project information:
{context}

Respond in exactly this format (no extra lines):
SUMMARY: <exactly 2 sentences about what this project concretely does>
TAGS: tag1, tag2, tag3, ... (5-10 domain-specific tags)
CONCEPTS: concept1, concept2, concept3, ... (3-5 architectural themes)"""
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _parse_response(text: str) -> tuple[str | None, list[str], list[str]]:
    """Parse a SUMMARY/TAGS/CONCEPTS formatted model reply.

    Field labels are matched case-insensitively, line by line. Missing
    fields yield None (summary) or empty lists (tags/concepts).
    """

    def _split_csv(raw: str) -> list[str]:
        # Comma-separated values, stripped, with empties dropped.
        return [item.strip() for item in raw.split(",") if item.strip()]

    summary: str | None = None
    tags: list[str] = []
    concepts: list[str] = []

    for raw_line in text.strip().split("\n"):
        stripped = raw_line.strip()
        folded = stripped.upper()
        if folded.startswith("SUMMARY:"):
            summary = stripped[len("SUMMARY:"):].strip()
        elif folded.startswith("TAGS:"):
            tags = _split_csv(stripped[len("TAGS:"):].strip())
        elif folded.startswith("CONCEPTS:"):
            concepts = _split_csv(stripped[len("CONCEPTS:"):].strip())

    return summary, tags, concepts
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def summarize_project(
    project: Project,
    model: str = DEFAULT_MODEL,
    base_url: str = OLLAMA_BASE_URL,
) -> tuple[str | None, list[str], list[str]]:
    """Ask a local Ollama model to summarize *project*.

    Returns (summary, tags, concepts). On any failure — Ollama unreachable,
    HTTP error, or an unparseable payload — returns (None, [], []) instead
    of raising.
    """
    payload = {
        "model": model,
        "prompt": _build_prompt(project),
        "stream": False,
        "options": {"temperature": 0.3, "num_predict": 512},
    }

    try:
        resp = httpx.post(f"{base_url}/api/generate", json=payload, timeout=120.0)
        resp.raise_for_status()
    except (httpx.ConnectError, httpx.TimeoutException):
        logger.warning("Ollama is not running at %s. Skipping summarization.", base_url)
        return None, [], []
    except httpx.HTTPStatusError as e:
        logger.error("Ollama request failed: %s", e)
        return None, [], []

    try:
        response_text = resp.json()["response"]
    except (KeyError, ValueError):
        logger.error("Unexpected Ollama response format.")
        return None, [], []

    summary, tags, concepts = _parse_response(response_text)

    if summary:
        logger.info("Generated summary for project '%s'.", project.name)
    else:
        logger.warning("Failed to parse summary from Ollama response for '%s'.", project.name)

    return summary, tags, concepts
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-explore
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Developer knowledge base CLI — scan, index, and search your programming projects
|
|
5
|
+
Project-URL: Homepage, https://github.com/aipioneers/code-explore
|
|
6
|
+
Project-URL: Repository, https://github.com/aipioneers/code-explore
|
|
7
|
+
Project-URL: Issues, https://github.com/aipioneers/code-explore/issues
|
|
8
|
+
Author-email: Tobias Oberrauch <tobias.oberrauch@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Keywords: cli,code-search,developer-tools,knowledge-base,project-indexer
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
20
|
+
Classifier: Topic :: Utilities
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: gitpython>=3.1.0
|
|
23
|
+
Requires-Dist: httpx>=0.24.0
|
|
24
|
+
Requires-Dist: lancedb>=0.4.0
|
|
25
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0.0
|
|
27
|
+
Requires-Dist: rich>=13.0.0
|
|
28
|
+
Requires-Dist: typer>=0.9.0
|
|
29
|
+
Provides-Extra: api
|
|
30
|
+
Requires-Dist: fastapi>=0.100.0; extra == 'api'
|
|
31
|
+
Requires-Dist: uvicorn>=0.23.0; extra == 'api'
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# code-explore
|
|
38
|
+
|
|
39
|
+
Developer knowledge base CLI — scan, index, and search your programming projects.
|
|
40
|
+
|
|
41
|
+
## Install
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install code-explore
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Usage
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Short alias (3 chars):
|
|
51
|
+
cex scan ~/Projects
|
|
52
|
+
cex index
|
|
53
|
+
cex search "YouTube API alle Videos"
|
|
54
|
+
cex show data-youtube
|
|
55
|
+
cex stats
|
|
56
|
+
|
|
57
|
+
# Full command also works:
|
|
58
|
+
code-explore scan ~/Projects
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- Scan local project directories and extract metadata (languages, dependencies, patterns)
|
|
64
|
+
- Generate AI summaries using local Ollama models
|
|
65
|
+
- Create vector embeddings for semantic search (multilingual)
|
|
66
|
+
- Hybrid search combining fulltext + semantic ranking
|
|
67
|
+
- Incremental indexing with change detection
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
code_explore/__init__.py,sha256=OjKtv4gwtDVv6HuRcvmpdUCORj4oElr4F0GBWfnJjlA,79
|
|
2
|
+
code_explore/database.py,sha256=CxZ3D-m6-nc5WQPbDbypIoVyr9BOjlUrEG9gMWHVKzU,7287
|
|
3
|
+
code_explore/models.py,sha256=wW-UIHcALvLb1x8ITlxnVwokwLupmkf1ew5hPAbpdFw,2641
|
|
4
|
+
code_explore/analyzer/__init__.py,sha256=BkQ86zztgGaVC5iM0_1oXFVqECCYNDRuFyclu-GFAY8,427
|
|
5
|
+
code_explore/analyzer/dependencies.py,sha256=diNE7l6peazYrPpv9dEcuWrYSulSOPs_SdSb-PPSNTw,10426
|
|
6
|
+
code_explore/analyzer/language.py,sha256=8Hr_Rr1Ai2gAfPlnFKepZk6-O3awAimO1KfLd0ESIX0,6261
|
|
7
|
+
code_explore/analyzer/metrics.py,sha256=jbr90DSjZOuj1XUm1-6KH7fHkncuouKf5-aLGDRgMuQ,4043
|
|
8
|
+
code_explore/analyzer/patterns.py,sha256=WnqZSc1gSDCHSgfv82-U3zdhYLjxMG-VQFhvSyJybKk,16939
|
|
9
|
+
code_explore/api/__init__.py,sha256=uFvBuvIP20KxR---3VaMlsAP2FASNMmgE8hEZ6nmUPs,41
|
|
10
|
+
code_explore/api/main.py,sha256=uGSHEjZ94uCjOISiJXqxnYqV2EU1pR2856MhjBkZ5ko,6273
|
|
11
|
+
code_explore/cli/__init__.py,sha256=crANy4-v343x-Hv6UYfsyVIGEusUZuEMxq_NsD0s2JU,38
|
|
12
|
+
code_explore/cli/main.py,sha256=U3Zu4D5ZiXAdQ7UZgOtnpL6EkYS5hmqLNWuZ3IXasgk,21290
|
|
13
|
+
code_explore/indexer/__init__.py,sha256=ncOsx2mjAMWUsH-7jUF0habViiM0_dqNHebFzVvVst8,48
|
|
14
|
+
code_explore/indexer/embeddings.py,sha256=YWWU_tn5nox5JHLlx4dmBKYCfVtJWQMFGkx434mTNyo,5450
|
|
15
|
+
code_explore/scanner/__init__.py,sha256=JTlgdWbldIoyj6npvUnf3EgVZNS0toSaJx0ujt6xmfA,57
|
|
16
|
+
code_explore/scanner/git_info.py,sha256=JTVj_iER7ZqCCWGeUrHVQOLSz7RnYWM2bHaAfg57jkc,2621
|
|
17
|
+
code_explore/scanner/local.py,sha256=dBzlNTt5ZUs2jPJDDB200HHzWAzvJ_zQgriYlgQzRzM,1765
|
|
18
|
+
code_explore/scanner/readme.py,sha256=eVL_zoegLgcnqauRxu1t0Hl-qi7m7X4ajE56lHEJtzE,1993
|
|
19
|
+
code_explore/search/__init__.py,sha256=9cfckee15CtH9n88iZy1s5deeB7eet3bgnp4PO-hQaQ,52
|
|
20
|
+
code_explore/search/fulltext.py,sha256=xD6R2ZW8rYZn0k3jCxCd6JxwvVBvAMbKWNEU1KMNUFc,4628
|
|
21
|
+
code_explore/search/hybrid.py,sha256=PQLSrPiBXEfjhWSMLyxuZ75r2w0PVNGq00mlvdGAbM4,3105
|
|
22
|
+
code_explore/search/semantic.py,sha256=4LS3jod-2jnRdTdzB_6OY2Jao-E7bPtCGX3JwG-ifm4,2387
|
|
23
|
+
code_explore/summarizer/__init__.py,sha256=2rnSsUbwDJi-V4k_qpXzrtkEgahB1iIJA6OWcQKUlCA,51
|
|
24
|
+
code_explore/summarizer/ollama.py,sha256=DSVmifEywx_INX9QfUlsv6pkfVw4WfTHPMnFQpLCY48,4423
|
|
25
|
+
code_explore-0.1.0.dist-info/METADATA,sha256=Os__IaA60MML5ppUFKpTIPSDTKrZwKNKTvJEOq3LfAs,2143
|
|
26
|
+
code_explore-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
27
|
+
code_explore-0.1.0.dist-info/entry_points.txt,sha256=3Yk9IeoL39Gw1cp7ZRCCleg_vnTNizawQsSdsXcxCzM,91
|
|
28
|
+
code_explore-0.1.0.dist-info/RECORD,,
|