@dtoolkit/dproxy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.js +1398 -0
- package/dist/scripts/search +4 -0
- package/dist/scripts/search_facts.py +248 -0
- package/package.json +48 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Fact Search — search the PARA knowledge graph.
|
|
4
|
+
|
|
5
|
+
Searches facts in the configured life directory using keywords + simple ranking.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
    search_facts.py <query> --life-dir <path> [--limit N] [--collection <name>] [--json]
|
|
9
|
+
|
|
10
|
+
Examples:
|
|
11
|
+
    search_facts.py "react typescript" --life-dir ~/life
|
|
12
|
+
    search_facts.py "database migration" --limit 10 --life-dir ~/life
|
|
13
|
+
search_facts.py "stack" --collection projects --life-dir ~/my-kb
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
|
|
22
|
+
|
|
23
|
+
DEFAULT_LIMIT = 20
|
|
24
|
+
|
|
25
|
+
# Translation table built once at import time: maps accented Spanish
# vowels (and n-tilde) to their unaccented ASCII equivalents.
_ACCENT_TABLE = str.maketrans('áéíóúüñ', 'aeiouun')

def normalize_text(text: str) -> str:
    """Normalize text for search: lowercase and strip common Spanish accents.

    Uses str.translate with a precomputed table so the whole string is
    rewritten in a single C-level pass, instead of one full scan per
    character as with chained .replace() calls.

    Args:
        text: Raw text to normalize.

    Returns:
        Lowercased text with á/é/í/ó/ú/ü/ñ mapped to their ASCII bases.
    """
    return text.lower().translate(_ACCENT_TABLE)
|
|
35
|
+
|
|
36
|
+
def score_fact(fact: Dict, query_terms: List[str], entity_name: str) -> float:
    """
    Calculate relevance score for a fact.

    Factors:
    - Matches in fact text (weight 10, +5 bonus per exact whole-word match)
    - Matches in entity name (weight 5)
    - accessCount (weight 1)
    - Recency boost: x1.5 if accessed within 7 days, x1.2 within 30 days

    Args:
        fact: Fact record; reads 'fact', 'accessCount', 'lastAccessed'.
        query_terms: Normalized query terms (already lowercased/unaccented).
        entity_name: Name of the entity that owns the fact.

    Returns:
        Non-negative relevance score; 0.0 means nothing matched.
    """
    score = 0.0
    fact_text = normalize_text(fact.get('fact', ''))
    entity_norm = normalize_text(entity_name)

    # Matches in fact text
    for term in query_terms:
        if term in fact_text:
            score += 10.0
            # Bonus for exact word match
            if re.search(r'\b' + re.escape(term) + r'\b', fact_text):
                score += 5.0

    # Matches in entity name
    for term in query_terms:
        if term in entity_norm:
            score += 5.0

    # Access count (popularity)
    score += fact.get('accessCount', 0) * 1.0

    # Tier boost (recently accessed facts are more relevant)
    last_accessed = fact.get('lastAccessed', '')
    if last_accessed:
        try:
            # items.json is written by a JS tool (camelCase keys suggest so);
            # Date.toISOString() emits a trailing 'Z', which
            # datetime.fromisoformat() rejects before Python 3.11 — that made
            # the boost silently vanish. Normalize 'Z' to an explicit offset.
            iso = last_accessed.replace('Z', '+00:00')
            accessed_date = datetime.fromisoformat(iso).date()
            days_ago = (datetime.now().date() - accessed_date).days
            if days_ago <= 7:
                score *= 1.5  # Hot boost
            elif days_ago <= 30:
                score *= 1.2  # Warm boost
        except (ValueError, TypeError):
            # Malformed timestamps simply get no recency boost.
            pass

    return score
|
80
|
+
|
|
81
|
+
def search_in_entity(entity_path: Path, query_terms: List[str]) -> List[Tuple[float, Dict, str, str]]:
    """Score every fact stored in one entity directory.

    Reads <entity_path>/items.json and returns one tuple per fact that
    matched at least one query term.

    Returns:
        List of (score, fact, entity_name, entity_type) tuples; empty when
        the items file is missing or unreadable.
    """
    items_file = entity_path / "items.json"
    if not items_file.exists():
        return []

    try:
        data = json.loads(items_file.read_text(encoding='utf-8'))
    except (json.JSONDecodeError, IOError):
        # Treat unreadable/corrupt entity files as having no facts.
        return []

    name = data.get('entity', entity_path.name)
    kind = data.get('type', 'unknown')

    # Keep only facts with a positive relevance score.
    scored = ((score_fact(fact, query_terms, name), fact)
              for fact in data.get('facts', []))
    return [(s, fact, name, kind) for s, fact in scored if s > 0]
|
104
|
+
|
|
105
|
+
def search_facts(query: str, life_dir: Path, collection: Optional[str] = None, limit: int = DEFAULT_LIMIT) -> List[Dict]:
    """
    Search facts in the life directory.

    Args:
        query: Search query (space-separated keywords)
        life_dir: Root directory of the PARA knowledge base
        collection: Filter by collection (projects, people, companies, resources, systems, events)
        limit: Maximum number of results

    Returns:
        List of result dicts sorted by relevance (highest score first).
    """
    # Normalize the query and drop terms shorter than 2 chars (noise).
    query_terms = [term for term in normalize_text(query).split() if len(term) >= 2]
    if not query_terms:
        return []

    # Resolve the set of entity directories to scan.
    entities: List[Path] = []
    if collection:
        # Map collection names to base directories for scoped search
        collection_map = {
            'projects': life_dir / 'projects',
            'people': life_dir / 'areas' / 'people',
            'companies': life_dir / 'areas' / 'companies',
            'resources': life_dir / 'resources',
            'systems': life_dir / 'areas' / 'systems',
            'events': life_dir / 'areas' / 'events',
        }
        if collection in collection_map:
            base = collection_map[collection]
            if base.exists():
                _collect_entity_dirs(base, entities)
        else:
            # Unknown collection names degrade gracefully to a full scan.
            print(f"Warning: Unknown collection '{collection}', searching all", file=sys.stderr)
            entities = _all_search_paths(life_dir)
    else:
        entities = _all_search_paths(life_dir)

    # Score every fact in every discovered entity.
    # (_collect_entity_dirs only ever appends directories, so no extra
    # is_dir() filtering is needed here.)
    all_results = []
    for entity_path in entities:
        all_results.extend(search_in_entity(entity_path, query_terms))

    # Sort by score (descending)
    all_results.sort(key=lambda item: item[0], reverse=True)

    # Flatten the top `limit` tuples into plain dicts for output.
    return [
        {
            'score': round(score, 2),
            'entity': entity_name,
            'entity_type': entity_type,
            'fact_id': fact.get('id'),
            'fact': fact.get('fact'),
            'category': fact.get('category'),
            'timestamp': fact.get('timestamp'),
            'lastAccessed': fact.get('lastAccessed'),
            'accessCount': fact.get('accessCount', 0),
        }
        for score, fact, entity_name, entity_type in all_results[:limit]
    ]
|
177
|
+
|
|
178
|
+
def _all_search_paths(life_dir: Path) -> List[Path]:
    """
    Recursively discover entity directories under projects/, areas/, resources/.

    An entity directory is one that contains items.json. This matches the
    TypeScript discoverEntities() recursive scan behavior.
    """
    found: List[Path] = []
    for top_name in ('projects', 'areas', 'resources'):
        top = life_dir / top_name
        if top.exists():
            _collect_entity_dirs(top, found)
    return found
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _collect_entity_dirs(directory: Path, result: List[Path]):
|
|
194
|
+
"""Recursively collect directories that contain items.json."""
|
|
195
|
+
if not directory.is_dir():
|
|
196
|
+
return
|
|
197
|
+
for child in directory.iterdir():
|
|
198
|
+
if not child.is_dir():
|
|
199
|
+
continue
|
|
200
|
+
if (child / 'items.json').exists():
|
|
201
|
+
result.append(child)
|
|
202
|
+
else:
|
|
203
|
+
# Nested directory (e.g. areas/people/, areas/systems/) — recurse
|
|
204
|
+
_collect_entity_dirs(child, result)
|
|
205
|
+
|
|
206
|
+
def print_results(results: List[Dict], query: str):
    """Render search hits for a terminal reader, one numbered entry each.

    Falls back to a single "no results" line when the list is empty.
    """
    if not results:
        print(f"No results found for: {query}")
        return

    header = [
        f"Search results for: '{query}'",
        f"{'=' * 80}",
        f"Found {len(results)} fact(s)\n",
    ]
    for line in header:
        print(line)

    for rank, hit in enumerate(results, 1):
        print(f"{rank}. [{hit['entity']}] {hit['fact_id']} (score: {hit['score']})")
        print(f" {hit['fact']}")
        print(f" Category: {hit['category']} | Type: {hit['entity_type']}")
        print(f" Access: {hit['accessCount']}x | Last: {hit['lastAccessed']}")
        print()
|
222
|
+
|
|
223
|
+
def main():
|
|
224
|
+
import argparse
|
|
225
|
+
|
|
226
|
+
parser = argparse.ArgumentParser(description="Search facts in PARA knowledge graph")
|
|
227
|
+
parser.add_argument('query', type=str, help="Search query (keywords)")
|
|
228
|
+
parser.add_argument('--limit', type=int, default=DEFAULT_LIMIT, help=f"Max results (default: {DEFAULT_LIMIT})")
|
|
229
|
+
parser.add_argument('--collection', type=str, choices=['projects', 'people', 'companies', 'resources', 'systems', 'events'], help="Filter by collection")
|
|
230
|
+
parser.add_argument('--json', action='store_true', help="Output JSON instead of human-readable")
|
|
231
|
+
parser.add_argument('--life-dir', type=str, required=True, help="Root directory of the PARA knowledge base")
|
|
232
|
+
|
|
233
|
+
args = parser.parse_args()
|
|
234
|
+
life_dir = Path(args.life_dir)
|
|
235
|
+
|
|
236
|
+
if not life_dir.exists():
|
|
237
|
+
print(f"Error: life directory does not exist: {life_dir}", file=sys.stderr)
|
|
238
|
+
sys.exit(1)
|
|
239
|
+
|
|
240
|
+
results = search_facts(args.query, life_dir, collection=args.collection, limit=args.limit)
|
|
241
|
+
|
|
242
|
+
if args.json:
|
|
243
|
+
print(json.dumps(results, indent=2, ensure_ascii=False))
|
|
244
|
+
else:
|
|
245
|
+
print_results(results, args.query)
|
|
246
|
+
|
|
247
|
+
if __name__ == "__main__":
|
|
248
|
+
main()
|
package/package.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@dtoolkit/dproxy",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Universal adapter for invoking models via local CLIs",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"author": "Iván Campillo <ivncmp@gmail.com>",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/ivncmp/dtoolkit.git",
|
|
11
|
+
"directory": "packages/dproxy"
|
|
12
|
+
},
|
|
13
|
+
"homepage": "https://github.com/ivncmp/dtoolkit/tree/main/packages/dproxy",
|
|
14
|
+
"publishConfig": {
|
|
15
|
+
"access": "public"
|
|
16
|
+
},
|
|
17
|
+
"bin": {
|
|
18
|
+
"dproxy": "dist/index.js"
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"dist"
|
|
22
|
+
],
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=22"
|
|
25
|
+
},
|
|
26
|
+
"keywords": [
|
|
27
|
+
"ai",
|
|
28
|
+
"cli",
|
|
29
|
+
"proxy",
|
|
30
|
+
"claude",
|
|
31
|
+
"llm",
|
|
32
|
+
"adapter"
|
|
33
|
+
],
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"chalk": "^5.4.0",
|
|
36
|
+
"commander": "^13.0.0",
|
|
37
|
+
"yaml": "^2.7.0",
|
|
38
|
+
"@dtoolkit/core": "0.1.0"
|
|
39
|
+
},
|
|
40
|
+
"devDependencies": {
|
|
41
|
+
"@types/node": "^22.19.17",
|
|
42
|
+
"tsup": "^8.0.0"
|
|
43
|
+
},
|
|
44
|
+
"scripts": {
|
|
45
|
+
"build": "tsup",
|
|
46
|
+
"dev": "tsup --watch"
|
|
47
|
+
}
|
|
48
|
+
}
|