hmdev-cli 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hmdev-cli",
3
- "version": "1.0.4",
3
+ "version": "1.0.5",
4
4
  "description": "HarmonyOS 开发 CLI 工具 — 文档查询、项目构建、设备部署",
5
5
  "keywords": [
6
6
  "harmonyos",
package/python/cli.py CHANGED
@@ -25,6 +25,8 @@ from html.parser import HTMLParser
25
25
  from typing import Any
26
26
 
27
27
  import httpx
28
+ from rapidfuzz import fuzz
29
+ import jieba
28
30
 
29
31
  from builder import HvigorTool, HDCTool
30
32
  from config import Config
@@ -295,6 +297,67 @@ async def build_index() -> dict[str, Any]:
295
297
  return result
296
298
 
297
299
 
300
+ # ── Search Ranking ─────────────────────────────────────────────────────────────
301
+
302
def compute_relevance_score(query: str, title: str, object_id: str, catalog_name: str) -> float:
    """
    Compute a relevance score for a document against the query.

    All comparisons are case-insensitive. The score is the sum of:

    - Title match, mutually exclusive: equal to the query (+5.0),
      starts with the query (+4.0), or contains the query (+3.0)
    - Query is a substring of the object ID (+1.5)
    - Query is a substring of the catalog name (+0.5)
    - rapidfuzz similarity against the title: partial_ratio (weight 2.0),
      token_sort_ratio (weight 1.5), token_set_ratio (weight 1.0)
    - rapidfuzz partial_ratio against the object ID (weight 0.8)
    - Fraction of the query's jieba tokens that also occur among the
      title's jieba tokens (weight 2.0)
    - +0.5 for every query token of at least two characters that is a
      proper prefix of some title token

    Args:
        query: Raw search text; it is lower-cased and stripped here.
        title: Document title.
        object_id: Document object identifier.
        catalog_name: Name of the catalog the document belongs to.

    Returns:
        The accumulated score; higher means more relevant. A query that
        is empty or whitespace-only scores 0.0.
    """
    q = query.lower().strip()
    # The empty string is a prefix and a substring of every string, so
    # without this guard a blank query would collect the prefix/substring
    # bonuses against every single document.
    if not q:
        return 0.0

    t = title.lower()
    o = object_id.lower()
    c = catalog_name.lower()

    score = 0.0

    # ── 1. Exact match (strongest signal) ──
    if t == q:
        score += 5.0
    elif t.startswith(q):
        score += 4.0
    elif q in t:
        score += 3.0

    if q in o:
        score += 1.5
    if q in c:
        score += 0.5

    # ── 2. Fuzzy match on title (ratios are divided by 100 before weighting) ──
    score += (fuzz.partial_ratio(q, t) / 100.0) * 2.0
    score += (fuzz.token_sort_ratio(q, t) / 100.0) * 1.5
    score += (fuzz.token_set_ratio(q, t) / 100.0) * 1.0

    # ── 3. Fuzzy match on object_id ──
    score += (fuzz.partial_ratio(q, o) / 100.0) * 0.8

    # ── 4. Word overlap via jieba (handles Chinese segmentation) ──
    # Whitespace-only tokens produced by the segmenter are discarded.
    q_words = {w for w in jieba.lcut(q) if w.strip()}
    t_words = {w for w in jieba.lcut(t) if w.strip()}
    if q_words and t_words:
        score += (len(q_words & t_words) / len(q_words)) * 2.0

    # ── 5. Prefix match: query word is prefix of title word ──
    # Single-character tokens are skipped; each qualifying query token
    # contributes at most once no matter how many title tokens it prefixes.
    for qw in q_words:
        if len(qw) < 2:
            continue
        if any(tw != qw and tw.startswith(qw) for tw in t_words):
            score += 0.5

    return score
359
+
360
+
298
361
  # ── CLI Helpers ────────────────────────────────────────────────────────────────
299
362
 
300
363
  def parse_doc_url(url: str) -> tuple[str, str]:
@@ -345,34 +408,58 @@ async def cmd_index(args):
345
408
 
346
409
  async def cmd_search(args):
347
410
  index = await build_index()
348
- query_lower = args.query.lower()
349
- results = []
411
+ query = args.query.strip()
412
+ if not query:
413
+ print("请提供搜索关键词。")
414
+ return
415
+
416
+ query_lower = query.lower()
350
417
  seen = set()
418
+ scored = []
351
419
 
352
420
  for page in index.get("all_pages", []):
353
- title = page.get("title", "").lower()
354
- obj_id = page.get("object_id", "").lower()
355
- if query_lower in title or query_lower in obj_id or query_lower in page.get("catalog_name", ""):
421
+ title = page.get("title", "")
422
+ obj_id = page.get("object_id", "")
423
+ catalog = page.get("catalog_name", "")
424
+
425
+ score = compute_relevance_score(query, title, obj_id, catalog)
426
+
427
+ # Include if exact match exists or fuzzy score is significant
428
+ if (query_lower in title.lower()
429
+ or query_lower in obj_id.lower()
430
+ or query_lower in catalog.lower()
431
+ or score >= 1.5):
356
432
  if page.get("url") not in seen:
357
433
  seen.add(page["url"])
358
- results.append(page)
434
+ page = dict(page)
435
+ page["_score"] = round(score, 2)
436
+ scored.append(page)
437
+
438
+ # Sort: higher score first, shorter title as tiebreaker
439
+ scored.sort(key=lambda p: (-p["_score"], len(p.get("title", ""))))
359
440
 
360
441
  if args.json:
361
- print_json({"query": args.query, "total": len(results), "results": results[:50]})
442
+ out = {
443
+ "query": args.query,
444
+ "total": len(scored),
445
+ "results": scored[:50],
446
+ }
447
+ print_json(out)
362
448
  return
363
449
 
364
- if not results:
450
+ if not scored:
365
451
  print(f"未找到与 '{args.query}' 相关的文档。")
366
452
  print(f"可用分类: {', '.join(f'{v}({k})' for k, v in CATALOGS.items())}")
367
453
  return
368
454
 
369
- print(f"搜索结果: '{args.query}' (共 {len(results)} 篇)\n")
370
- for page in results[:30]:
455
+ print(f"搜索结果: '{args.query}' (共 {len(scored)} 篇)\n")
456
+ for page in scored[:30]:
371
457
  cat = CATALOGS.get(page.get("catalog_name", ""), page.get("catalog_name", ""))
372
- print(f" [{cat}] {page['title']}")
373
- print(f" {page['url']}")
374
- if len(results) > 30:
375
- print(f"\n...及另外 {len(results) - 30} 篇")
458
+ bar = "█" * min(int(page["_score"]), 10) + "░" * (10 - min(int(page["_score"]), 10))
459
+ print(f" {bar} [{cat}] {page['title']}")
460
+ print(f" {page['url']}")
461
+ if len(scored) > 30:
462
+ print(f"\n...及另外 {len(scored) - 30} 篇")
376
463
 
377
464
 
378
465
  async def cmd_get(args):
package/scripts/runner.js CHANGED
@@ -102,8 +102,8 @@ async function ensurePython() {
102
102
  });
103
103
  } catch { /* non-fatal */ }
104
104
 
105
- console.error(`${TAG} 正在安装 Python 依赖 (httpx)...`);
106
- execSync(`"${getVenvPip()}" install httpx`, {
105
+ console.error(`${TAG} 正在安装 Python 依赖 (httpx, rapidfuzz, jieba)...`);
106
+ execSync(`"${getVenvPip()}" install httpx rapidfuzz jieba`, {
107
107
  stdio: 'pipe',
108
108
  timeout: 120000,
109
109
  env: cleanEnv,