npm - clean-code-tools - Versions diffs - 1.0.1 - Mend

clean-code-tools 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

package/README.md +66 -0
package/configs/eslint.clean-code.recommended.mjs +211 -0
package/configs/python.clean-code.pyproject.toml +143 -0
package/data/clean-code-patterns.jsonl +264 -0
package/data/vector-record.schema.json +77 -0
package/docs/README.md +29 -0
package/docs/eslint-custom-rules.md +74 -0
package/docs/eslint-recommended-config.md +87 -0
package/docs/fastmcp-local-server.md +104 -0
package/docs/publishing.md +125 -0
package/docs/python-lint-recommended-config.md +57 -0
package/docs/python-pylint-custom-rules.md +77 -0
package/docs/semantic-weaviate.md +80 -0
package/docs/static-trigger-semantic-review.md +97 -0
package/evals/clean-code-retrieval.jsonl +13 -0
package/ops/dev/weaviate/README.md +34 -0
package/ops/dev/weaviate/compose.yaml +34 -0
package/ops/dev/weaviate/smoke.sh +28 -0
package/package.json +96 -0
package/pyproject.toml +303 -0
package/sample-apps/README.md +40 -0
package/sample-apps/python-app/pyproject.toml +113 -0
package/sample-apps/python-app/src/clean_pricing.py +10 -0
package/sample-apps/python-app/src/smelly_pricing.py +8 -0
package/sample-apps/ts-backend/eslint.config.mjs +3 -0
package/sample-apps/ts-backend/package.json +18 -0
package/sample-apps/ts-backend/src/clean-handler.ts +19 -0
package/sample-apps/ts-backend/src/smelly-handler.ts +29 -0
package/sample-apps/ts-backend/tsconfig.json +9 -0
package/sample-apps/ts-frontend/eslint.config.mjs +3 -0
package/sample-apps/ts-frontend/package.json +18 -0
package/sample-apps/ts-frontend/src/CleanWidget.tsx +18 -0
package/sample-apps/ts-frontend/src/SmellyWidget.tsx +27 -0
package/sample-apps/ts-frontend/tsconfig.json +10 -0
package/scripts/_mcp_app.py +21 -0
package/scripts/check_clean_code_review_candidates.py +302 -0
package/scripts/check_fastmcp_server.py +106 -0
package/scripts/check_packages.py +137 -0
package/scripts/check_python_config.py +130 -0
package/scripts/check_repo_python_lint.py +46 -0
package/scripts/check_retrieval_evals.py +132 -0
package/scripts/check_sample_apps.py +169 -0
package/scripts/check_semantic_search_tooling.py +102 -0
package/scripts/clean_code_eslint_triggers.py +272 -0
package/scripts/clean_code_mcp_server.py +7 -0
package/scripts/clean_code_python_triggers.py +318 -0
package/scripts/clean_code_review_candidates.py +291 -0
package/scripts/clean_code_review_io.py +36 -0
package/scripts/clean_code_review_models.py +43 -0
package/scripts/clean_code_semantic.py +27 -0
package/scripts/set_package_versions.py +82 -0
package/scripts/weaviate_ingest_clean_code.py +44 -0
package/scripts/weaviate_search_clean_code.py +51 -0
package/skills/clean-code-mcp-reviewer/SKILL.md +209 -0
package/skills/clean-code-mcp-reviewer/evals/evals.json +30 -0
package/src/js/eslint-plugin-clean-code.mjs +758 -0
package/src/python/clean_code_tools_pylint/__init__.py +14 -0
package/src/python/clean_code_tools_pylint/ast_checker.py +122 -0
package/src/python/clean_code_tools_pylint/comments.py +83 -0
package/src/python/clean_code_tools_pylint/helpers.py +196 -0
package/src/python/mcp_server/__init__.py +1 -0
package/src/python/mcp_server/corpus.py +160 -0
package/src/python/mcp_server/markdown.py +126 -0
package/src/python/mcp_server/models.py +73 -0
package/src/python/mcp_server/ranking.py +125 -0
package/src/python/mcp_server/ranking_scoring.py +232 -0
package/src/python/mcp_server/semantic.py +192 -0
package/src/python/mcp_server/server.py +235 -0
package/src/python/mcp_server/server_payloads.py +83 -0
package/src/python/mcp_server/text.py +104 -0
package/src/python/mcp_server/utils/__init__.py +1 -0
package/src/python/mcp_server/utils/httpx_loader.py +14 -0
package/src/python/mcp_server/utils/increment.py +7 -0
package/src/python/mcp_server/utils/sha256_text.py +8 -0
package/src/python/mcp_server/utils/unique_strings.py +15 -0
package/src/python/mcp_server/weaviate.py +182 -0
package/uv.lock +2012 -0

package/src/python/mcp_server/semantic.py ADDED Viewed

@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+from mcp_server.corpus import (
+    CHUNK_ID_NAMESPACE,
+    MARKDOWN_SOURCES,
+    PATTERN_RECORDS,
+    ROOT,
+    build_chunks,
+    get_pattern_record,
+    load_pattern_records,
+    markdown_chunks,
+    object_id_for,
+    pattern_record_chunks,
+)
+from mcp_server.markdown import (
+    CODE_FENCE_RE,
+    HEADING_RE,
+    MAX_SECTION_TOKENS,
+    TARGET_SECTION_TOKENS,
+    MarkdownSection,
+    infer_markdown_rule_family,
+    markdown_aliases,
+    markdown_sections,
+    semantic_blocks,
+    split_section_body,
+)
+from mcp_server.models import (
+    CHUNKER_VERSION,
+    DEFAULT_EMBEDDING_MODEL,
+    DEFAULT_EMBEDDING_PROVIDER,
+    CleanCodeChunk,
+    Confidence,
+    JsonDict,
+)
+from mcp_server.ranking import (
+    local_keyword_rows,
+    merge_search_rows,
+    rank_pattern_rows,
+    search_pattern_records,
+)
+from mcp_server.ranking_scoring import (
+    BROAD_CATEGORY_QUERY_TERMS,
+    CONSERVATIVE_CONTEXT_THRESHOLD,
+    EXACT_HIGH_CONFIDENCE_THRESHOLD,
+    EXACT_MATCH_STOPWORDS,
+    HIGH_CONFIDENCE_THRESHOLD,
+    MEDIUM_CONFIDENCE_THRESHOLD,
+    NON_SPECIFIC_EXACT_ALIASES,
+    VAGUE_QUERY_TERMS,
+    confidence_for,
+    context_penalty_and_reasons,
+    dedupe_pattern_results,
+    distance_for,
+    exact_match_terms,
+    metadata_boost_and_reasons,
+    row_matches_filters,
+    score_pattern_row,
+)
+from mcp_server.text import (
+    CC_ID_RE,
+    PHRASE_BONUS_MIN_OVERLAP,
+    PLURAL_NORMALIZATION_MIN_LENGTH,
+    SLUG_RE,
+    WORD_RE,
+    approximate_tokens,
+    clean_alias,
+    clean_topic,
+    clean_topic_text,
+    detected_record_id,
+    languages_in_text,
+    lexical_score,
+    lint_candidates_in_text,
+    normalize_token,
+    query_tokens,
+    searchable_row_text,
+    semantic_similarity,
+    slug,
+    slugless,
+)
+from mcp_server.utils.httpx_loader import require_httpx
+from mcp_server.utils.sha256_text import sha256_text
+from mcp_server.utils.unique_strings import unique_strings
+from mcp_server.weaviate import (
+    COLLECTION_NAME,
+    DEFAULT_BATCH_SIZE,
+    DEFAULT_WEAVIATE_URL,
+    FASTEMBED_INSTALL_MESSAGE,
+    GRAPHQL_NAME_RE,
+    HTTP_NOT_FOUND,
+    VECTOR_NAME,
+    batch_failures,
+    build_search_graphql_query,
+    create_schema_payload,
+    embed_query,
+    embed_texts,
+    execute_graphql_search,
+    ingest_chunks,
+    is_successful_batch_row,
+    reset_collection,
+    search_chunks,
+    search_rows_from_payload,
+)
+__all__ = [
+    "BROAD_CATEGORY_QUERY_TERMS",
+    "CC_ID_RE",
+    "CHUNKER_VERSION",
+    "CHUNK_ID_NAMESPACE",
+    "CODE_FENCE_RE",
+    "COLLECTION_NAME",
+    "CONSERVATIVE_CONTEXT_THRESHOLD",
+    "DEFAULT_BATCH_SIZE",
+    "DEFAULT_EMBEDDING_MODEL",
+    "DEFAULT_EMBEDDING_PROVIDER",
+    "DEFAULT_WEAVIATE_URL",
+    "EXACT_HIGH_CONFIDENCE_THRESHOLD",
+    "EXACT_MATCH_STOPWORDS",
+    "FASTEMBED_INSTALL_MESSAGE",
+    "GRAPHQL_NAME_RE",
+    "HEADING_RE",
+    "HIGH_CONFIDENCE_THRESHOLD",
+    "HTTP_NOT_FOUND",
+    "MARKDOWN_SOURCES",
+    "MAX_SECTION_TOKENS",
+    "MEDIUM_CONFIDENCE_THRESHOLD",
+    "NON_SPECIFIC_EXACT_ALIASES",
+    "PATTERN_RECORDS",
+    "PHRASE_BONUS_MIN_OVERLAP",
+    "PLURAL_NORMALIZATION_MIN_LENGTH",
+    "ROOT",
+    "SLUG_RE",
+    "TARGET_SECTION_TOKENS",
+    "VAGUE_QUERY_TERMS",
+    "VECTOR_NAME",
+    "WORD_RE",
+    "CleanCodeChunk",
+    "Confidence",
+    "JsonDict",
+    "MarkdownSection",
+    "approximate_tokens",
+    "batch_failures",
+    "build_chunks",
+    "build_search_graphql_query",
+    "clean_alias",
+    "clean_topic",
+    "clean_topic_text",
+    "confidence_for",
+    "context_penalty_and_reasons",
+    "create_schema_payload",
+    "dedupe_pattern_results",
+    "detected_record_id",
+    "distance_for",
+    "embed_query",
+    "embed_texts",
+    "exact_match_terms",
+    "execute_graphql_search",
+    "get_pattern_record",
+    "infer_markdown_rule_family",
+    "ingest_chunks",
+    "is_successful_batch_row",
+    "languages_in_text",
+    "lexical_score",
+    "lint_candidates_in_text",
+    "load_pattern_records",
+    "local_keyword_rows",
+    "markdown_aliases",
+    "markdown_chunks",
+    "markdown_sections",
+    "merge_search_rows",
+    "metadata_boost_and_reasons",
+    "normalize_token",
+    "object_id_for",
+    "pattern_record_chunks",
+    "query_tokens",
+    "rank_pattern_rows",
+    "require_httpx",
+    "reset_collection",
+    "row_matches_filters",
+    "score_pattern_row",
+    "search_chunks",
+    "search_pattern_records",
+    "search_rows_from_payload",
+    "searchable_row_text",
+    "semantic_blocks",
+    "semantic_similarity",
+    "sha256_text",
+    "slug",
+    "slugless",
+    "split_section_body",
+    "unique_strings",
+]

package/src/python/mcp_server/server.py ADDED Viewed

@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+import argparse
+import json
+from typing import Any
+from mcp_server import semantic
+from mcp_server.server_payloads import (
+    default_lint_targets,
+    facet_counts,
+    lint_rule_recommendation,
+    search_result,
+)
+MAX_SEARCH_LIMIT = 25  # upper bound enforced on the `limit` argument of the search tools below
+COLLECTION_NAME = semantic.COLLECTION_NAME  # re-exported; used as the default `collection` argument
+DEFAULT_EMBEDDING_MODEL = semantic.DEFAULT_EMBEDDING_MODEL  # re-exported; default `model` argument
+DEFAULT_WEAVIATE_URL = semantic.DEFAULT_WEAVIATE_URL  # re-exported; default `weaviate_url` argument
+build_chunks = semantic.build_chunks  # module-level alias for the semantic facade function
+create_schema_payload = semantic.create_schema_payload  # module-level alias
+get_pattern_record = semantic.get_pattern_record  # module-level alias; pattern_by_id treats None as "not found"
+search_pattern_records = semantic.search_pattern_records  # module-level alias
+search_chunks = semantic.search_chunks  # module-level alias
+try:  # FastMCP is imported defensively so a missing dependency yields an install hint, not a traceback
+    from fastmcp import FastMCP
+except ImportError as exc:  # pragma: no cover - exercised by CLI users without deps
+    raise SystemExit(
+        "Install FastMCP to run the server: python3 -m pip install 'fastmcp>=2.0.0'"
+    ) from exc
+mcp = FastMCP("clean-code-tools")  # server instance that the @mcp.resource / @mcp.tool decorators below register with
+@mcp.resource("clean-code://corpus/summary")
+def corpus_summary() -> str:
+    """Return a compact summary of the local clean-code semantic corpus."""
+    chunks = build_chunks()
+    by_kind: dict[str, int] = {}
+    for chunk in chunks:
+        by_kind[chunk.chunk_kind] = by_kind.get(chunk.chunk_kind, 0) + 1
+    return json.dumps(
+        {
+            "chunks": len(chunks),
+            "by_kind": by_kind,
+            "default_collection": COLLECTION_NAME,
+            "default_embedding_model": DEFAULT_EMBEDDING_MODEL,
+        },
+        sort_keys=True,
+    )
+@mcp.resource("clean-code://weaviate/schema")
+def weaviate_schema() -> str:
+    """Return the Weaviate schema payload used by the ingest script."""
+    return json.dumps(create_schema_payload(), sort_keys=True, indent=2)
+@mcp.resource("clean-code://patterns/{pattern_id}")
+def clean_code_pattern_resource(pattern_id: str) -> str:
+    """Return one canonical clean-code pattern by ID."""
+    return json.dumps(pattern_by_id(pattern_id), sort_keys=True, indent=2)
+@mcp.tool
+def clean_code_corpus_summary() -> dict[str, Any]:
+    """Return chunk counts for the local clean-code corpus."""
+    return json.loads(corpus_summary())
+@mcp.tool
+def clean_code_weaviate_schema() -> dict[str, Any]:
+    """Return the Weaviate collection schema used for clean-code search."""
+    return create_schema_payload()
+@mcp.tool
+def search_clean_code(
+    query: str,
+    limit: int = 8,
+    weaviate_url: str = DEFAULT_WEAVIATE_URL,
+    collection: str = COLLECTION_NAME,
+    model: str = DEFAULT_EMBEDDING_MODEL,
+) -> list[dict[str, Any]]:
+    """Search the local Weaviate clean-code collection.
+    Requires a running Weaviate instance populated with
+    `scripts/weaviate_ingest_clean_code.py --reset`.
+    """
+    if not query.strip():
+        raise ValueError("query must not be empty")
+    if limit < 1 or limit > MAX_SEARCH_LIMIT:
+        raise ValueError("limit must be between 1 and 25")
+    rows = search_chunks(
+        query=query,
+        url=weaviate_url,
+        collection_name=collection,
+        model_name=model,
+        limit=limit,
+    )
+    return [search_result(row) for row in rows]
+@mcp.tool
+# pylint: disable-next=too-many-arguments
+def search_clean_code_patterns(
+    query: str,
+    limit: int = 8,
+    language: str = "any",
+    rule_families: list[str] | None = None,
+    topics: list[str] | None = None,
+    lintability: list[str] | None = None,
+    source_kinds: list[str] | None = None,
+    weaviate_url: str = DEFAULT_WEAVIATE_URL,
+    collection: str = COLLECTION_NAME,
+    model: str = DEFAULT_EMBEDDING_MODEL,
+) -> dict[str, Any]:
+    """Find canonical clean-code patterns relevant to a concrete code concern."""
+    if not query.strip():
+        raise ValueError("query must not be empty")
+    if limit < 1 or limit > MAX_SEARCH_LIMIT:
+        raise ValueError("limit must be between 1 and 25")
+    return search_pattern_records(
+        query=query,
+        url=weaviate_url,
+        collection_name=collection,
+        model_name=model,
+        limit=limit,
+        language=language,
+        rule_families=tuple(rule_families or ()),
+        topics=tuple(topics or ()),
+        lintability=tuple(lintability or ()),
+        source_kinds=tuple(source_kinds or ("clean_code_pattern",)),
+    )
+@mcp.tool
+def get_clean_code_pattern(pattern_id: str) -> dict[str, Any]:
+    """Return the full canonical clean-code pattern record for a `CC-###` ID."""
+    return pattern_by_id(pattern_id)
+@mcp.tool
+# pylint: disable-next=too-many-arguments
+def recommend_clean_code_lint_rules(
+    query: str,
+    language: str = "any",
+    targets: list[str] | None = None,
+    limit: int = 8,
+    weaviate_url: str = DEFAULT_WEAVIATE_URL,
+    collection: str = COLLECTION_NAME,
+    model: str = DEFAULT_EMBEDDING_MODEL,
+) -> dict[str, Any]:
+    """Recommend lint-rule candidates for repeated clean-code concerns."""
+    search_payload = search_clean_code_patterns(
+        query=query,
+        limit=limit,
+        language=language,
+        lintability=["high", "medium"],
+        source_kinds=["clean_code_pattern"],
+        weaviate_url=weaviate_url,
+        collection=collection,
+        model=model,
+    )
+    requested_targets = targets or default_lint_targets(language)
+    if search_payload["no_strong_match"]:
+        return {
+            "query": query,
+            "language": language,
+            "targets": requested_targets,
+            "results": [],
+            "no_strong_match": True,
+            "no_recommendation": "No high-confidence lint-rule candidate matched this query.",
+        }
+    return {
+        "query": query,
+        "language": language,
+        "targets": requested_targets,
+        "results": [lint_rule_recommendation(result, requested_targets) for result in search_payload["results"]],
+        "no_strong_match": search_payload["no_strong_match"],
+    }
+@mcp.tool
+def list_clean_code_facets() -> dict[str, Any]:
+    """Return available filter facets for clean-code pattern search."""
+    return facet_counts(build_chunks())
+def pattern_by_id(pattern_id: str) -> dict[str, Any]:
+    normalized = pattern_id.strip().upper()
+    if not semantic.CC_ID_RE.fullmatch(normalized):
+        raise ValueError("pattern_id must use the CC-### format")
+    record = get_pattern_record(normalized)
+    if record is None:
+        raise ValueError(f"pattern not found: {normalized}")
+    return record
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run the clean-code FastMCP server.")
+    parser.add_argument(
+        "--transport",
+        choices=("stdio", "http", "sse"),
+        default="stdio",
+        help="FastMCP transport to run.",
+    )
+    parser.add_argument("--host", default="127.0.0.1")
+    parser.add_argument("--port", type=int, default=8765)
+    return parser.parse_args()
+def main() -> None:
+    args = parse_args()
+    if args.transport == "stdio":
+        mcp.run()
+        return
+    mcp.run(transport=args.transport, host=args.host, port=args.port)
+if __name__ == "__main__":
+    main()

package/src/python/mcp_server/server_payloads.py ADDED Viewed

@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+from typing import Any
+from mcp_server.models import CleanCodeChunk
+from mcp_server.utils.increment import increment
+def facet_counts(chunks: list[CleanCodeChunk]) -> dict[str, dict[str, int]]:
+    facets: dict[str, dict[str, int]] = {
+        "topics": {},
+        "rule_families": {},
+        "lintability": {},
+        "source_kinds": {},
+        "languages": {},
+        "chunk_kinds": {},
+    }
+    for chunk in chunks:
+        increment(facets["topics"], chunk.topic)
+        increment(facets["rule_families"], chunk.rule_family)
+        increment(facets["lintability"], chunk.lintability)
+        increment(facets["source_kinds"], chunk.source_kind)
+        increment(facets["chunk_kinds"], chunk.chunk_kind)
+        for language in chunk.languages:
+            increment(facets["languages"], language)
+    return facets
+def search_result(row: dict[str, Any]) -> dict[str, Any]:
+    additional = row.get("_additional") or {}
+    content = " ".join(str(row.get("contentText", "")).split())
+    return {
+        "chunk_id": row.get("chunkId", ""),
+        "record_id": row.get("recordId", ""),
+        "title": row.get("title", ""),
+        "topic": row.get("topic", ""),
+        "source_file": row.get("sourceFile", ""),
+        "source_kind": row.get("sourceKind", ""),
+        "rule_family": row.get("ruleFamily", ""),
+        "lintability": row.get("lintability", ""),
+        "distance": additional.get("distance"),
+        "snippet": content[:500],
+    }
+def lint_rule_recommendation(result: dict[str, Any], targets: list[str]) -> dict[str, Any]:
+    return {
+        "pattern_id": result["pattern_id"],
+        "title": result["title"],
+        "rule_family": result["rule_family"],
+        "lintability": result["lintability"],
+        "confidence": result["confidence"],
+        "score": result["score"],
+        "targets": targets,
+        "static_signals": result.get("lint_candidates", []),
+        "false_positive_risks": false_positive_risks(result),
+        "suppression_strategy": suppression_strategy(targets),
+        "autofix": "review required; only offer autofix for syntax-preserving local rewrites",
+        "match_reasons": result.get("match_reasons", []),
+    }
+def default_lint_targets(language: str) -> list[str]:
+    if language == "typescript":
+        return ["eslint", "semgrep"]
+    if language == "python":
+        return ["ruff", "pylint", "semgrep"]
+    return ["eslint", "ruff", "pylint", "semgrep"]
+def false_positive_risks(result: dict[str, Any]) -> list[str]:
+    risks = [
+        "local project conventions may intentionally allow this shape",
+        "tests, generated files, fixtures, and framework adapters may be safe contexts",
+    ]
+    if result.get("lintability") == "medium":
+        risks.append("medium-lintability patterns need narrower project-specific allowlists")
+    return risks
+def suppression_strategy(targets: list[str]) -> str:
+    return f"use the narrowest inline suppression supported by {', '.join(targets)} and require a reason"

package/src/python/mcp_server/text.py ADDED Viewed

@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+import re
+from typing import Any
+PHRASE_BONUS_MIN_OVERLAP = 2  # lexical_score adds its bonus once this many query terms overlap
+PLURAL_NORMALIZATION_MIN_LENGTH = 4  # normalize_token strips a trailing "s" only from tokens longer than this
+SLUG_RE = re.compile(r"[^a-z0-9]+")  # runs of characters outside a-z0-9; slug() replaces each run with "-"
+WORD_RE = re.compile(r"[a-z0-9]+")  # runs of a-z0-9; query_tokens() applies it to lower-cased text
+CC_ID_RE = re.compile(r"\b(CC-\d{3})\b")  # "CC-" plus exactly three digits at word boundaries; the ID is group 1
+def semantic_similarity(distance: float | None) -> float:
+    if distance is None:
+        return 0.0
+    return max(0.0, min(1.0, 1.0 - distance))
+def lexical_score(query_terms: set[str], haystack: str) -> float:
+    if not query_terms:
+        return 0.0
+    haystack_terms = set(query_tokens(haystack))
+    if not haystack_terms:
+        return 0.0
+    overlap = query_terms & haystack_terms
+    phrase_bonus = 0.2 if overlap and len(overlap) >= PHRASE_BONUS_MIN_OVERLAP else 0.0
+    return min(1.0, len(overlap) / len(query_terms) + phrase_bonus)
+def searchable_row_text(row: dict[str, Any]) -> str:
+    return " ".join(
+        [
+            str(row.get("recordId", "")),
+            str(row.get("title", "")),
+            str(row.get("topic", "")),
+            str(row.get("ruleFamily", "")),
+            str(row.get("lintability", "")),
+            " ".join(str(value) for value in row.get("aliases", [])),
+            " ".join(str(value) for value in row.get("lintCandidates", [])),
+            str(row.get("contentText", "")),
+        ]
+    )
+def query_tokens(value: str) -> list[str]:
+    return [normalize_token(token) for token in WORD_RE.findall(value.lower())]
+def normalize_token(value: str) -> str:
+    if len(value) > PLURAL_NORMALIZATION_MIN_LENGTH and value.endswith("s"):
+        return value[:-1]
+    return value
+def detected_record_id(value: str) -> str:
+    match = CC_ID_RE.search(value)
+    return match.group(1) if match else ""
+def languages_in_text(text: str) -> tuple[str, ...]:
+    languages: list[str] = []
+    if "```ts" in text or "TypeScript" in text:
+        languages.append("typescript")
+    if "```python" in text or "Python" in text:
+        languages.append("python")
+    return tuple(languages)
+def lint_candidates_in_text(text: str) -> tuple[str, ...]:
+    return tuple(
+        line.split(":", 1)[1].strip()
+        for line in text.splitlines()
+        if line.startswith("Lint candidates:")
+    )
+def slug(value: str) -> str:
+    normalized = SLUG_RE.sub("-", value.lower()).strip("-")
+    return normalized[:96] or "section"
+def slugless(value: str) -> str:
+    return re.sub(r"^[#`\s]+|[#`\s]+$", "", value)
+def clean_topic(value: str) -> str:
+    topic = re.sub(r"^chapter\s+\d+:\s*", "", value, flags=re.IGNORECASE).strip()
+    return re.sub(r"^smells and heuristics\s*-\s*", "", topic, flags=re.IGNORECASE).strip()
+def clean_alias(value: str) -> str:
+    alias = clean_topic(value)
+    return "" if re.fullmatch(r"chapter\s+\d+", alias, flags=re.IGNORECASE) else alias
+def clean_topic_text(value: str) -> str:
+    value = re.sub(r"Chapter\s+\d+:\s*", "", value)
+    value = re.sub(r"\bChapter\s+\d+\s+", "", value)
+    return re.sub(r"Smells and Heuristics\s*-\s*", "", value)
+def approximate_tokens(value: str) -> int:
+    return max(1, len(re.findall(r"\S+", value)))

package/src/python/mcp_server/utils/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+

package/src/python/mcp_server/utils/httpx_loader.py ADDED Viewed

@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+from typing import Any
+HTTPX_INSTALL_MESSAGE = "Install httpx to talk to Weaviate: python3 -m pip install httpx"
+def require_httpx() -> Any:
+    try:
+        import httpx  # noqa: PLC0415
+    except ImportError as exc:
+        raise SystemExit(HTTPX_INSTALL_MESSAGE) from exc
+    return httpx

package/src/python/mcp_server/utils/increment.py ADDED Viewed

@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+def increment(counter: dict[str, int], value: str) -> None:
+    if value:
+        counter[value] = counter.get(value, 0) + 1  # pylint: disable=clean-code-output-argument-mutation

package/src/python/mcp_server/utils/sha256_text.py ADDED Viewed

@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+import hashlib
+def sha256_text(value: str) -> str:
+    return hashlib.sha256(value.encode()).hexdigest()

package/src/python/mcp_server/utils/unique_strings.py ADDED Viewed

@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+import re
+def unique_strings(values: list[str]) -> list[str]:
+    seen: set[str] = set()
+    unique: list[str] = []
+    for value in values:
+        normalized = re.sub(r"\s+", " ", value.strip())
+        if normalized and normalized not in seen:
+            seen.add(normalized)
+            unique.append(normalized)
+    return unique