@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
// Module: tier-a — Full structural extraction for 15 Tier A languages
|
|
2
|
+
|
|
3
|
+
import { basename } from "node:path";
|
|
4
|
+
import type { CandidateFact } from "@/capture/types";
|
|
5
|
+
|
|
6
|
+
/**
 * Regex tables for a single language, split by the kind of entity each table
 * detects. The extractor reads the entity name from capture group 1 of every
 * regex, so each pattern must expose the name there.
 */
interface LanguagePatterns {
	/** Patterns matching function and method declarations. */
	functions: RegExp[];
	/** Patterns matching class-like declarations (classes, interfaces, enums, ...). */
	classes: RegExp[];
	/** Patterns matching import / require / use statements. */
	imports: RegExp[];
	/** Patterns matching call sites. */
	calls: RegExp[];
}
|
|
13
|
+
|
|
14
|
+
/**
 * Build a short context snippet for a match.
 *
 * The snippet starts at the beginning of the line that contains `matchIndex`
 * and runs up to (not including) the third newline found after `matchIndex`,
 * or to the end of `content` when fewer than three newlines remain. The result
 * is trimmed of leading and trailing whitespace.
 *
 * @param content    Full text being scanned.
 * @param matchIndex Character offset of the match inside `content`.
 * @returns The trimmed snippet.
 */
function surroundingLines(content: string, matchIndex: number): string {
	// lastIndexOf yields -1 when no earlier newline exists, so "+ 1" lands on 0.
	const snippetStart = content.lastIndexOf("\n", matchIndex) + 1;

	let cursor = matchIndex;
	let newlinesLeft = 3;
	while (newlinesLeft > 0) {
		const newlineAt = content.indexOf("\n", cursor + 1);
		if (newlineAt === -1) {
			// Ran out of text: the snippet extends to the end of the content.
			return content.slice(snippetStart).trim();
		}
		cursor = newlineAt;
		newlinesLeft -= 1;
	}

	return content.slice(snippetStart, cursor).trim();
}
|
|
29
|
+
|
|
30
|
+
// ---------- Tier A pattern table ----------
|
|
31
|
+
|
|
32
|
+
/**
 * TypeScript / TSX patterns. Every regex exposes the entity name (or, for
 * `require`, the module specifier) as capture group 1.
 */
const tsPatterns: LanguagePatterns = {
	functions: [
		// function name / async function name, optionally exported
		/(?:export\s+)?(?:async\s+)?function\s+(\w+)/gm,
		// const name = (...) or const name = async (...) — heuristic for arrow functions
		/(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
	],
	classes: [
		/(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/gm,
		/(?:export\s+)?interface\s+(\w+)/gm,
		// type alias: `type Name =` or `type Name<`
		/(?:export\s+)?type\s+(\w+)\s*[<=]/gm,
		/(?:export\s+)?enum\s+(\w+)/gm,
	],
	imports: [
		// import { foo } from "bar" — captures the first named import.
		// Also accepts type-only forms: `import type { Foo }` and `import { type Foo }`.
		// The `(?!as\b)` guard keeps `import { type as alias }` capturing `type`.
		/import\s+(?:type\s+)?\{\s*(?:type\s+(?!as\b))?(\w+)/gm,
		// import * as name from "bar" (and `import type * as name`)
		/import\s+(?:type\s+)?\*\s+as\s+(\w+)/gm,
		// import name from "bar" (and `import type Name from "bar"`); a default
		// import that is literally called `type` still matches via backtracking.
		/import\s+(?:type\s+)?(\w+)\s+from\s+/gm,
		// require("bar") — captures the module specifier
		/require\s*\(\s*["']([^"']+)["']\s*\)/gm,
	],
	calls: [
		// standalone call: name( — not preceded by `.` or a word character
		/(?<![.\w])(\w+)\s*\(/gm,
		// method call: obj.name( — captures name
		/\.(\w+)\s*\(/gm,
		// constructor call: new Name(
		/new\s+(\w+)\s*\(/gm,
	],
};
|
|
64
|
+
|
|
65
|
+
// JavaScript reuses the TypeScript function, import and call tables (the very
// same RegExp instances). Its class table keeps only the class and enum
// patterns, leaving out the TypeScript interface and type-alias patterns.
const jsPatterns: LanguagePatterns = {
	functions: tsPatterns.functions,
	classes: [
		/(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/gm,
		/(?:export\s+)?enum\s+(\w+)/gm,
	],
	imports: tsPatterns.imports,
	calls: tsPatterns.calls,
};
|
|
72
|
+
|
|
73
|
+
/** Python patterns; the entity name is always capture group 1. */
const pythonPatterns: LanguagePatterns = {
	functions: [
		// def name / async def name
		/(?:async\s+)?def\s+(\w+)/gm,
	],
	classes: [
		// class Name — only when `class` starts the line
		/^class\s+(\w+)/gm,
	],
	imports: [
		// from module import name — captures the first imported name
		/from\s+\S+\s+import\s+(\w+)/gm,
		// import module — only when `import` starts the line
		/^import\s+(\w+)/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// attribute call: obj.name(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
84
|
+
|
|
85
|
+
/** Go patterns; the entity name (or import path) is capture group 1. */
const goPatterns: LanguagePatterns = {
	functions: [
		// func Name( as well as methods with a receiver: func (r *T) Name(
		/func\s+(?:\([^)]*\)\s+)?(\w+)\s*\(/gm,
	],
	classes: [
		// type Name struct
		/type\s+(\w+)\s+struct\b/gm,
		// type Name interface
		/type\s+(\w+)\s+interface\b/gm,
	],
	imports: [
		// single-line form: import "pkg"
		/import\s+"([^"]+)"/gm,
		// grouped form: an indented "pkg" line, as found inside import ( ... )
		/^\s+"([^"]+)"/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// selector call: pkg.Name( or value.Method(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
99
|
+
|
|
100
|
+
/** Rust patterns; the entity name is capture group 1. */
const rustPatterns: LanguagePatterns = {
	functions: [
		// fn name, with optional `pub` and/or `async` in front
		/(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/gm,
	],
	classes: [
		// struct / enum / trait declarations, optionally `pub`
		/(?:pub\s+)?struct\s+(\w+)/gm,
		/(?:pub\s+)?enum\s+(\w+)/gm,
		/(?:pub\s+)?trait\s+(\w+)/gm,
	],
	imports: [
		// use a::b::Name — captures the final plain path segment
		/use\s+(?:\w+::)*(\w+)/gm,
		// mod name
		/mod\s+(\w+)/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// path call: Type::name(
		/::(\w+)\s*\(/gm,
	],
};
|
|
118
|
+
|
|
119
|
+
/** Java patterns; the entity name is capture group 1. */
const javaPatterns: LanguagePatterns = {
	functions: [
		// method with an access modifier: modifiers, return type (optionally generic), name(
		/(?:public|private|protected)\s+(?:static\s+)?(?:final\s+)?(?:(?:abstract|synchronized|native)\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(/gm,
		// default-access method on an indented line: returnType name(
		// The negative lookahead rejects statement keywords in the return-type
		// slot, so `return foo(`, `new Foo(`, `throw fail(`, `else if (` and
		// similar statements are not mistaken for method declarations.
		/^\s+(?!(?:return|new|throw|else|assert|case|yield|do)\b)\w+(?:<[^>]*>)?\s+(\w+)\s*\(/gm,
	],
	classes: [
		/(?:public\s+)?(?:abstract\s+)?(?:final\s+)?class\s+(\w+)/gm,
		/(?:public\s+)?interface\s+(\w+)/gm,
		/(?:public\s+)?enum\s+(\w+)/gm,
	],
	imports: [
		// import a.b.Name; / import static a.b.Name; — captures the last segment
		/import\s+(?:static\s+)?[\w.]+\.(\w+)\s*;/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// method call: obj.name(
		/\.(\w+)\s*\(/gm,
		// constructor call: new Name(
		/new\s+(\w+)\s*\(/gm,
	],
};
|
|
134
|
+
|
|
135
|
+
/** Kotlin patterns; the entity name is capture group 1. */
const kotlinPatterns: LanguagePatterns = {
	functions: [
		// fun name / suspend fun name, with optional type parameters: fun <T> name
		/(?:suspend\s+)?fun\s+(?:<[^>]*>\s+)?(\w+)/gm,
	],
	classes: [
		// class Name / data class Name
		/(?:data\s+)?class\s+(\w+)/gm,
		// object Name
		/object\s+(\w+)/gm,
		// interface Name
		/interface\s+(\w+)/gm,
		// enum class Name
		/enum\s+class\s+(\w+)/gm,
	],
	imports: [
		// import a.b.Name — captures the last segment
		/import\s+[\w.]+\.(\w+)/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call: obj.name(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
146
|
+
|
|
147
|
+
/** Swift patterns; the entity name is capture group 1. */
const swiftPatterns: LanguagePatterns = {
	functions: [
		// func name
		/func\s+(\w+)/gm,
	],
	classes: [
		// class / struct / enum / protocol declarations
		/class\s+(\w+)/gm,
		/struct\s+(\w+)/gm,
		/enum\s+(\w+)/gm,
		/protocol\s+(\w+)/gm,
	],
	imports: [
		// import Module
		/import\s+(\w+)/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call: obj.name(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
153
|
+
|
|
154
|
+
/** PHP patterns; the entity name (or included path) is capture group 1. */
const phpPatterns: LanguagePatterns = {
	functions: [
		// free function: `function name` at the start of a line
		/^function\s+(\w+)/gm,
		// method: visibility modifier, optional `static`, then `function name`
		/(?:public|private|protected)\s+(?:static\s+)?function\s+(\w+)/gm,
	],
	classes: [
		// class Name / abstract class Name
		/(?:abstract\s+)?class\s+(\w+)/gm,
		// interface Name
		/interface\s+(\w+)/gm,
		// trait Name
		/trait\s+(\w+)/gm,
	],
	imports: [
		// use Namespace\Class — captures the last segment
		/use\s+[\w\\]+\\(\w+)/gm,
		// require / require_once / include / include_once "path"
		/(?:require|require_once|include|include_once)\s+["']([^"']+)["']/gm,
	],
	calls: [
		// standalone call: name( — not preceded by `.`, `$` or a word character
		/(?<![.\w$])(\w+)\s*\(/gm,
		// instance call: $obj->name(
		/->(\w+)\s*\(/gm,
		// static call: Class::name(
		/::(\w+)\s*\(/gm,
	],
};
|
|
170
|
+
|
|
171
|
+
/** Ruby patterns; the entity name (or required path) is capture group 1. */
const rubyPatterns: LanguagePatterns = {
	functions: [
		// def name / def self.name
		/def\s+(?:self\.)?(\w+)/gm,
	],
	classes: [
		// class Name
		/class\s+(\w+)/gm,
		// module Name
		/module\s+(\w+)/gm,
	],
	imports: [
		// require "name" / require 'name'
		/require\s+["']([^"']+)["']/gm,
		// require_relative "path"
		/require_relative\s+["']([^"']+)["']/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// method call: obj.name(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
182
|
+
|
|
183
|
+
/** Scala patterns; the entity name is capture group 1. */
const scalaPatterns: LanguagePatterns = {
	functions: [
		// def name
		/def\s+(\w+)/gm,
	],
	classes: [
		// class Name / case class Name
		/(?:case\s+)?class\s+(\w+)/gm,
		// object Name
		/object\s+(\w+)/gm,
		// trait Name
		/trait\s+(\w+)/gm,
	],
	imports: [
		// import a.b.Name — captures the last segment
		/import\s+[\w.]+\.(\w+)/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call: obj.name(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
189
|
+
|
|
190
|
+
/** Elixir patterns; the entity name (possibly dotted) is capture group 1. */
const elixirPatterns: LanguagePatterns = {
	functions: [
		// def name / defp name; requiring whitespace after the keyword keeps
		// `defmodule` from matching here
		/\b(?:def|defp)\s+(\w+)/gm,
	],
	classes: [
		// defmodule Some.Dotted.Name
		/defmodule\s+([\w.]+)/gm,
	],
	imports: [
		// import Module
		/\bimport\s+([\w.]+)/gm,
		// alias Module
		/\balias\s+([\w.]+)/gm,
		// use Module
		/\buse\s+([\w.]+)/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// remote call: Module.name(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
202
|
+
|
|
203
|
+
/** Dart patterns; the entity name (or import URI) is capture group 1. */
const dartPatterns: LanguagePatterns = {
	functions: [
		// returnType name( — e.g. `void main(`, `Widget build(`, `Future<void> load(`.
		// Any identifier is accepted as the return type, so two lookaheads weed
		// out statements that merely look like declarations:
		//   - the type slot may not be a statement/expression keyword
		//     (`return foo(`, `await run(`, `const Text(`, `new Foo(` ...)
		//   - the name slot may not be a control-flow keyword
		//     (`on FormatException catch (e)` ...)
		// The leading \b stops the match from starting in the middle of a word,
		// which would otherwise let a rejected keyword match from its second letter.
		/\b(?!(?:return|await|new|throw|else|yield|case|const|in|is|as|assert|do)\b)\w+(?:<[^>]*>)?\s+(?!(?:if|for|while|switch|catch)\b)(\w+)\s*\(/gm,
	],
	classes: [
		// class Name / abstract class Name
		/(?:abstract\s+)?class\s+(\w+)/gm,
		// mixin Name
		/mixin\s+(\w+)/gm,
		// extension Name
		/extension\s+(\w+)/gm,
	],
	imports: [
		// import 'package:foo/foo.dart' — captures the URI
		/import\s+['"]([^'"]+)['"]/gm,
	],
	calls: [
		// standalone call: name(
		/(?<![.\w])(\w+)\s*\(/gm,
		// member call: obj.name(
		/\.(\w+)\s*\(/gm,
	],
};
|
|
212
|
+
|
|
213
|
+
// ---------- Extension to (language name, patterns) mapping ----------
|
|
214
|
+
|
|
215
|
+
/**
 * Lookup from file extension (including the leading dot) to the language label
 * attached to extracted facts and the pattern table used to scan the file.
 */
const TIER_A_PATTERNS: Record<string, { language: string; patterns: LanguagePatterns }> = {
	// TypeScript family
	".ts": { language: "typescript", patterns: tsPatterns },
	".tsx": { language: "tsx", patterns: tsPatterns },

	// JavaScript family
	".js": { language: "javascript", patterns: jsPatterns },
	".mjs": { language: "javascript", patterns: jsPatterns },
	".cjs": { language: "javascript", patterns: jsPatterns },
	".jsx": { language: "jsx", patterns: jsPatterns },

	// One pattern table per remaining language
	".py": { language: "python", patterns: pythonPatterns },
	".go": { language: "go", patterns: goPatterns },
	".rs": { language: "rust", patterns: rustPatterns },
	".java": { language: "java", patterns: javaPatterns },
	".kt": { language: "kotlin", patterns: kotlinPatterns },
	".kts": { language: "kotlin", patterns: kotlinPatterns },
	".swift": { language: "swift", patterns: swiftPatterns },
	".php": { language: "php", patterns: phpPatterns },
	".rb": { language: "ruby", patterns: rubyPatterns },
	".scala": { language: "scala", patterns: scalaPatterns },
	".ex": { language: "elixir", patterns: elixirPatterns },
	".exs": { language: "elixir", patterns: elixirPatterns },
	".dart": { language: "dart", patterns: dartPatterns },
};
|
|
236
|
+
|
|
237
|
+
/** Singular category label attached to each extracted fact (tag and summary). */
type Category = "function" | "class" | "import" | "call";

/**
 * Pairs every LanguagePatterns key with its category label. The array order is
 * the processing order: functions and classes come before calls, which lets
 * call extraction consult the declaration positions gathered earlier.
 */
const CATEGORIES: { key: keyof LanguagePatterns; category: Category }[] = [
	{ key: "functions", category: "function" },
	{ key: "classes", category: "class" },
	{ key: "imports", category: "import" },
	{ key: "calls", category: "call" },
];
|
|
246
|
+
|
|
247
|
+
/**
 * Names that are never reported as calls: language keywords that can be
 * followed by `(`, literals, declaration keywords from the supported
 * languages, and a handful of ubiquitous printing helpers.
 */
const CALL_NOISE = new Set([
	// control flow and operators
	"if", "for", "while", "switch", "catch", "return", "throw", "typeof",
	"instanceof", "void", "delete", "await", "else", "case", "break", "continue",
	"do", "in", "of",
	// bindings and literals
	"let", "const", "var", "true", "false", "null", "undefined",
	// more statements
	"try", "finally", "yield",
	// module keywords
	"import", "export", "from", "require", "include", "require_once", "include_once",
	// declaration keywords across languages
	"def", "class", "fn", "func", "fun", "function", "pub", "async",
	// receivers and construction
	"self", "super", "this", "new",
	// Rust / type-system keywords
	"use", "mod", "type", "interface", "enum", "struct", "trait", "impl",
	"where", "match", "loop",
	// printing helpers
	"print", "println", "printf", "fmt",
	// Elixir definitions
	"defmodule", "defp",
]);
|
|
315
|
+
|
|
316
|
+
/**
 * Find the offset of the captured name inside the full match text.
 *
 * A plain `indexOf(name)` is wrong for short names: in `function f` the first
 * "f" belongs to the keyword. In the Tier A patterns the name is always the
 * last identifier of the match that stands as a whole word, so the search runs
 * from the end and only accepts an occurrence that is not glued to other word
 * characters.
 *
 * @param matchText The full regex match (`m[0]`).
 * @param name      The non-empty text of capture group 1.
 * @returns Offset of the name within `matchText`.
 */
function captureOffset(matchText: string, name: string): number {
	const isWordChar = (ch: string | undefined): boolean => ch !== undefined && /\w/.test(ch);

	let at = matchText.lastIndexOf(name);
	while (at !== -1) {
		const standsAlone =
			!isWordChar(at > 0 ? matchText[at - 1] : undefined) &&
			!isWordChar(matchText[at + name.length]);
		if (standsAlone) return at;
		// lastIndexOf clamps a negative start to 0, so stop explicitly at the front.
		if (at === 0) break;
		at = matchText.lastIndexOf(name, at - 1);
	}

	// No whole-word occurrence found: fall back to the first raw occurrence.
	return matchText.indexOf(name);
}

/**
 * Extract structural code entities from file content using language-specific
 * regex patterns. The language is chosen from the file extension via
 * TIER_A_PATTERNS; unknown extensions yield an empty result.
 *
 * Categories are processed in CATEGORIES order. Within a category each name is
 * reported once (first match wins). Call matches are dropped when the name is
 * in CALL_NOISE or when the name sits exactly where a function/class
 * declaration placed its name, so `function foo()` is not also reported as a
 * call to `foo`.
 *
 * For imports, a `from "module"` or `require("module")` found between the
 * match start and the end of that line is attached as an `imports`
 * relationship.
 *
 * @param content  The file text to scan.
 * @param filePath Path used to determine language by extension.
 * @returns An array of CandidateFact objects, one per distinct (category, name).
 */
export function extractTierA(content: string, filePath: string): CandidateFact[] {
	if (!content || !filePath) return [];

	const dotIdx = filePath.lastIndexOf(".");
	if (dotIdx === -1) return [];

	const entry = TIER_A_PATTERNS[filePath.slice(dotIdx)];
	if (!entry) return [];

	const { language, patterns } = entry;
	const base = basename(filePath);
	const facts: CandidateFact[] = [];
	const seen = new Set<string>();

	// Offsets at which a function/class declaration placed its name.
	const declPositions = new Set<number>();

	// Non-global helpers used to pull the source module out of an import line;
	// created once per call instead of once per match.
	const fromClause = /from\s+["']([^"']+)["']/;
	const requireClause = /require\s*\(\s*["']([^"']+)["']\s*\)/;

	for (const { key, category } of CATEGORIES) {
		const isDeclaration = category === "function" || category === "class";

		for (const regex of patterns[key]) {
			// The pattern objects are shared module state with the `g` flag, so
			// rewind before every scan.
			regex.lastIndex = 0;

			for (let m = regex.exec(content); m !== null; m = regex.exec(content)) {
				const name = m[1];
				if (!name) continue;

				const nameStart = m.index + captureOffset(m[0], name);

				if (isDeclaration) {
					// Record the position before deduplication: a repeated
					// declaration (overload, same method name in another class)
					// must still shield its own `name(` from call extraction.
					declPositions.add(nameStart);
				} else if (category === "call") {
					if (CALL_NOISE.has(name) || declPositions.has(nameStart)) continue;
				}

				// Deduplicate by name + category
				const dedupeKey = `${category}:${name}`;
				if (seen.has(dedupeKey)) continue;
				seen.add(dedupeKey);

				const fact: CandidateFact = {
					type: "CodeEntity",
					name,
					content: surroundingLines(content, m.index),
					summary: `${category} ${name} in ${base}`,
					tags: [language, category],
					file_paths: [filePath],
					trust_tier: 2,
					confidence: 0.92,
					extraction_method: "regex-ast",
				};

				if (category === "import") {
					// Only the remainder of the matched line is inspected.
					const lineEnd = content.indexOf("\n", m.index);
					const line = content.slice(m.index, lineEnd === -1 ? undefined : lineEnd);
					const sourceMod = fromClause.exec(line)?.[1] ?? requireClause.exec(line)?.[1];
					if (sourceMod) {
						fact.proposed_relationships = [
							{ target_name: sourceMod, type: "imports", weight: 0.9 },
						];
					}
				}

				facts.push(fact);
			}
		}
	}

	return facts;
}
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
// Module: tier-b — Structural extraction (no call tracking) for 10 Tier B languages
|
|
2
|
+
|
|
3
|
+
import { basename } from "node:path";
|
|
4
|
+
import type { CandidateFact } from "@/capture/types";
|
|
5
|
+
|
|
6
|
+
/**
 * Regex table for one language, split by the kind of entity each pattern finds.
 * The extractor in this module reads the entity name from capture group 1.
 */
interface LanguagePatterns {
  /** Patterns that locate function-like definitions. */
  functions: RegExp[];
  /** Patterns that locate class-like definitions (structs, modules, types, ...). */
  classes: RegExp[];
  /** Patterns that locate import/include statements. */
  imports: RegExp[];
  /** Call-site patterns; every language table in this module leaves this empty. */
  calls: RegExp[];
}
|
|
13
|
+
|
|
14
|
+
/**
 * Build a short context snippet for a match.
 *
 * The snippet starts at the beginning of the line that contains `matchIndex`
 * and runs up to (not including) the third newline found after `matchIndex`,
 * i.e. at most three lines. If fewer newlines remain, it runs to the end of
 * `content`. Leading and trailing whitespace is trimmed.
 *
 * @param content    Full text being scanned.
 * @param matchIndex Offset of the match inside `content`.
 * @returns The trimmed snippet.
 */
function surroundingLines(content: string, matchIndex: number): string {
  // lastIndexOf yields -1 when there is no earlier newline, so +1 lands on offset 0.
  const snippetStart = content.lastIndexOf("\n", matchIndex) + 1;

  let cursor = matchIndex;
  let linesLeft = 3;
  while (linesLeft > 0) {
    const newlineAt = content.indexOf("\n", cursor + 1);
    if (newlineAt === -1) {
      // Ran out of text before collecting three lines: take everything that is left.
      return content.slice(snippetStart, content.length).trim();
    }
    cursor = newlineAt;
    linesLeft -= 1;
  }

  return content.slice(snippetStart, cursor).trim();
}
|
|
29
|
+
|
|
30
|
+
// ---------- Tier B pattern table ----------
|
|
31
|
+
|
|
32
|
+
/**
 * C extraction patterns. Each regex is global + multiline and captures the
 * entity name in group 1.
 */
const cPatterns: LanguagePatterns = {
  functions: [
    // return_type name( — covers `int main(`, `void foo(`, `char* bar(`,
    // `char *bar(` and `struct node **make(`.
    //  - The separator between type and name is either one-or-more `*`
    //    (with optional whitespace on both sides) or plain whitespace, so the
    //    pointer star may sit next to the type, next to the name, or between.
    //  - The two negative lookaheads stop statements such as `return foo(` and
    //    `else if (` from being reported as a function named `foo` / `if`.
    /\b(?!(?:return|else|case|goto|sizeof|typedef|do)\b)(?:unsigned\s+)?(?:void|int|char|float|double|long|short|size_t|bool|(?:struct\s+)?\w+)(?:\s*\*+\s*|\s+)(?!(?:if|for|while|switch|return|sizeof)\b)(\w+)\s*\(/gm,
  ],
  classes: [
    // struct Name
    /\bstruct\s+(\w+)/gm,
    // typedef struct|union|enum { ... } Name
    /\btypedef\s+(?:struct|union|enum)\s*\{[^}]*\}\s*(\w+)/gm,
    // union Name
    /\bunion\s+(\w+)/gm,
  ],
  imports: [
    // #include <header.h> or #include "header.h"
    /#include\s+[<"]([^>"]+)[>"]/gm,
  ],
  calls: [],
};
|
|
48
|
+
|
|
49
|
+
/**
 * C++ extraction patterns. Each regex is global + multiline and captures the
 * entity name in group 1.
 *
 * In all three function patterns the gap between the return type and the name
 * accepts `*` / `&` as well as whitespace, so declarations written with the
 * pointer or reference sigil attached to the name (`int *find(`,
 * `char *Foo::bar(`) are matched in addition to `int* find(`.
 */
const cppPatterns: LanguagePatterns = {
  functions: [
    // template<...> return_type name(
    /template\s*<[^>]*>\s*(?:\w[\w:*&\s]*)[\s*&]+(\w+)\s*\(/gm,
    // return_type Class::method(
    /(?:\w[\w:*&\s]*)[\s*&]+\w+::(\w+)\s*\(/gm,
    // standalone: return_type name(
    // The negative lookaheads keep statements such as `return compute(` and
    // `else if (` from being reported as functions.
    /\b(?!(?:return|else|case|goto|sizeof|typedef|new|delete|throw|do)\b)(?:unsigned\s+)?(?:void|int|char|float|double|long|short|bool|auto|size_t|std::\w+|(?:struct\s+)?\w+)(?:\s*[*&]+\s*|\s+)(?!(?:if|for|while|switch|catch|return|sizeof)\b)(\w+)\s*\(/gm,
  ],
  classes: [
    // class Name
    /\bclass\s+(\w+)/gm,
    // struct Name
    /\bstruct\s+(\w+)/gm,
    // namespace Name
    /\bnamespace\s+(\w+)/gm,
  ],
  imports: [
    // #include <header> or #include "header"
    /#include\s+[<"]([^>"]+)[>"]/gm,
    // using namespace ns; or using ns::name;
    /\busing\s+(?:namespace\s+)?(\w[\w:]*)/gm,
  ],
  calls: [],
};
|
|
62
|
+
|
|
63
|
+
/**
 * C# extraction patterns. Each regex is global + multiline and captures the
 * entity name in group 1.
 */
const csharpPatterns: LanguagePatterns = {
  // Methods: an access modifier, optional static/virtual/override/async,
  // a (possibly generic) return type, then `name(`.
  functions: [
    /(?:public|private|protected|internal)\s+(?:static\s+)?(?:virtual\s+)?(?:override\s+)?(?:async\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(/gm,
  ],

  // Type declarations, each with an optional leading access modifier.
  classes: [
    // [static] [abstract] [sealed] class Name
    /(?:public\s+|private\s+|protected\s+|internal\s+)?(?:static\s+)?(?:abstract\s+)?(?:sealed\s+)?class\s+(\w+)/gm,
    // interface Name
    /(?:public\s+|private\s+|protected\s+|internal\s+)?interface\s+(\w+)/gm,
    // struct Name
    /(?:public\s+|private\s+|protected\s+|internal\s+)?struct\s+(\w+)/gm,
    // enum Name
    /(?:public\s+|private\s+|protected\s+|internal\s+)?enum\s+(\w+)/gm,
  ],

  // `using System;` / `using System.Collections.Generic;` at the start of a line.
  imports: [/^using\s+([\w.]+)\s*;/gm],

  calls: [],
};
|
|
80
|
+
|
|
81
|
+
/**
 * Bash extraction patterns. Each regex is global + multiline and captures the
 * entity name (or sourced path) in group 1.
 */
const bashPatterns: LanguagePatterns = {
  functions: [
    // Keyword form: `function name {` or `function name() {`
    /\bfunction\s+(\w+)/gm,
    // Shorthand form at the start of a line: `name() {`
    /^(\w+)\s*\(\s*\)\s*\{/gm,
  ],

  // Bash has no class-like construct to extract.
  classes: [],

  imports: [
    // `source path`
    /\bsource\s+(\S+)/gm,
    // `. path` (dot command at the start of a line)
    /^\.\s+(\S+)/gm,
  ],

  calls: [],
};
|
|
96
|
+
|
|
97
|
+
/**
 * Lua extraction patterns. Each regex is global + multiline and captures the
 * entity name (or required module) in group 1.
 */
const luaPatterns: LanguagePatterns = {
  functions: [
    // function name(  /  function M.name(  /  function obj:method(
    // Any `prefix.` or `prefix:` qualifiers are skipped so the captured name
    // is the final segment.
    /\bfunction\s+(?:\w+[.:])*(\w+)\s*\(/gm,
    // local function name(
    /\blocal\s+function\s+(\w+)\s*\(/gm,
  ],
  // Lua has no class-like construct to extract.
  classes: [],
  imports: [
    // require("name"), require "name", require 'dotted.path'
    // The parentheses are optional in Lua when the sole argument is a string literal.
    /\brequire\s*\(?\s*["']([^"']+)["']/gm,
  ],
  calls: [],
};
|
|
111
|
+
|
|
112
|
+
/**
 * Zig extraction patterns. Each regex is global + multiline and captures the
 * entity name (or imported module) in group 1.
 */
const zigPatterns: LanguagePatterns = {
  // `fn name`, optionally preceded by `pub`.
  functions: [/(?:pub\s+)?fn\s+(\w+)/gm],

  // `Name = struct`, `Name = enum`, `Name = union`, optionally `packed`.
  classes: [/\b(\w+)\s*=\s*(?:packed\s+)?(?:struct|enum|union)/gm],

  // `@import("name")`
  imports: [/@import\s*\(\s*["']([^"']+)["']\s*\)/gm],

  calls: [],
};
|
|
127
|
+
|
|
128
|
+
/**
 * Perl extraction patterns. Each regex is global + multiline and captures the
 * entity name (or loaded module/file) in group 1.
 */
const perlPatterns: LanguagePatterns = {
  functions: [
    // sub name
    /\bsub\s+(\w+)/gm,
  ],
  classes: [
    // package Name or package Name::Sub
    /\bpackage\s+([\w:]+)/gm,
  ],
  imports: [
    // use Module; or use Module::Sub;
    /\buse\s+([\w:]+)/gm,
    // require "file"
    /\brequire\s+["']([^"']+)["']/gm,
    // require Module or require Module::Sub (bareword form).
    // The name must start with a letter or underscore, so version checks
    // such as `require 5.010;` are not reported as modules.
    /\brequire\s+([A-Za-z_]\w*(?:::\w+)*)/gm,
  ],
  calls: [],
};
|
|
142
|
+
|
|
143
|
+
/**
 * R extraction patterns. Each regex is global + multiline and captures the
 * entity name (or attached package) in group 1.
 *
 * R identifiers may contain dots (`my.func`, `data.table`), so name captures
 * use `[\w.]+` rather than `\w+`.
 */
const rPatterns: LanguagePatterns = {
  functions: [
    // name <- function(   or   name <<- function(
    /([\w.]+)\s*<<?-\s*function\s*\(/gm,
    // name = function(
    /([\w.]+)\s*=\s*function\s*\(/gm,
  ],
  classes: [
    // setClass("Name", ...
    /\bsetClass\s*\(\s*["'](\w+)["']/gm,
    // R6Class("Name", ...
    /\bR6Class\s*\(\s*["'](\w+)["']/gm,
  ],
  imports: [
    // library(pkg), library("pkg"), library(pkg, quietly = TRUE)
    /\blibrary\s*\(\s*["']?([\w.]+)["']?\s*[,)]/gm,
    // require(pkg), require("pkg"), require(pkg, quietly = TRUE)
    /\brequire\s*\(\s*["']?([\w.]+)["']?\s*[,)]/gm,
  ],
  calls: [],
};
|
|
154
|
+
|
|
155
|
+
/**
 * OCaml extraction patterns. Each regex is global + multiline and captures the
 * entity name (or opened module) in group 1.
 */
const ocamlPatterns: LanguagePatterns = {
  functions: [
    // let rec name ...
    /\blet\s+rec\s+(\w+)/gm,
    // let name ... = (or with a type annotation `:`).
    // The lookahead skips `let rec`, which the pattern above already handles;
    // without it the keyword `rec` itself would be captured as a name.
    /\blet\s+(?!rec\b)(\w+)\b.*(?::|=)/gm,
    // val name : (in .mli)
    /\bval\s+(\w+)\s*:/gm,
  ],
  classes: [
    // module Name or module type Name
    /\bmodule\s+(?:type\s+)?(\w+)/gm,
    // type name
    /\btype\s+(\w+)/gm,
  ],
  imports: [
    // open Module or open Module.Sub
    /\bopen\s+([\w.]+)/gm,
  ],
  calls: [],
};
|
|
172
|
+
|
|
173
|
+
/**
 * Haskell extraction patterns. Each regex is global + multiline and captures
 * the entity name (or imported module) in group 1.
 */
const haskellPatterns: LanguagePatterns = {
  // Top-level type signatures: `name :: Type` starting at column 0.
  functions: [/^(\w+)\s*::\s*.+$/gm],

  classes: [
    // data Name
    /\bdata\s+(\w+)/gm,
    // newtype Name
    /\bnewtype\s+(\w+)/gm,
    // class [(Context) =>] Name
    /\bclass\s+(?:\([^)]*\)\s*=>)?\s*(\w+)/gm,
    // instance [(Context) =>] Name
    /\binstance\s+(?:\([^)]*\)\s*=>)?\s*(\w+)/gm,
  ],

  // import [qualified] Module.Name [as X]
  imports: [/\bimport\s+(?:qualified\s+)?([\w.]+)/gm],

  calls: [],
};
|
|
190
|
+
|
|
191
|
+
// ---------- Extension to (language name, patterns) mapping ----------
|
|
192
|
+
|
|
193
|
+
/**
 * Lookup from file extension (including the leading dot, case-sensitive) to
 * the language tag and regex table used for that file.
 */
const TIER_B_PATTERNS: Record<string, { language: string; patterns: LanguagePatterns }> = {
  // C
  ".c": { language: "c", patterns: cPatterns },
  ".h": { language: "c", patterns: cPatterns },

  // C++
  ".cpp": { language: "cpp", patterns: cppPatterns },
  ".cc": { language: "cpp", patterns: cppPatterns },
  ".cxx": { language: "cpp", patterns: cppPatterns },
  ".hpp": { language: "cpp", patterns: cppPatterns },
  ".hxx": { language: "cpp", patterns: cppPatterns },

  // C#
  ".cs": { language: "csharp", patterns: csharpPatterns },

  // Bash
  ".sh": { language: "bash", patterns: bashPatterns },
  ".bash": { language: "bash", patterns: bashPatterns },

  // Lua
  ".lua": { language: "lua", patterns: luaPatterns },

  // Zig
  ".zig": { language: "zig", patterns: zigPatterns },

  // Perl
  ".pl": { language: "perl", patterns: perlPatterns },
  ".pm": { language: "perl", patterns: perlPatterns },

  // R (both spellings are listed because the lookup is case-sensitive)
  ".r": { language: "r", patterns: rPatterns },
  ".R": { language: "r", patterns: rPatterns },

  // OCaml
  ".ml": { language: "ocaml", patterns: ocamlPatterns },
  ".mli": { language: "ocaml", patterns: ocamlPatterns },

  // Haskell
  ".hs": { language: "haskell", patterns: haskellPatterns },
};
|
|
214
|
+
|
|
215
|
+
// Extraction categories, in the order they are scanned. Each `key` must be a
// property of LanguagePatterns. "call" is left out because every Tier B
// language table has `calls: []`.
type Category = "function" | "class" | "import";

const CATEGORIES: Array<{ key: keyof LanguagePatterns; category: Category }> = [
  { key: "functions", category: "function" },
  { key: "classes", category: "class" },
  { key: "imports", category: "import" },
];
|
|
224
|
+
|
|
225
|
+
/**
 * Extract structural code entities from file content using language-specific
 * regex patterns. Supports all 10 Tier B languages. No call extraction.
 *
 * The language is chosen from the text after the last `.` in `filePath`
 * (case-sensitive). Categories are scanned in the order functions, classes,
 * imports. Within a category only the first occurrence of a name produces a
 * fact; later matches with the same category and name are skipped.
 *
 * @param content  The file text to scan.
 * @param filePath Path used to determine language by extension.
 * @returns One CandidateFact per distinct (category, name) pair, or an empty
 *          array when either argument is empty or the extension is unknown.
 */
export function extractTierB(content: string, filePath: string): CandidateFact[] {
  if (!content || !filePath) return [];

  const dotIdx = filePath.lastIndexOf(".");
  if (dotIdx === -1) return [];

  const entry = TIER_B_PATTERNS[filePath.slice(dotIdx)];
  if (!entry) return [];

  const { language, patterns } = entry;
  const base = basename(filePath);
  const facts: CandidateFact[] = [];
  const seen = new Set<string>();

  for (const { key, category } of CATEGORIES) {
    for (const regex of patterns[key]) {
      // matchAll scans a clone of the regex that starts at the original's
      // lastIndex, so reset it to make every call scan from the beginning.
      regex.lastIndex = 0;

      // matchAll (rather than a manual exec loop) always advances past
      // zero-length matches and throws on a pattern without the `g` flag,
      // so a bad pattern cannot spin forever.
      for (const m of content.matchAll(regex)) {
        const name = m[1];
        if (!name) continue;

        // Deduplicate by name + category
        const dedupeKey = `${category}:${name}`;
        if (seen.has(dedupeKey)) continue;
        seen.add(dedupeKey);

        facts.push({
          type: "CodeEntity",
          name,
          content: surroundingLines(content, m.index ?? 0),
          summary: `${category} ${name} in ${base}`,
          tags: [language, category],
          file_paths: [filePath],
          trust_tier: 2,
          confidence: 0.92,
          extraction_method: "regex-ast",
        });
      }
    }
  }

  return facts;
}
|