neuroscore 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ # Python
2
+ __pycache__/
3
+ .ruff_cache/
4
+ *.py[cod]
5
+ *$py.class
6
+ *.egg-info/
7
+ dist/
8
+ build/
9
+ .venv/
10
+ .env
11
+ .env.local
12
+
13
+ # uv
14
+ uv.lock
15
+
16
+ # Node
17
+ node_modules/
18
+ *.tsbuildinfo
19
+
20
+ # Turbo
21
+ .turbo/
22
+
23
+ # pnpm
24
+ pnpm-debug.log*
25
+
26
+ # SQLite
27
+ *.db
28
+ *.sqlite
29
+
30
+ # IDE
31
+ .vscode/
32
+ .idea/
33
+ *.swp
34
+ *.swo
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
39
+
40
+ # Build outputs
41
+ packages/*/dist/
42
+ apps/*/dist/
43
+
44
+ # Test
45
+ .coverage
46
+ htmlcov/
47
+ .pytest_cache/
48
+
49
+ # NeuroScore local data
50
+ .neuroscore/
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: neuroscore
3
+ Version: 0.1.0
4
+ Summary: Cognitive scoring engine for developer artifacts
5
+ Project-URL: Homepage, https://github.com/hoklims/neuroscore
6
+ Project-URL: Documentation, https://github.com/hoklims/neuroscore/tree/main/docs
7
+ Project-URL: Repository, https://github.com/hoklims/neuroscore
8
+ Project-URL: Issues, https://github.com/hoklims/neuroscore/issues
9
+ Author: NeuroScore Contributors
10
+ License-Expression: MIT
11
+ Keywords: cognitive,documentation,prompts,quality,scoring
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Quality Assurance
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: pydantic<3,>=2.10
22
+ Provides-Extra: dev
23
+ Requires-Dist: pyright>=1.1; extra == 'dev'
24
+ Requires-Dist: pytest-cov>=6.0; extra == 'dev'
25
+ Requires-Dist: pytest>=8.0; extra == 'dev'
26
+ Requires-Dist: ruff>=0.9; extra == 'dev'
27
+ Provides-Extra: embedding
28
+ Requires-Dist: numpy<3,>=1.26; extra == 'embedding'
29
+ Requires-Dist: scikit-learn<2,>=1.5; extra == 'embedding'
30
+ Description-Content-Type: text/plain
31
+
32
+ Cognitive scoring engine for developer artifacts. See https://github.com/hoklims/neuroscore
@@ -0,0 +1,50 @@
1
+ [project]
2
+ name = "neuroscore"
3
+ version = "0.1.0"
4
+ description = "Cognitive scoring engine for developer artifacts"
5
+ requires-python = ">=3.12"
6
+ license = "MIT"
7
+ readme = {text = "Cognitive scoring engine for developer artifacts. See https://github.com/hoklims/neuroscore", content-type = "text/plain"}
8
+ authors = [{ name = "NeuroScore Contributors" }]
9
+ keywords = ["scoring", "cognitive", "prompts", "documentation", "quality"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.12",
16
+ "Programming Language :: Python :: 3.13",
17
+ "Topic :: Software Development :: Quality Assurance",
18
+ "Typing :: Typed",
19
+ ]
20
+ dependencies = [
21
+ "pydantic>=2.10,<3",
22
+ ]
23
+
24
+ [project.urls]
25
+ Homepage = "https://github.com/hoklims/neuroscore"
26
+ Documentation = "https://github.com/hoklims/neuroscore/tree/main/docs"
27
+ Repository = "https://github.com/hoklims/neuroscore"
28
+ Issues = "https://github.com/hoklims/neuroscore/issues"
29
+
30
+ [project.optional-dependencies]
31
+ embedding = [
32
+ "scikit-learn>=1.5,<2",
33
+ "numpy>=1.26,<3",
34
+ ]
35
+ dev = [
36
+ "pytest>=8.0",
37
+ "pytest-cov>=6.0",
38
+ "ruff>=0.9",
39
+ "pyright>=1.1",
40
+ ]
41
+
42
+ [build-system]
43
+ requires = ["hatchling"]
44
+ build-backend = "hatchling.build"
45
+
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/neuroscore"]
48
+
49
+ [tool.pytest.ini_options]
50
+ testpaths = ["tests"]
@@ -0,0 +1,30 @@
1
+ """NeuroScore — Cognitive scoring engine for developer artifacts."""
2
+
3
+ from neuroscore.models import (
4
+ Artifact,
5
+ ArtifactType,
6
+ ComparisonRun,
7
+ ConfidenceEstimate,
8
+ EvidenceSpan,
9
+ ScoreDimension,
10
+ ScoreRun,
11
+ Suggestion,
12
+ )
13
+ from neuroscore.persistence import PersistenceError
14
+ from neuroscore.pipeline import compare_artifacts, score_artifact
15
+
16
+ __all__ = [
17
+ "Artifact",
18
+ "ArtifactType",
19
+ "ComparisonRun",
20
+ "ConfidenceEstimate",
21
+ "EvidenceSpan",
22
+ "PersistenceError",
23
+ "ScoreDimension",
24
+ "ScoreRun",
25
+ "Suggestion",
26
+ "compare_artifacts",
27
+ "score_artifact",
28
+ ]
29
+
30
+ __version__ = "0.1.0"
@@ -0,0 +1,84 @@
1
+ """Configuration management for NeuroScore.
2
+
3
+ All configuration is driven by environment variables with sensible defaults.
4
+ No config files — keep it simple and 12-factor compliant.
5
+
6
+ Environment variables:
7
+ NEUROSCORE_DB_PATH — SQLite database path (default: ~/.neuroscore/runs.db)
8
+ NEUROSCORE_DB_ENABLED — Enable persistence (default: true)
9
+ NEUROSCORE_PROVIDER — Default scoring provider (default: rules_baseline)
10
+ NEUROSCORE_API_PORT — API server port (default: 8420)
11
+ NEUROSCORE_API_HOST — API server host (default: 127.0.0.1)
12
+ NEUROSCORE_LOG_LEVEL — Logging level (default: WARNING)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import os
18
+ from dataclasses import dataclass
19
+ from pathlib import Path
20
+
21
+ from neuroscore.types import ProviderName
22
+
23
# Accepted spellings for boolean environment variables (compared lowercase).
_TRUE_VALUES = frozenset({"1", "true", "yes", "on"})
_FALSE_VALUES = frozenset({"0", "false", "no", "off"})


def _env_bool(key: str, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Args:
        key: Name of the environment variable to read.
        default: Value returned when the variable is unset, empty, or holds
            something that is neither a recognised true nor false spelling.

    Returns:
        True or False according to ``_TRUE_VALUES`` / ``_FALSE_VALUES``,
        otherwise ``default``.
    """
    # Trim surrounding whitespace before comparing: values such as "true "
    # (trailing space or newline) would otherwise silently fall back to the
    # default instead of being honoured.
    val = os.environ.get(key, "").strip().lower()
    if val in _TRUE_VALUES:
        return True
    if val in _FALSE_VALUES:
        return False
    return default
34
+
35
+
36
@dataclass(frozen=True)
class NeuroScoreConfig:
    """Immutable configuration snapshot.

    Instances are frozen; build one from the process environment with
    :meth:`from_env`.
    """

    db_path: Path  # SQLite database path (NEUROSCORE_DB_PATH)
    db_enabled: bool  # whether persistence is enabled (NEUROSCORE_DB_ENABLED)
    default_provider: ProviderName  # scoring provider (NEUROSCORE_PROVIDER)
    api_port: int  # API server port (NEUROSCORE_API_PORT)
    api_host: str  # API server host (NEUROSCORE_API_HOST)
    log_level: str  # upper-cased logging level name (NEUROSCORE_LOG_LEVEL)

    @staticmethod
    def from_env() -> NeuroScoreConfig:
        """Build configuration from environment variables.

        Returns:
            A snapshot populated from the ``NEUROSCORE_*`` variables, using
            the built-in default for every variable that is unset.

        Raises:
            ValueError: If ``NEUROSCORE_API_PORT`` is set to a non-empty value
                that is not an integer.
        """
        db_path_str = os.environ.get("NEUROSCORE_DB_PATH")
        if db_path_str:
            # Expand a leading "~" so a value written like the documented
            # default (~/.neuroscore/runs.db) does not create a literal "~"
            # directory relative to the working directory.
            db_path = Path(db_path_str).expanduser()
        else:
            db_path = Path.home() / ".neuroscore" / "runs.db"

        provider_str = os.environ.get("NEUROSCORE_PROVIDER", "rules_baseline")
        try:
            provider = ProviderName(provider_str)
        except ValueError:
            # Unknown provider names fall back to the baseline provider.
            provider = ProviderName.RULES_BASELINE

        # An empty/blank port is treated as unset (mirroring the handling of
        # NEUROSCORE_DB_PATH); anything else must parse as an integer.
        port_str = os.environ.get("NEUROSCORE_API_PORT", "").strip()
        if port_str:
            try:
                api_port = int(port_str)
            except ValueError as err:
                raise ValueError(
                    f"NEUROSCORE_API_PORT must be an integer, got {port_str!r}"
                ) from err
        else:
            api_port = 8420

        return NeuroScoreConfig(
            db_path=db_path,
            db_enabled=_env_bool("NEUROSCORE_DB_ENABLED", default=True),
            default_provider=provider,
            api_port=api_port,
            api_host=os.environ.get("NEUROSCORE_API_HOST", "127.0.0.1"),
            log_level=os.environ.get("NEUROSCORE_LOG_LEVEL", "WARNING").upper(),
        )
67
+
68
+
69
# Process-wide cache for the configuration snapshot; None until first requested.
_config: NeuroScoreConfig | None = None


def get_config() -> NeuroScoreConfig:
    """Return the cached configuration, building it from the environment on first use."""
    global _config
    if _config is not None:
        return _config
    _config = NeuroScoreConfig.from_env()
    return _config
79
+
80
+
81
def reset_config() -> None:
    """Clear the cached configuration snapshot.

    After this call the module-level cache is empty again, so the next
    ``get_config()`` rebuilds the snapshot from the environment. Intended
    mainly for tests that change environment variables.
    """
    global _config
    _config = None
@@ -0,0 +1,56 @@
1
+ """Artifact type detection heuristics."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from neuroscore.types import ArtifactType
8
+
9
# File extension → type mapping, declared per artifact type and flattened into
# a single lookup table (insertion order follows the groups below).
_EXTENSION_MAP: dict[str, ArtifactType] = {
    extension: kind
    for kind, extensions in (
        (ArtifactType.MARKDOWN, (".md", ".markdown")),
        (
            ArtifactType.CODE,
            (
                ".py",
                ".js",
                ".ts",
                ".tsx",
                ".jsx",
                ".rs",
                ".go",
                ".java",
                ".c",
                ".cpp",
                ".h",
                ".rb",
                ".sh",
            ),
        ),
        (ArtifactType.DIFF, (".diff", ".patch")),
        (ArtifactType.PLAINTEXT, (".txt",)),
    )
    for extension in extensions
}
30
+
31
# Content-based heuristics (checked in order)
#
# Only unambiguous unified-diff markers count as a diff: a "diff --git" header,
# "--- <path>" / "+++ <path>" file headers, or a "@@ -a,b +c,d @@" hunk header.
# A bare "---" line is deliberately NOT matched, because it is also a Markdown
# horizontal rule / YAML front-matter fence and would otherwise make ordinary
# Markdown documents be classified as diffs.
_DIFF_PATTERN = re.compile(
    r"^(diff --git |--- \S|\+\+\+ \S|@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@)",
    re.MULTILINE,
)
# ATX headings, bold markers, inline links, or fenced code blocks.
_MARKDOWN_PATTERN = re.compile(r"^#{1,6}\s|\*\*|__|\[.+\]\(.+\)|```", re.MULTILINE)
# Common definition/import keywords from several programming languages.
_CODE_INDICATORS = re.compile(
    r"(def |class |function |const |let |var |import |from |#include|package )", re.MULTILINE
)
37
+
38
+
39
def detect_artifact_type(content: str, filename: str | None = None) -> ArtifactType:
    """Detect artifact type from filename extension or content heuristics.

    Args:
        content: Text of the artifact; only inspected when the filename does
            not decide the type.
        filename: Optional file name or path. When it ends with a known
            extension (matched case-insensitively) that extension wins.

    Returns:
        The type mapped to the filename extension if one matches; otherwise
        the first content heuristic that matches, tried in the order diff,
        Markdown, code; otherwise ``ArtifactType.PLAINTEXT``.
    """
    if filename:
        # The keys of _EXTENSION_MAP are lowercase, so fold the filename to
        # make "README.MD" or "Main.PY" resolve like their lowercase forms.
        lowered = filename.lower()
        for ext, artifact_type in _EXTENSION_MAP.items():
            if lowered.endswith(ext):
                return artifact_type

    # Content-based detection
    if _DIFF_PATTERN.search(content):
        return ArtifactType.DIFF

    if _MARKDOWN_PATTERN.search(content):
        return ArtifactType.MARKDOWN

    if _CODE_INDICATORS.search(content):
        return ArtifactType.CODE

    return ArtifactType.PLAINTEXT
@@ -0,0 +1,220 @@
1
+ """Text feature extraction for scoring pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+
8
+
9
@dataclass(frozen=True)
class TextFeatures:
    """Read-only bundle of surface-level text measurements for scoring providers."""

    # --- Size ---
    char_count: int
    word_count: int
    line_count: int
    sentence_count: int

    # --- Averages ---
    avg_word_length: float
    avg_sentence_length: float
    avg_line_length: float

    # Share of distinct words among all words (unique / total).
    vocabulary_richness: float

    # --- Tone markers ---
    question_count: int
    exclamation_count: int
    # Estimated fraction of sentences that are imperative.
    imperative_ratio: float

    # --- Structure ---
    list_item_count: int
    heading_count: int
    code_block_count: int
    link_count: int
    paragraph_count: int
    max_nesting_depth: int

    # --- Character-level ratios ---
    whitespace_ratio: float
    uppercase_ratio: float
    punctuation_density: float

    # --- Style signals ---
    repeated_phrase_count: int
    # Sentences longer than 30 words.
    long_sentence_count: int
    # Sentences shorter than 5 words.
    short_sentence_count: int
    passive_voice_estimate: int
    hedge_word_count: int
38
+
39
+
40
# Sentence boundary: terminal punctuation followed by whitespace, terminal
# punctuation at the very end of the text, or a blank line.
_SENTENCE_SPLIT = re.compile(r"[.!?]+\s+|[.!?]+$|\n\n+")
# Tokens are separated by any run of whitespace.
_WORD_SPLIT = re.compile(r"\s+")

# Lowercase words treated as hedging.
# NOTE(review): "appeared" is listed but "appear"/"appears" are not — confirm intended.
_HEDGE_WORDS = frozenset(
    (
        "maybe perhaps possibly somewhat relatively fairly might could would "
        "should seem seems appeared apparently arguably roughly approximately "
        "basically generally usually often sometimes probably"
    ).split()
)

# A form of "to be" followed by a word ending in "ed" (rough passive-voice signal).
_PASSIVE_PATTERN = re.compile(r"\b(is|are|was|were|been|being|be)\s+\w+ed\b", re.IGNORECASE)

# Lowercase first words that mark a sentence as imperative.
_IMPERATIVE_STARTS = frozenset(
    (
        "do don't use add remove create delete run make set get put check "
        "ensure verify note see try avoid keep write read install update "
        "configure implement define return pass call"
    ).split()
)

# Bulleted ("-", "*", "+", "•") or numbered ("1." / "1)") list items.
_LIST_ITEM = re.compile(r"^\s*[-*+•]\s|^\s*\d+[.)]\s", re.MULTILINE)
# ATX-style headings with one to six "#" characters.
_HEADING = re.compile(r"^#{1,6}\s", re.MULTILINE)
# Fenced code blocks delimited by backticks or tildes.
_CODE_BLOCK = re.compile(r"```[\s\S]*?```|~~~[\s\S]*?~~~")
# Inline "[text](target)" links or bare http(s) URLs.
_LINK = re.compile(r"\[.+?\]\(.+?\)|https?://\S+")
# Phrase length, in words, for repeated-phrase detection.
_REPEATED_PHRASE_MIN_WORDS = 3
109
+
110
+
111
def extract_features(content: str) -> TextFeatures:
    """Extract text features from artifact content.

    All measurements are cheap surface heuristics (regexes and counts); none
    of them parse the text.

    Args:
        content: Raw artifact text. May be empty, in which case every ratio
            and average is 0.0 while ``line_count`` and ``sentence_count``
            are 1.

    Returns:
        A frozen ``TextFeatures`` snapshot for ``content``.
    """
    lines = content.split("\n")
    words = [w for w in _WORD_SPLIT.split(content.strip()) if w]
    sentences = [s.strip() for s in _SENTENCE_SPLIT.split(content) if s.strip()]

    char_count = len(content)
    word_count = len(words)
    line_count = len(lines)  # str.split always yields at least one element
    sentence_count = max(len(sentences), 1)  # floor of 1 keeps ratios defined

    avg_word_length = sum(len(w) for w in words) / word_count if word_count > 0 else 0.0
    avg_sentence_length = word_count / sentence_count
    avg_line_length = char_count / line_count

    lower_words = [w.lower() for w in words]
    vocabulary_richness = len(set(lower_words)) / word_count if word_count > 0 else 0.0

    # Tokens are whitespace-delimited and keep adjacent punctuation, so trim it
    # before the lookup; otherwise "perhaps," or "could." are never counted.
    edge_punctuation = ".,;:!?()[]{}\"'*_`"
    hedge_word_count = sum(
        1 for w in lower_words if w.strip(edge_punctuation) in _HEDGE_WORDS
    )

    question_count = content.count("?")
    exclamation_count = content.count("!")

    # Estimate imperative sentences from their first word.
    imperative_count = 0
    for sent in sentences:
        tokens = sent.split()
        first_word = tokens[0].lower().rstrip(".,!?:") if tokens else ""
        if first_word in _IMPERATIVE_STARTS:
            imperative_count += 1
    imperative_ratio = imperative_count / sentence_count

    list_item_count = len(_LIST_ITEM.findall(content))
    heading_count = len(_HEADING.findall(content))
    code_block_count = len(_CODE_BLOCK.findall(content))
    link_count = len(_LINK.findall(content))

    paragraphs = re.split(r"\n\s*\n", content)
    paragraph_count = sum(1 for p in paragraphs if p.strip())

    # Estimate nesting depth from leading whitespace, assuming 2-space indent.
    # Each leading character counts as one column, so a single tab yields 0.
    max_nesting = 0
    for line in lines:
        stripped = line.lstrip()
        if stripped:
            indent = len(line) - len(stripped)
            max_nesting = max(max_nesting, indent // 2)

    if char_count > 0:
        whitespace_ratio = sum(1 for c in content if c.isspace()) / char_count
        uppercase_ratio = sum(1 for c in content if c.isupper()) / char_count
        punct_chars = sum(1 for c in content if c in ".,;:!?()[]{}\"'-/\\@#$%^&*")
        punctuation_density = punct_chars / char_count
    else:
        whitespace_ratio = 0.0
        uppercase_ratio = 0.0
        punctuation_density = 0.0

    # Distinct phrases of the configured length that occur more than once.
    repeated = _count_repeated_phrases(lower_words, _REPEATED_PHRASE_MIN_WORDS)

    # Sentence length distribution: > 30 words is long, < 5 words is short.
    sentence_lengths = [len(sent.split()) for sent in sentences]
    long_sentences = sum(1 for wc in sentence_lengths if wc > 30)
    short_sentences = sum(1 for wc in sentence_lengths if wc < 5)

    passive_count = len(_PASSIVE_PATTERN.findall(content))

    return TextFeatures(
        char_count=char_count,
        word_count=word_count,
        line_count=line_count,
        sentence_count=sentence_count,
        avg_word_length=avg_word_length,
        avg_sentence_length=avg_sentence_length,
        avg_line_length=avg_line_length,
        vocabulary_richness=vocabulary_richness,
        question_count=question_count,
        exclamation_count=exclamation_count,
        imperative_ratio=imperative_ratio,
        list_item_count=list_item_count,
        heading_count=heading_count,
        code_block_count=code_block_count,
        link_count=link_count,
        paragraph_count=paragraph_count,
        max_nesting_depth=max_nesting,
        whitespace_ratio=whitespace_ratio,
        uppercase_ratio=uppercase_ratio,
        punctuation_density=punctuation_density,
        repeated_phrase_count=repeated,
        long_sentence_count=long_sentences,
        short_sentence_count=short_sentences,
        passive_voice_estimate=passive_count,
        hedge_word_count=hedge_word_count,
    )
210
+
211
+
212
def _count_repeated_phrases(words: list[str], min_words: int = 3) -> int:
    """Return how many distinct ``min_words``-long word sequences occur at least twice.

    Inputs shorter than two full phrases cannot contain a repeat and yield 0.
    """
    total = len(words)
    if total < 2 * min_words:
        return 0

    # Tally every sliding window of ``min_words`` consecutive words.
    tally: dict[tuple[str, ...], int] = {}
    for offset in range(total - min_words + 1):
        window = tuple(words[offset : offset + min_words])
        tally[window] = tally.get(window, 0) + 1

    return len([window for window, hits in tally.items() if hits > 1])
@@ -0,0 +1,100 @@
1
+ """Domain models for NeuroScore."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from datetime import UTC, datetime
7
+ from typing import Any
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+ from neuroscore.types import ArtifactType, DimensionName, ProviderName
12
+
13
+
14
+ def _utc_now() -> datetime:
15
+ return datetime.now(UTC)
16
+
17
+
18
def _new_id() -> str:
    """Return a fresh random identifier: the 32-character hex form of a UUID4."""
    generated = uuid.uuid4()
    return generated.hex
20
+
21
+
22
+ # Bumped on breaking changes to the output schema.
23
+ # Consumers should check this before parsing.
24
+ SCHEMA_VERSION = "0.1.0"
25
+
26
+
27
class Artifact(BaseModel):
    """Input submitted for scoring; only ``content`` is required."""

    # Raw text of the artifact.
    content: str
    # Optional explicit type. NOTE(review): presumably resolved later when left
    # as None (see ScoreRun.artifact_type_resolved) — confirm against the pipeline.
    artifact_type: ArtifactType | None = None
    # Optional file name associated with the content.
    filename: str | None = None
    # Free-form extra data; each instance gets its own empty dict by default.
    metadata: dict[str, Any] = Field(default_factory=dict)
34
+
35
+
36
class ConfidenceEstimate(BaseModel):
    """How much trust to place in a score, plus the method that produced the estimate."""

    # Validated to lie within [0.0, 1.0].
    value: float = Field(
        ge=0.0,
        le=1.0,
        description="Confidence level 0-1",
    )
    method: str = Field(
        description="How confidence was estimated",
    )
41
+
42
+
43
class EvidenceSpan(BaseModel):
    """A slice of the artifact text cited as support for one dimension's score."""

    # Both offsets must be non-negative; their relative order is not validated.
    # NOTE(review): the descriptions say "byte offset" — confirm whether
    # producers emit byte or character offsets.
    start: int = Field(
        ge=0,
        description="Start byte offset in artifact content",
    )
    end: int = Field(
        ge=0,
        description="End byte offset in artifact content",
    )
    text: str = Field(
        description="The matched text content",
    )
    explanation: str = Field(
        description="Why this span is relevant",
    )
    dimension: DimensionName = Field(
        description="Which dimension this evidence supports",
    )
51
+
52
+
53
class Suggestion(BaseModel):
    """A proposed replacement for a span of the artifact, with its justification."""

    # Non-negative offsets of the span to replace; ordering is not validated.
    start: int = Field(ge=0)
    end: int = Field(ge=0)
    # Text currently in the span and the text proposed in its place.
    original: str
    replacement: str
    # Why the change is suggested.
    rationale: str
    # Scoring dimension the suggestion relates to.
    dimension: DimensionName
62
+
63
+
64
class ScoreDimension(BaseModel):
    """Outcome for one scoring dimension: value, confidence, explanation and evidence."""

    name: DimensionName
    # Validated to lie within [0.0, 1.0].
    value: float = Field(
        ge=0.0,
        le=1.0,
        description="Normalized score 0-1",
    )
    confidence: ConfidenceEstimate
    explanation: str
    # Supporting spans; defaults to a fresh empty list per instance.
    evidence: list[EvidenceSpan] = Field(default_factory=list)
72
+
73
+
74
class ScoreRun(BaseModel):
    """Full record of one scoring execution."""

    # --- Identity ---
    schema_version: str = Field(
        default=SCHEMA_VERSION,
        description="Output schema version",
    )
    # Auto-generated via _new_id when not supplied.
    id: str = Field(default_factory=_new_id)

    # --- What was scored, and by which provider ---
    artifact: Artifact
    artifact_type_resolved: ArtifactType
    provider: ProviderName

    # --- Results ---
    # Validated to lie within [0.0, 1.0].
    overall_score: float = Field(ge=0.0, le=1.0)
    dimensions: list[ScoreDimension]
    suggestions: list[Suggestion] = Field(default_factory=list)

    # --- Bookkeeping ---
    # Auto-filled via _utc_now when not supplied.
    created_at: datetime = Field(default_factory=_utc_now)
    duration_ms: float = Field(
        ge=0.0,
        description="Scoring duration in milliseconds",
    )
    metadata: dict[str, Any] = Field(default_factory=dict)
88
+
89
+
90
class ComparisonRun(BaseModel):
    """Side-by-side result for two score runs, labelled A and B."""

    schema_version: str = Field(
        default=SCHEMA_VERSION,
        description="Output schema version",
    )
    # Auto-generated via _new_id when not supplied.
    id: str = Field(default_factory=_new_id)

    # The two runs being compared.
    run_a: ScoreRun
    run_b: ScoreRun

    delta_overall: float = Field(
        description="Score B - Score A",
    )
    # Per-dimension differences; empty by default.
    dimension_deltas: dict[DimensionName, float] = Field(default_factory=dict)
    # Plain string; the allowed values are documented but not enforced here.
    winner: str = Field(
        description="'a', 'b', or 'tie'",
    )
    # Auto-filled via _utc_now when not supplied.
    created_at: datetime = Field(default_factory=_utc_now)