resolvekit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. resolvekit/README.md +134 -0
  2. resolvekit/__init__.py +67 -0
  3. resolvekit/api/README.md +165 -0
  4. resolvekit/api/__init__.py +10 -0
  5. resolvekit/api/convenience.py +53 -0
  6. resolvekit/api/resolver.py +457 -0
  7. resolvekit/builders/README.md +173 -0
  8. resolvekit/builders/__init__.py +0 -0
  9. resolvekit/calibration/README.md +351 -0
  10. resolvekit/calibration/__init__.py +12 -0
  11. resolvekit/calibration/calibrator.py +184 -0
  12. resolvekit/calibration/features.py +139 -0
  13. resolvekit/calibration/models.py +78 -0
  14. resolvekit/cli/README.md +215 -0
  15. resolvekit/cli/__init__.py +0 -0
  16. resolvekit/cli/main.py +18 -0
  17. resolvekit/config.py +128 -0
  18. resolvekit/constants.py +252 -0
  19. resolvekit/constraints/README.md +102 -0
  20. resolvekit/constraints/__init__.py +17 -0
  21. resolvekit/constraints/constraint_engine.py +111 -0
  22. resolvekit/constraints/hierarchy_validator.py +148 -0
  23. resolvekit/constraints/membership_validator.py +60 -0
  24. resolvekit/constraints/protocols.py +33 -0
  25. resolvekit/constraints/temporal_validator.py +43 -0
  26. resolvekit/constraints/type_validator.py +42 -0
  27. resolvekit/data/README.md +165 -0
  28. resolvekit/data/__init__.py +14 -0
  29. resolvekit/data/alias_repository.py +206 -0
  30. resolvekit/data/code_repository.py +85 -0
  31. resolvekit/data/context_filters.py +49 -0
  32. resolvekit/data/db_manager.py +196 -0
  33. resolvekit/data/entity_repository.py +466 -0
  34. resolvekit/data/membership_repository.py +107 -0
  35. resolvekit/data/query_builder.py +177 -0
  36. resolvekit/data/schema.py +122 -0
  37. resolvekit/disambiguation/README.md +72 -0
  38. resolvekit/disambiguation/__init__.py +0 -0
  39. resolvekit/extraction/README.md +204 -0
  40. resolvekit/extraction/__init__.py +0 -0
  41. resolvekit/matchers/README.md +77 -0
  42. resolvekit/matchers/__init__.py +65 -0
  43. resolvekit/matchers/alias_exact.py +65 -0
  44. resolvekit/matchers/canonical_name.py +62 -0
  45. resolvekit/matchers/cascade.py +127 -0
  46. resolvekit/matchers/code_validators.py +250 -0
  47. resolvekit/matchers/exact_code.py +177 -0
  48. resolvekit/matchers/fts_matcher.py +106 -0
  49. resolvekit/matchers/fuzzy_matcher.py +142 -0
  50. resolvekit/matchers/priorities.py +174 -0
  51. resolvekit/matchers/protocols.py +75 -0
  52. resolvekit/normalization/README.md +192 -0
  53. resolvekit/normalization/__init__.py +8 -0
  54. resolvekit/normalization/normalizer.py +164 -0
  55. resolvekit/overlays/README.md +226 -0
  56. resolvekit/overlays/__init__.py +0 -0
  57. resolvekit/types.py +534 -0
  58. resolvekit/utils/README.md +188 -0
  59. resolvekit/utils/__init__.py +48 -0
  60. resolvekit/utils/cache.py +109 -0
  61. resolvekit/utils/dates.py +339 -0
  62. resolvekit/utils/errors.py +145 -0
  63. resolvekit/utils/files.py +366 -0
  64. resolvekit/utils/logging.py +219 -0
  65. resolvekit/utils/text.py +475 -0
  66. resolvekit/utils/validation.py +301 -0
  67. resolvekit-0.0.1.dist-info/METADATA +36 -0
  68. resolvekit-0.0.1.dist-info/RECORD +70 -0
  69. resolvekit-0.0.1.dist-info/WHEEL +4 -0
  70. resolvekit-0.0.1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,145 @@
1
+ """Error classes for resolvekit."""
2
+
3
+
4
+ class ResolvekitError(Exception):
5
+ """Base exception for all resolvekit errors."""
6
+
7
+ def __init__(self, message: str, details: dict | None = None):
8
+ """Initialize error with message and optional details."""
9
+ super().__init__(message)
10
+ self.message = message
11
+ self.details = details or {}
12
+
13
+ def __str__(self) -> str:
14
+ """String representation of error."""
15
+ if self.details:
16
+ details_str = ", ".join(f"{k}={v}" for k, v in self.details.items())
17
+ return f"{self.message} ({details_str})"
18
+ return self.message
19
+
20
+
21
class ConfigError(ResolvekitError):
    """Error category for configuration problems."""
25
+
26
+
27
class DataPackError(ResolvekitError):
    """Error category for data pack problems (corrupt, missing, incompatible)."""
31
+
32
+
33
class ResolutionError(ResolvekitError):
    """Error category for resolution failures."""
37
+
38
+
39
class ValidationError(ResolvekitError):
    """Error category for validation failures."""
43
+
44
+
45
class OverlayError(ResolvekitError):
    """Error category for overlay problems."""
49
+
50
+
51
class DatabaseError(ResolvekitError):
    """Error category for database problems."""
55
+
56
+
57
class CalibrationError(ResolvekitError):
    """Error category for calibration problems."""
61
+
62
+
63
class ExtractionError(ResolvekitError):
    """Error category for entity extraction problems."""
67
+
68
+
69
class CodeFormatError(ValidationError):
    """Validation error for a code whose format is invalid for its code system."""

    def __init__(self, code: str, code_system: str, expected_format: str):
        """
        Build the message and details for an invalid code.

        Args:
            code: The offending code value
            code_system: Name of the code system the code was checked against
            expected_format: Description of the format that was expected
        """
        text = f"Invalid {code_system} code format: '{code}'. Expected format: {expected_format}"
        context = {"code": code, "system": code_system, "format": expected_format}
        super().__init__(text, details=context)
78
+
79
+
80
class EntityNotFoundError(ResolutionError):
    """Resolution error raised when no entity is found for a query."""

    def __init__(self, query: str, min_confidence: float | None = None):
        """
        Build the message and details for a query with no result.

        Args:
            query: The query that produced no entity
            min_confidence: Confidence threshold in effect, or None when
                no threshold applies
        """
        msg = f"No entity found for query: '{query}'"
        # Compare against None explicitly: a threshold of 0.0 is falsy but is
        # still a real value that belongs in the message.
        if min_confidence is not None:
            msg += f" (min_confidence={min_confidence})"
        super().__init__(
            msg, details={"query": query, "min_confidence": min_confidence}
        )
91
+
92
+
93
class AmbiguousQueryError(ResolutionError):
    """Resolution error for an ambiguous query when strict mode is enabled."""

    def __init__(self, query: str, candidates: list[str]):
        """
        Build the message and details for an ambiguous query.

        Args:
            query: The ambiguous query
            candidates: Candidate names; only the first five appear in the
                message, while the details record the total count
        """
        shown = ", ".join(candidates[:5])
        text = f"Ambiguous query: '{query}'. Multiple candidates: {shown}"
        context = {"query": query, "num_candidates": len(candidates)}
        super().__init__(text, details=context)
102
+
103
+
104
class TemporalValidityError(ValidationError):
    """Validation error for an entity that is not valid at a given date."""

    def __init__(self, entity: str, date: str, reason: str):
        """
        Build the message and details for a temporal validity failure.

        Args:
            entity: The entity that was checked
            date: The date at which it was checked
            reason: Explanation of why the entity is not valid
        """
        text = f"Entity '{entity}' not valid at date '{date}': {reason}"
        context = {"entity": entity, "date": date, "reason": reason}
        super().__init__(text, details=context)
113
+
114
+
115
class HierarchyError(ValidationError):
    """Validation error for an invalid parent/child relationship."""

    def __init__(self, child: str, parent: str, reason: str):
        """
        Build the message and details for a hierarchy failure.

        Args:
            child: The supposed child
            parent: The supposed parent
            reason: Explanation of why the relationship is invalid
        """
        text = f"Invalid hierarchy: '{child}' not a child of '{parent}': {reason}"
        context = {"child": child, "parent": parent, "reason": reason}
        super().__init__(text, details=context)
124
+
125
+
126
class ChecksumMismatchError(DataPackError):
    """Data pack error for a file whose checksum differs from the expected one."""

    def __init__(self, file_path: str, expected: str, actual: str):
        """
        Build the message and details for a checksum mismatch.

        Args:
            file_path: Path of the file that was checked
            expected: Expected checksum; only its first 16 characters appear
                in the message, the full value is kept in the details
            actual: Computed checksum, abbreviated the same way in the message
        """
        expected_head = expected[:16]
        actual_head = actual[:16]
        text = f"Checksum mismatch for '{file_path}'. Expected: {expected_head}..., Got: {actual_head}..."
        context = {"file": file_path, "expected": expected, "actual": actual}
        super().__init__(text, details=context)
135
+
136
+
137
class IncompatibleVersionError(DataPackError):
    """Data pack error for a component whose version is incompatible."""

    def __init__(self, component: str, required: str, found: str):
        """
        Build the message and details for a version incompatibility.

        Args:
            component: Name of the component that was checked
            required: The version that is required
            found: The version that was found
        """
        text = f"Incompatible {component} version. Required: {required}, Found: {found}"
        context = {"component": component, "required": required, "found": found}
        super().__init__(text, details=context)
@@ -0,0 +1,366 @@
1
+ """File utilities for resolvekit."""
2
+
3
+ import hashlib
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from resolvekit.utils.errors import ChecksumMismatchError, ValidationError
9
+
10
+
11
+ def compute_checksum(
12
+ file_path: Path | str,
13
+ algorithm: str = "sha256",
14
+ chunk_size: int = 8192,
15
+ ) -> str:
16
+ """
17
+ Compute checksum of a file.
18
+
19
+ Args:
20
+ file_path: Path to file
21
+ algorithm: Hash algorithm (sha256, md5, sha1)
22
+ chunk_size: Chunk size for reading file
23
+
24
+ Returns:
25
+ Hex digest of checksum
26
+
27
+ Raises:
28
+ FileNotFoundError: If file doesn't exist
29
+ ValidationError: If algorithm is unsupported
30
+
31
+ Examples:
32
+ >>> # compute_checksum("data.sqlite")
33
+ >>> # 'a3b2c1d4...'
34
+ """
35
+ file_path = Path(file_path)
36
+
37
+ if not file_path.exists():
38
+ raise FileNotFoundError(f"File not found: {file_path}")
39
+
40
+ # Get hash function
41
+ try:
42
+ hash_func = hashlib.new(algorithm)
43
+ except ValueError as e:
44
+ raise ValidationError(
45
+ f"Unsupported hash algorithm: {algorithm}",
46
+ details={"algorithm": algorithm},
47
+ ) from e
48
+
49
+ # Compute hash
50
+ with open(file_path, "rb") as f:
51
+ while chunk := f.read(chunk_size):
52
+ hash_func.update(chunk)
53
+
54
+ return hash_func.hexdigest()
55
+
56
+
57
def verify_checksum(
    file_path: Path | str,
    expected_checksum: str,
    algorithm: str = "sha256",
    strict: bool = True,
) -> bool:
    """
    Verify file checksum.

    The comparison ignores letter case and surrounding whitespace in
    ``expected_checksum``, so an uppercase or padded hex digest still
    matches.

    Args:
        file_path: Path to file
        expected_checksum: Expected checksum (hex digest)
        algorithm: Hash algorithm
        strict: If True, raise exception on mismatch

    Returns:
        True if checksum matches

    Raises:
        ChecksumMismatchError: If strict=True and checksum doesn't match
    """
    actual_checksum = compute_checksum(file_path, algorithm)

    # Hex digests are case-insensitive; normalize both sides so formatting
    # differences alone are not reported as corruption.
    matches = actual_checksum.lower() == expected_checksum.strip().lower()

    if not matches and strict:
        raise ChecksumMismatchError(str(file_path), expected_checksum, actual_checksum)

    return matches
86
+
87
+
88
+ def ensure_directory(dir_path: Path | str) -> Path:
89
+ """
90
+ Ensure directory exists, create if it doesn't.
91
+
92
+ Args:
93
+ dir_path: Directory path
94
+
95
+ Returns:
96
+ Path object for the directory
97
+ """
98
+ dir_path = Path(dir_path)
99
+ dir_path.mkdir(parents=True, exist_ok=True)
100
+ return dir_path
101
+
102
+
103
+ def read_json(file_path: Path | str) -> Any:
104
+ """
105
+ Read JSON file.
106
+
107
+ Args:
108
+ file_path: Path to JSON file
109
+
110
+ Returns:
111
+ Parsed JSON data
112
+
113
+ Raises:
114
+ FileNotFoundError: If file doesn't exist
115
+ json.JSONDecodeError: If JSON is invalid
116
+ """
117
+ file_path = Path(file_path)
118
+
119
+ if not file_path.exists():
120
+ raise FileNotFoundError(f"File not found: {file_path}")
121
+
122
+ with open(file_path, encoding="utf-8") as f:
123
+ return json.load(f)
124
+
125
+
126
+ def write_json(
127
+ file_path: Path | str,
128
+ data: Any,
129
+ indent: int | None = 2,
130
+ ensure_dir: bool = True,
131
+ ) -> None:
132
+ """
133
+ Write data to JSON file.
134
+
135
+ Args:
136
+ file_path: Path to JSON file
137
+ data: Data to write
138
+ indent: Indentation level (None for compact)
139
+ ensure_dir: Create parent directories if they don't exist
140
+ """
141
+ file_path = Path(file_path)
142
+
143
+ if ensure_dir:
144
+ ensure_directory(file_path.parent)
145
+
146
+ with open(file_path, "w", encoding="utf-8") as f:
147
+ json.dump(data, f, indent=indent, ensure_ascii=False)
148
+
149
+
150
+ def read_text(file_path: Path | str, encoding: str = "utf-8") -> str:
151
+ """
152
+ Read text file.
153
+
154
+ Args:
155
+ file_path: Path to text file
156
+ encoding: File encoding
157
+
158
+ Returns:
159
+ File contents as string
160
+ """
161
+ file_path = Path(file_path)
162
+
163
+ if not file_path.exists():
164
+ raise FileNotFoundError(f"File not found: {file_path}")
165
+
166
+ return file_path.read_text(encoding=encoding)
167
+
168
+
169
+ def write_text(
170
+ file_path: Path | str,
171
+ content: str,
172
+ encoding: str = "utf-8",
173
+ ensure_dir: bool = True,
174
+ ) -> None:
175
+ """
176
+ Write text to file.
177
+
178
+ Args:
179
+ file_path: Path to text file
180
+ content: Text content
181
+ encoding: File encoding
182
+ ensure_dir: Create parent directories if they don't exist
183
+ """
184
+ file_path = Path(file_path)
185
+
186
+ if ensure_dir:
187
+ ensure_directory(file_path.parent)
188
+
189
+ file_path.write_text(content, encoding=encoding)
190
+
191
+
192
+ def get_file_size(file_path: Path | str) -> int:
193
+ """
194
+ Get file size in bytes.
195
+
196
+ Args:
197
+ file_path: Path to file
198
+
199
+ Returns:
200
+ File size in bytes
201
+ """
202
+ file_path = Path(file_path)
203
+ return file_path.stat().st_size
204
+
205
+
206
def get_file_size_mb(file_path: Path | str) -> float:
    """
    Return the size of a file in megabytes (1 MB = 1024 * 1024 bytes).

    Args:
        file_path: Path to file

    Returns:
        File size in MB
    """
    bytes_per_mb = 1024 * 1024
    size_in_bytes = get_file_size(file_path)
    return size_in_bytes / bytes_per_mb
217
+
218
+
219
+ def list_files(
220
+ directory: Path | str,
221
+ pattern: str = "*",
222
+ recursive: bool = False,
223
+ ) -> list[Path]:
224
+ """
225
+ List files in directory matching pattern.
226
+
227
+ Args:
228
+ directory: Directory path
229
+ pattern: Glob pattern (e.g., "*.json", "**/*.sqlite")
230
+ recursive: Whether to search recursively
231
+
232
+ Returns:
233
+ List of matching file paths
234
+
235
+ Examples:
236
+ >>> # list_files("data/", "*.sqlite")
237
+ >>> # [Path('data/base.sqlite'), Path('data/overlay.sqlite')]
238
+ """
239
+ dir_path = Path(directory)
240
+
241
+ if not dir_path.exists():
242
+ return []
243
+
244
+ if recursive and "**" not in pattern:
245
+ pattern = f"**/{pattern}"
246
+
247
+ return sorted(dir_path.glob(pattern))
248
+
249
+
250
+ def safe_remove(file_path: Path | str) -> bool:
251
+ """
252
+ Safely remove file if it exists.
253
+
254
+ Args:
255
+ file_path: Path to file
256
+
257
+ Returns:
258
+ True if file was removed, False if it didn't exist
259
+ """
260
+ file_path = Path(file_path)
261
+
262
+ if file_path.exists():
263
+ file_path.unlink()
264
+ return True
265
+
266
+ return False
267
+
268
+
269
+ def copy_file(src: Path | str, dst: Path | str, ensure_dir: bool = True) -> None:
270
+ """
271
+ Copy file from source to destination.
272
+
273
+ Args:
274
+ src: Source file path
275
+ dst: Destination file path
276
+ ensure_dir: Create destination directory if it doesn't exist
277
+ """
278
+ import shutil
279
+
280
+ src = Path(src)
281
+ dst = Path(dst)
282
+
283
+ if not src.exists():
284
+ raise FileNotFoundError(f"Source file not found: {src}")
285
+
286
+ if ensure_dir:
287
+ ensure_directory(dst.parent)
288
+
289
+ shutil.copy2(src, dst)
290
+
291
+
292
+ def expand_path(path: Path | str) -> Path:
293
+ """
294
+ Expand user home directory and resolve path.
295
+
296
+ Args:
297
+ path: Path (may contain ~)
298
+
299
+ Returns:
300
+ Expanded and resolved path
301
+
302
+ Examples:
303
+ >>> # expand_path("~/.resolvekit/data")
304
+ >>> # Path('/home/user/.resolvekit/data')
305
+ """
306
+ return Path(path).expanduser().resolve()
307
+
308
+
309
def get_manifest_checksums(directory: Path | str) -> dict[str, str]:
    """
    Compute a checksum for every file directly inside a directory.

    Subdirectories are skipped; only immediate entries that are files
    are included.

    Args:
        directory: Directory path

    Returns:
        Dictionary mapping filename to checksum

    Examples:
        >>> # get_manifest_checksums("data/")
        >>> # {'base.sqlite': 'abc123...', 'calibration.json': 'def456...'}
    """
    root = Path(directory)
    return {
        entry.name: compute_checksum(entry)
        for entry in root.iterdir()
        if entry.is_file()
    }
331
+
332
+
333
def verify_manifest_checksums(
    directory: Path | str,
    checksums: dict[str, str],
    strict: bool = True,
) -> dict[str, bool]:
    """
    Verify checksums from manifest.

    Args:
        directory: Directory path
        checksums: Dictionary mapping filename to expected checksum
        strict: If True, raise exception on first mismatch or missing file.
            If False, mismatching and missing files are recorded as False.

    Returns:
        Dictionary mapping filename to verification result

    Raises:
        ChecksumMismatchError: If strict=True and any checksum doesn't match
        FileNotFoundError: If strict=True and a listed file doesn't exist
    """
    directory = Path(directory)
    results = {}

    for filename, expected_checksum in checksums.items():
        file_path = directory / filename
        try:
            # With strict=True a mismatch raises ChecksumMismatchError here
            # and propagates; with strict=False the call returns False.
            results[filename] = verify_checksum(
                file_path, expected_checksum, strict=strict
            )
        except FileNotFoundError:
            # A file listed in the manifest but absent on disk is a failed
            # verification; only strict mode turns it into an exception.
            if strict:
                raise
            results[filename] = False

    return results