py-devo 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
devo/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """DEVO — CSV to iCSV enrichment and validation.
2
+
3
+ Public API:
4
+ from devo.enrich import ICSVEnricher
5
+ from devo.validate import validate_icsv
6
+
7
+ Intentionally imports nothing on package load to avoid side effects
8
+ (frictionless, flask) in environments where only one function is needed.
9
+ """
10
+
11
+ __version__ = "0.2.0"
devo/_infer.py ADDED
@@ -0,0 +1,112 @@
1
+ """Pure type-inference functions — no I/O, no side effects.
2
+
3
+ All functions in this module are deterministic and dependency-free.
4
+ They are shared by the enricher (CSV → type) and the validator (data re-inference).
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from datetime import datetime
10
+
11
+ # --- Constants ---
12
+
13
# Optional leading '-' followed by digits only.
INT_RE = re.compile(r"^-?\d+$")
# The fractional part is optional, so "5" and "5.0" both match; a column that
# mixes integers and floats therefore resolves to 'number', not 'string'.
FLOAT_RE = re.compile(r"^-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$")

# strptime patterns tried, in this order, after datetime.fromisoformat() has
# rejected a value. Day-first patterns come before month-first ones.
STRPTIME_FORMATS: tuple[str, ...] = (
    "%Y-%m-%d %H:%M:%S",
    "%Y-%m-%d %H:%M",
    "%Y-%m-%d",
    "%d.%m.%Y",
    "%d/%m/%Y",
    "%m/%d/%Y",
    "%Y/%m/%d",
    "%d-%m-%Y",
    "%Y%m%dT%H%M%S",
    "%Y-%m-%dT%H:%M:%S%z",
    "%Y-%m-%dT%H:%M:%S",
)

# Delimiters accepted for iCSV output (iCSV spec EBNF:
# field_delimiter ::= [,|\/:;]). Tab is not a member.
VALID_ICSV_DELIMITERS: frozenset[str] = frozenset(",|\\/:;")

# Missing-value sentinels shared by the enricher and the validator.
# EnviDat has no standardised sentinel, so the net is deliberately wide.
COMMON_MISSING: frozenset[str] = frozenset(
    (
        "",
        "NA", "N/A", "na", "n/a",
        "NULL", "null",
        "nan", "NaN",
        "-999", "-999.0", "-999.000000",
    )
)

# Subtype lattice: inferred type → declared types it is valid under.
# integer ⊂ number ⊂ string; datetime ⊂ string.
# The validator uses it for the Option-A cross-check, in which the declared
# type is authoritative.
_SUBTYPES: dict[str, frozenset[str]] = dict(
    integer=frozenset(("integer", "number", "string")),
    number=frozenset(("number", "string")),
    datetime=frozenset(("datetime", "string")),
    string=frozenset(("string",)),
)
52
+
53
+
54
+ # --- Type checkers ---
55
+
56
def _is_integer(s: str) -> bool:
    """Return True when ``s`` matches INT_RE (optional '-' then digits only)."""
    return INT_RE.match(s) is not None
58
+
59
+
60
def _is_number(s: str) -> bool:
    """Return True when ``s`` matches either INT_RE or FLOAT_RE."""
    return any(pattern.match(s) for pattern in (INT_RE, FLOAT_RE))
62
+
63
+
64
+ def _is_datetime(s: str) -> bool:
65
+ """Try fromisoformat first, then a fixed list of strptime formats."""
66
+ s = s.strip()
67
+ if not s:
68
+ return False
69
+ try:
70
+ datetime.fromisoformat(s)
71
+ return True
72
+ except (ValueError, TypeError):
73
+ pass
74
+ for fmt in STRPTIME_FORMATS:
75
+ try:
76
+ datetime.strptime(s, fmt)
77
+ return True
78
+ except (ValueError, TypeError):
79
+ continue
80
+ return False
81
+
82
+
83
+ # --- Public API ---
84
+
85
def infer_type(values: list[str], missing: frozenset[str] = COMMON_MISSING) -> str:
    """
    Infer a Frictionless field type from a list of string values.

    Each value is stripped, and values found in ``missing`` are dropped before
    testing. The remaining values are checked against the cascade
    integer → number → datetime; the first type that every value satisfies is
    returned. If none fits, or nothing is left after dropping missing values,
    the result is 'string'.
    """
    present = [text for text in (raw.strip() for raw in values) if text not in missing]
    if not present:
        return "string"

    cascade = (
        ("integer", _is_integer),
        ("number", _is_number),
        ("datetime", _is_datetime),
    )
    for type_name, accepts in cascade:
        if all(accepts(value) for value in present):
            return type_name
    return "string"
102
+
103
+
104
def is_subtype_or_equal(inferred: str, declared: str) -> bool:
    """
    Return True when data of type ``inferred`` satisfies a ``declared`` type.

    The answer is looked up in the _SUBTYPES lattice:
      - inferred=integer, declared=number → True  (integers pass number validation)
      - inferred=number, declared=integer → False (floats fail integer validation)
    An ``inferred`` type that is not in the lattice always yields False.
    The validator reports [WARN] when this returns False.
    """
    accepted = _SUBTYPES.get(inferred)
    if accepted is None:
        return False
    return declared in accepted
devo/_parser.py ADDED
@@ -0,0 +1,91 @@
1
+ """Canonical iCSV header parser — single implementation shared by enricher and validator.
2
+
3
+ Parses [METADATA] and [FIELDS] sections from iCSV files per the iCSV 1.0 spec.
4
+ Stops at # [DATA] and does not read data rows.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+
11
+ from .exceptions import ParseError
12
+
13
+
14
@dataclass
class ICSVHeader:
    """Parsed iCSV header as produced by ``parse_header``."""

    # key → value pairs read from the [METADATA] section
    metadata: dict[str, str]
    # [FIELDS] key → its values, already split on the field delimiter
    fields_meta: dict[str, list[str]]
    # delimiter that was used to split the [FIELDS] values
    field_delimiter: str
19
+
20
+
21
def is_icsv(path: Path) -> bool:
    """Report whether the first line of ``path`` starts with the '# iCSV' marker.

    A file that cannot be opened or decoded as UTF-8 is reported as not iCSV
    rather than raising.
    """
    try:
        # utf-8-sig drops a leading BOM so it cannot hide the marker.
        with open(path, "r", encoding="utf-8-sig") as handle:
            first_line = handle.readline()
    except (OSError, UnicodeDecodeError):
        return False
    return first_line.strip().startswith("# iCSV")
29
+
30
+
31
def parse_header(path: Path) -> ICSVHeader:
    """
    Parse the [METADATA] and [FIELDS] sections of an iCSV file.

    Only lines starting with '#' are inspected, and reading stops at the
    '[DATA]' marker, so data rows are never loaded. [FIELDS] values are kept
    unsplit while reading and are split only afterwards, using the
    ``field_delimiter`` metadata value (default ','). Key order in the file
    therefore does not matter.

    Raises:
        ParseError: if the file cannot be read or decoded as UTF-8, if no
            metadata key/value pair was found, or if ``field_delimiter`` is
            declared with an empty value.
    """
    metadata: dict[str, str] = {}
    raw_fields: dict[str, str] = {}  # key → unsplit value; split once the delimiter is known
    section: str | None = None

    try:
        with open(path, "r", encoding="utf-8-sig") as fh:
            for line in fh:
                stripped = line.rstrip("\r\n")

                if not stripped.startswith("#"):
                    continue

                content = stripped.lstrip("#").strip()

                if content == "[METADATA]":
                    section = "metadata"
                    continue
                if content == "[FIELDS]":
                    section = "fields"
                    continue
                if content == "[DATA]":
                    break

                # Skip blank comment lines, lines without a key/value pair,
                # and anything that appears before the first section marker.
                if not content or "=" not in content or section is None:
                    continue

                key, _, val = content.partition("=")
                key = key.strip()
                val = val.strip()

                if section == "metadata":
                    metadata[key] = val
                else:
                    raw_fields[key] = val

    # UnicodeDecodeError is a ValueError, not an OSError; is_icsv() already
    # treats it as "unreadable", so report it the same way here.
    except (OSError, UnicodeDecodeError) as e:
        raise ParseError(f"Cannot read {path}: {e}") from e

    if not metadata:
        raise ParseError(f"{path.name}: no [METADATA] section found or file is empty")

    field_delimiter = metadata.get("field_delimiter", ",")
    if field_delimiter == "":
        # str.split("") raises a bare ValueError; surface a DEVO error instead.
        raise ParseError(f"{path.name}: metadata key 'field_delimiter' has an empty value")

    fields_meta = {
        k: [v.strip() for v in raw.split(field_delimiter)]
        for k, raw in raw_fields.items()
    }

    return ICSVHeader(
        metadata=metadata,
        fields_meta=fields_meta,
        field_delimiter=field_delimiter,
    )
devo/_report.py ADDED
@@ -0,0 +1,88 @@
1
+ """Plain-text validation report writer.
2
+
3
+ Produces a human-readable .txt file covering three checks:
4
+ 1. Metadata completeness
5
+ 2. Type consistency (declared vs re-inferred)
6
+ 3. Frictionless data validation
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+
15
def write_report(
    path: Path,
    icsv_name: str,
    metadata_issues: list[str],
    type_issues: list[tuple[str, str, str, bool]],
    frictionless_report: Any,
    is_valid: bool,
) -> None:
    """
    Write a plain-text DEVO validation report to `path`.

    Args:
        path: output file; it is overwritten.
        icsv_name: file name shown in the report header.
        metadata_issues: pre-formatted issue lines, written verbatim.
        type_issues: list of (column_name, declared_type, inferred_type, is_ok).
            is_ok=True means inferred is a subtype of (or equal to) declared.
        frictionless_report: object exposing ``flatten(spec)``, as returned by
            frictionless ``Resource.validate()``.
        is_valid: overall verdict shown in the report header.

    If error details cannot be extracted from ``frictionless_report``, a
    [WARN] line is written and no [PASS]/[FAIL] verdict is printed for the
    data section, because the absence of extracted errors proves nothing.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    sep = "-" * 40

    # The error identifier is 'code' in frictionless v4 and 'type' in v5.
    # Requesting both keeps the report meaningful on either version; the key
    # that does not exist comes back as None.
    spec = ["rowNumber", "fieldNumber", "fieldName", "code", "type", "message"]
    try:
        errors = frictionless_report.flatten(spec)
        extracted = True
    except (AttributeError, TypeError):
        errors = []
        extracted = False

    with open(path, "w", encoding="utf-8") as fh:

        fh.write("DEVO Validation Report\n")
        fh.write("=" * 22 + "\n")
        fh.write(f"File: {icsv_name}\n")
        fh.write(f"Date: {now}\n")
        fh.write(f"Valid: {'YES' if is_valid else 'NO'}\n\n")

        # --- Metadata ---
        fh.write("METADATA\n")
        fh.write(sep + "\n")
        if metadata_issues:
            for issue in metadata_issues:
                fh.write(f"{issue}\n")
        else:
            fh.write("[OK] All required metadata present.\n")
        fh.write("\n")

        # --- Type consistency (Option A cross-check) ---
        fh.write("TYPE CONSISTENCY\n")
        fh.write(sep + "\n")
        if not type_issues:
            fh.write("[OK] No declared types to cross-check.\n")
        else:
            for col, declared, inferred, ok in type_issues:
                if ok:
                    fh.write(f"[OK] {col}: declared={declared}, inferred={inferred}\n")
                else:
                    fh.write(f"[WARN] {col}: declared={declared}, inferred={inferred}\n")
                    fh.write(
                        f" Inferred type is wider than declared. "
                        f"Data may not satisfy '{declared}' constraints.\n"
                    )
        fh.write("\n")

        # --- Frictionless data validation ---
        fh.write("DATA VALIDATION\n")
        fh.write(sep + "\n")
        if not extracted:
            fh.write("[WARN] Could not extract error details from frictionless report.\n")
        elif not errors:
            fh.write("[PASS] No data errors found.\n")
        else:
            shown = errors[:50]
            suffix = f" (showing first 50 of {len(errors)})" if len(errors) > 50 else ""
            fh.write(f"[FAIL] {len(errors)} error(s) found{suffix}:\n")
            for row, col_num, col_name, code, err_type, message in shown:
                row_str = str(row) if row is not None else "?"
                col_str = col_name or (str(col_num) if col_num is not None else "?")
                label = code if code is not None else err_type
                fh.write(f" Row {row_str}, Col {col_str} [{label}]: {message}\n")
devo/_schema.py ADDED
@@ -0,0 +1,111 @@
1
+ """Frictionless schema builder and per-column statistics.
2
+
3
+ Separates DEVO-specific stats (min, max, missing_count — written to the iCSV FIELDS section)
4
+ from the Frictionless schema JSON (which must only contain standard Frictionless keys).
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from typing import Any, Optional
9
+
10
+ from ._infer import STRPTIME_FORMATS, COMMON_MISSING
11
+
12
+
13
+ def _numeric_minmax(
14
+ pruned: list[str], as_type: str
15
+ ) -> tuple[Optional[float | int], Optional[float | int]]:
16
+ """Compute min/max for integer or number columns. Returns (None, None) on failure."""
17
+ if not pruned:
18
+ return None, None
19
+ try:
20
+ nums = [int(x) if as_type == "integer" else float(x) for x in pruned]
21
+ return min(nums), max(nums)
22
+ except (ValueError, TypeError):
23
+ return None, None
24
+
25
+
26
+ def _datetime_minmax(pruned: list[str]) -> tuple[Optional[str], Optional[str]]:
27
+ """
28
+ Compute min/max for datetime columns.
29
+ Returns ISO-format strings or (None, None) if nothing can be parsed.
30
+ Uses the same format list as _infer.py to stay consistent.
31
+ """
32
+ from datetime import datetime
33
+
34
+ parsed = []
35
+ for v in pruned:
36
+ try:
37
+ parsed.append(datetime.fromisoformat(v))
38
+ continue
39
+ except (ValueError, TypeError):
40
+ pass
41
+ for fmt in STRPTIME_FORMATS:
42
+ try:
43
+ parsed.append(datetime.strptime(v, fmt))
44
+ break
45
+ except (ValueError, TypeError):
46
+ continue
47
+ if not parsed:
48
+ return None, None
49
+ return min(parsed).isoformat(), max(parsed).isoformat()
50
+
51
+
52
def compute_col_stats(
    vals: list[str],
    inferred_type: str,
    missing: frozenset[str] = COMMON_MISSING,
) -> dict[str, Any]:
    """
    Compute per-column statistics for the iCSV [FIELDS] section.

    These values go into # min =, # max =, # missing_count =.
    They do NOT appear in the Frictionless schema JSON.

    Values are stripped before the missing-value test, matching
    ``infer_type``; a padded sentinel such as " NA " is therefore counted as
    missing here as well.

    Returns a dict with the keys 'type', 'min', 'max', 'missing_count' and
    'required'. 'min'/'max' stay None for types other than integer, number
    and datetime, and whenever no bounds could be computed.
    """
    stripped = [v.strip() for v in vals]
    pruned = [v for v in stripped if v != "" and v not in missing]
    missing_count = len(vals) - len(pruned)
    stats: dict[str, Any] = {
        "type": inferred_type,
        "min": None,
        "max": None,
        "missing_count": missing_count,
        # required only if no missing values were observed in the current data
        "required": missing_count == 0 and len(vals) > 0,
    }
    if inferred_type in ("integer", "number") and pruned:
        stats["min"], stats["max"] = _numeric_minmax(pruned, inferred_type)
    elif inferred_type == "datetime" and pruned:
        stats["min"], stats["max"] = _datetime_minmax(pruned)
    return stats
77
+
78
+
79
def build_frictionless_schema(
    header: list[str],
    col_stats: list[dict[str, Any]],
    missing: frozenset[str] = COMMON_MISSING,
) -> dict[str, Any]:
    """
    Build a Frictionless Table Schema dict from column names and their stats.

    Each field receives 'name' and 'type'. Datetime fields also get
    format='any'. 'minimum'/'maximum' constraints are added when the stats
    carry a non-None 'min'/'max', and 'required' when the stats say so.
    'missingValues' is the sorted ``missing`` set. DEVO-specific stats such as
    missing_count are not written here; they live in the iCSV FIELDS section.
    """
    fields = []
    for name, stats in zip(header, col_stats):
        col_type = stats["type"]
        field: dict[str, Any] = {"name": name, "type": col_type}
        # frictionless datetime/default rejects partial datetime strings
        # (e.g. date-only); format=any accepts any parseable representation,
        # in line with DEVO's own broad datetime detection.
        if col_type == "datetime":
            field["format"] = "any"

        bounds = (("minimum", stats["min"]), ("maximum", stats["max"]))
        constraints: dict[str, Any] = {
            key: value for key, value in bounds if value is not None
        }
        if stats.get("required"):
            constraints["required"] = True
        if constraints:
            field["constraints"] = constraints
        fields.append(field)

    return {"fields": fields, "missingValues": sorted(missing)}
devo/cli.py ADDED
@@ -0,0 +1,104 @@
1
+ """Command-line front-end for DEVO.
2
+
3
+ Three subcommands:
4
+ enrich — CSV → iCSV + schema
5
+ validate — iCSV + schema → report
6
+ run — enrich then validate (or just validate if input is already .icsv)
7
+
8
+ Exit codes: 0 = success, 1 = validation failed (data errors), 2 = usage/runtime error.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import sys
14
+
15
+ from .enrich import ICSVEnricher
16
+ from .exceptions import DEVOError
17
+ from .validate import validate_icsv
18
+
19
+
20
def build_parser() -> argparse.ArgumentParser:
    """Build the ``devo`` argument parser with its enrich/validate/run subcommands."""

    def add_out(target: argparse.ArgumentParser) -> None:
        # Every subcommand writes into the same default output directory.
        target.add_argument("--out", default="DEVO_output", metavar="DIR", help="Output directory")

    def add_csv_options(target: argparse.ArgumentParser, suffix: str) -> None:
        # Options that only matter when the input is a plain CSV.
        for flag, metavar, text in (
            ("--delimiter", "CHAR", "Force input delimiter"),
            ("--nodata", "VALUE", "Force nodata sentinel"),
            ("--app", "PROFILE", "iCSV application profile"),
        ):
            target.add_argument(flag, metavar=metavar, help=text + suffix)

    parser = argparse.ArgumentParser(
        prog="devo", description="DEVO — CSV to iCSV enrichment and validation"
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    enrich = commands.add_parser("enrich", help="Convert a CSV to iCSV + Frictionless schema")
    enrich.add_argument("infile", help="Input CSV file")
    add_out(enrich)
    add_csv_options(enrich, "")

    validate = commands.add_parser("validate", help="Validate an iCSV against its schema")
    validate.add_argument("infile", help="Input .icsv file")
    validate.add_argument("--schema", metavar="PATH", help="Schema JSON path (default: auto-discover)")
    add_out(validate)

    run = commands.add_parser(
        "run",
        help="Enrich then validate. If input is already .icsv, skips enrichment.",
    )
    run.add_argument("infile", help="Input CSV or iCSV file")
    add_out(run)
    add_csv_options(run, " (CSV only)")

    return parser
47
+
48
+
49
def main(argv=None) -> None:
    """Run the ``devo`` command line.

    Args:
        argv: argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Exits with status 1 when validation reports the file as invalid, and with
    status 2 (message on stderr) when a DEVO error or an OS-level I/O error
    occurs. Returns normally on success.
    """
    p = build_parser()
    args = p.parse_args(argv)

    def _enrich() -> tuple[str, str]:
        # Shared by 'enrich' and 'run': both expose the same CSV options.
        return ICSVEnricher().make_icsv(
            args.infile, args.out,
            user_delimiter=args.delimiter,
            nodata_override=args.nodata,
            application_profile=args.app,
        )

    def _validate(icsv, schema) -> None:
        report, valid = validate_icsv(icsv, schema_path=schema, outdir=args.out)
        print(f"[{'OK' if valid else 'FAIL'}] Report: {report}")
        if not valid:
            sys.exit(1)

    try:
        if args.cmd == "enrich":
            icsv, schema = _enrich()
            print(f"[OK] {icsv}")
            print(f"[OK] {schema}")

        elif args.cmd == "validate":
            _validate(args.infile, args.schema)

        elif args.cmd == "run":
            from pathlib import Path
            from ._parser import is_icsv

            inpath = Path(args.infile)
            if is_icsv(inpath):
                # Already enriched — skip enrichment. Passing no schema lets
                # validate_icsv look for the sibling '<stem>_schema.json'
                # itself and raise its explanatory error when it is absent,
                # instead of failing later on a path that was never checked.
                icsv = str(inpath)
                schema = None
                print("[OK] Input is already an iCSV — skipping enrichment.")
            else:
                icsv, schema = _enrich()
                print(f"[OK] Enriched: {icsv}")

            _validate(icsv, schema)

    # OSError covers FileNotFoundError as well as permission and other I/O
    # failures, which are runtime errors (exit code 2) rather than tracebacks.
    except (DEVOError, OSError) as e:
        print(f"[ERROR] {e}", file=sys.stderr)
        sys.exit(2)
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
devo/enrich.py ADDED
@@ -0,0 +1,234 @@
1
+ """CSV → iCSV enrichment.
2
+
3
+ Public API: ICSVEnricher().make_icsv(infile, outdir, ...)
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import csv
8
+ import json
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
+ from ._infer import COMMON_MISSING, VALID_ICSV_DELIMITERS, infer_type
14
+ from ._parser import is_icsv
15
+ from ._schema import build_frictionless_schema, compute_col_stats
16
+ from .exceptions import EnrichError
17
+
18
+
19
+ def _detect_delimiter(sample: str) -> str:
20
+ """Sniff a delimiter from a text sample; fall back to comma on failure."""
21
+ try:
22
+ dialect = csv.Sniffer().sniff(sample, delimiters=",|;:\t/")
23
+ return dialect.delimiter
24
+ except csv.Error:
25
+ return ","
26
+
27
+
28
def _to_icsv_delimiter(detected: str) -> str:
    """
    Choose the iCSV output delimiter for a detected input delimiter.

    - Comma becomes pipe, to avoid ambiguity with ',' inside metadata lines.
    - Anything outside VALID_ICSV_DELIMITERS (tab, for instance) becomes pipe.
    - Every other valid delimiter is kept unchanged.
    """
    if detected == "," or detected not in VALID_ICSV_DELIMITERS:
        return "|"
    return detected
38
+
39
+
40
+ def _detect_geometry(header: list[str]) -> tuple[Optional[str], Optional[str]]:
41
+ """
42
+ Heuristic spatial-column detection.
43
+ Returns (geometry_value, srid_value) or (None, None) when no spatial columns are found.
44
+ Per Q1: geometry and srid are written only when spatial columns are detected.
45
+ """
46
+ lower = [h.lower() for h in header]
47
+ if "geometry" in lower:
48
+ idx = lower.index("geometry")
49
+ return f"column:{header[idx]}", None
50
+ lat_idx = lon_idx = None
51
+ for i, h in enumerate(lower):
52
+ if h in ("lat", "latitude"):
53
+ lat_idx = i
54
+ if h in ("lon", "lng", "longitude"):
55
+ lon_idx = i
56
+ if lat_idx is not None and lon_idx is not None:
57
+ return f"column:{header[lat_idx]},{header[lon_idx]}", "EPSG:4326"
58
+ return None, None
59
+
60
+
61
class ICSVEnricher:
    """Convert a plain CSV file into an iCSV file plus a Frictionless schema JSON."""

    def __init__(self, nodata_candidates: Optional[set[str]] = None):
        """Store the missing-value sentinels; a missing or empty set means COMMON_MISSING."""
        self.missing: frozenset[str] = (
            frozenset(nodata_candidates) if nodata_candidates else COMMON_MISSING
        )

    def _load_rows(
        self,
        path: Path,
        user_delimiter: Optional[str],
    ) -> tuple[list[str], list[list[str]], str]:
        """
        Read a CSV file and return (header, rows, delimiter).

        The file is decoded as utf-8-sig (a BOM is dropped; undecodable bytes
        are replaced). Unless ``user_delimiter`` is given, the delimiter is
        sniffed from the first 10 lines. The first line is the header, with
        cells stripped; every later line is parsed on its own.

        Blank lines after the header are skipped when the header has more than
        one column, because they cannot be records there and would otherwise
        become rows made entirely of missing values. For a single-column file
        a blank line is kept, since it may be an empty value.

        Raises:
            EnrichError: unreadable or empty file, a delimiter that is not a
                single character, or no usable header row.
        """
        try:
            with open(path, "r", encoding="utf-8-sig", errors="replace") as fh:
                lines = fh.readlines()
        except OSError as e:
            raise EnrichError(f"Cannot read {path}: {e}") from e

        if not lines:
            raise EnrichError(f"{path.name}: file is empty")

        if user_delimiter and len(user_delimiter) != 1:
            # csv.reader would raise a bare TypeError for this.
            raise EnrichError(
                f"Delimiter must be a single character, got {user_delimiter!r}"
            )
        delimiter = user_delimiter or _detect_delimiter("".join(lines[:10]))

        def _split(line: str) -> list[str]:
            # csv.reader yields one (possibly empty) row for a single line.
            return next(csv.reader([line], delimiter=delimiter), [])

        header = [c.strip() for c in _split(lines[0])]
        if not header or all(c == "" for c in header):
            raise EnrichError(f"{path.name}: no usable header row found")

        skip_blank = len(header) > 1
        rows: list[list[str]] = []
        for line in lines[1:]:
            if skip_blank and not line.strip():
                continue
            rows.append(_split(line))

        return header, rows, delimiter

    def _detect_nodata(self, rows: list[list[str]]) -> str:
        """
        Return the most common missing-value sentinel seen in the data, or ''.

        Cells are stripped before the lookup because make_icsv strips them
        before writing. Ties go to the sentinel encountered first.
        """
        counts: dict[str, int] = {}
        for row in rows:
            for cell in row:
                value = cell.strip()
                if value in self.missing:
                    counts[value] = counts.get(value, 0) + 1
        return max(counts, key=lambda k: counts[k]) if counts else ""

    def make_icsv(
        self,
        infile: str,
        outdir: str,
        user_delimiter: Optional[str] = None,
        nodata_override: Optional[str] = None,
        application_profile: Optional[str] = None,
    ) -> tuple[str, str]:
        """
        Convert a CSV to an iCSV + Frictionless schema JSON.

        Args:
            infile: path of the input CSV.
            outdir: directory for '<stem>.icsv' and '<stem>_schema.json';
                created if needed.
            user_delimiter: input delimiter; sniffed when not given.
            nodata_override: nodata sentinel to record; when given it is also
                treated as a missing value for type inference, statistics and
                the schema's missingValues. Detected from the data otherwise.
            application_profile: optional value for the 'application_profile'
                metadata key.

        Returns (icsv_path, schema_path) as strings.

        Raises:
            EnrichError: the input is already an iCSV, cannot be read, or a
                column name contains the output delimiter.
        """
        path = Path(infile)
        if is_icsv(path):
            raise EnrichError(
                f"{path.name} is already an iCSV file. "
                "Use 'devo validate' to validate it, or 'devo run' which handles both."
            )

        header, rows, detected_delim = self._load_rows(path, user_delimiter)
        icsv_delim = _to_icsv_delimiter(detected_delim)
        bad_names = [c for c in header if icsv_delim in c]
        if bad_names:
            raise EnrichError(
                f"Column name(s) contain the iCSV delimiter '{icsv_delim}': {bad_names}. "
                "Rename the columns or force a different delimiter with --delimiter."
            )
        nodata = nodata_override if nodata_override is not None else self._detect_nodata(rows)
        # A forced sentinel may lie outside self.missing. Without adding it,
        # it would be typed, min/max'ed and schema-validated as real data.
        missing = (self.missing | {nodata}) if nodata else self.missing

        # Normalise all rows to header length once, then transpose to column lists.
        width = len(header)
        padded = []
        for row in rows:
            if len(row) < width:
                row = row + [""] * (width - len(row))
            else:
                row = row[:width]
            padded.append([c.strip() for c in row])

        col_values: list[list[str]] = [[row[i] for row in padded] for i in range(width)]

        types = [infer_type(col, missing) for col in col_values]
        col_stats = [
            compute_col_stats(col, col_type, missing)
            for col, col_type in zip(col_values, types)
        ]
        schema = build_frictionless_schema(header, col_stats, missing)

        geometry, srid = _detect_geometry(header)

        metadata: dict[str, str] = {
            "iCSV_version": "1.0",
        }
        if application_profile:
            metadata["application_profile"] = application_profile
        metadata["field_delimiter"] = icsv_delim
        metadata["rows"] = str(len(rows))
        metadata["columns"] = str(len(header))
        metadata["creation_date"] = (
            datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
        )
        if nodata:
            metadata["nodata"] = nodata
        if geometry:
            metadata["geometry"] = geometry
        if srid:
            metadata["srid"] = srid
        metadata["generator"] = "DEVO"

        def _join(vals) -> str:
            # None (no statistic available) is written as an empty slot.
            return icsv_delim.join("" if v is None else str(v) for v in vals)

        fields_lines = [
            f"fields = {_join(header)}",
            f"types = {_join(types)}",
            f"min = {_join(s['min'] for s in col_stats)}",
            f"max = {_join(s['max'] for s in col_stats)}",
            f"missing_count = {_join(s['missing_count'] for s in col_stats)}",
            f"description = {_join('' for _ in header)}",
        ]

        out = Path(outdir)
        out.mkdir(parents=True, exist_ok=True)
        base = path.stem
        icsv_path = out / f"{base}.icsv"
        schema_path = out / f"{base}_schema.json"

        self._write_icsv(icsv_path, metadata, fields_lines, header, padded, icsv_delim)

        with open(schema_path, "w", encoding="utf-8") as fh:
            json.dump(schema, fh, indent=2, ensure_ascii=False)

        return str(icsv_path), str(schema_path)

    def _write_icsv(
        self,
        path: Path,
        metadata: dict[str, str],
        fields_lines: list[str],
        header: list[str],
        rows: list[list[str]],
        field_delimiter: str,
    ) -> None:
        """
        Write the iCSV file: first line, [METADATA], [FIELDS], then [DATA].

        Metadata and field lines are written as '# key = value' comments. The
        data section is the header row followed by ``rows``, written with
        csv.writer using ``field_delimiter``. The parent directory is created
        if needed.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8", newline="") as fh:
            fh.write("# iCSV 1.0 UTF-8\n")
            fh.write("# [METADATA]\n")
            for k, v in metadata.items():
                fh.write(f"# {k} = {v}\n")
            fh.write("\n")
            fh.write("# [FIELDS]\n")
            for line in fields_lines:
                fh.write(f"# {line}\n")
            fh.write("\n")
            fh.write("# [DATA]\n")
            writer = csv.writer(fh, delimiter=field_delimiter)
            writer.writerow(header)
            writer.writerows(rows)
devo/exceptions.py ADDED
@@ -0,0 +1,14 @@
1
class DEVOError(Exception):
    """Root of the DEVO exception hierarchy; catching it handles any DEVO failure."""


class EnrichError(DEVOError):
    """CSV → iCSV conversion failed (bad input, unreadable file, etc.)."""


class ParseError(DEVOError):
    """An iCSV file could not be parsed (missing sections, malformed lines)."""


class ValidationError(DEVOError):
    """The validation machinery itself failed; data errors are not reported this way."""
devo/validate.py ADDED
@@ -0,0 +1,219 @@
1
+ """iCSV validation.
2
+
3
+ Public API: validate_icsv(icsv_path, schema_path=None, outdir="DEVO_output")
4
+
5
+ Three-stage check:
6
+ 1. Metadata completeness (field_delimiter required; geometry/srid conditional on Q1).
7
+ 2. Type consistency: re-infer column types from data and compare to declared types
8
+ (Option A: declared type is authoritative; inferred wider than declared → [WARN]).
9
+ 3. Frictionless data validation against the schema JSON.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import csv
14
+ import json
15
+ import os
16
+ import tempfile
17
+ from pathlib import Path
18
+ from typing import Optional
19
+
20
+ from ._infer import COMMON_MISSING, infer_type, is_subtype_or_equal
21
+ from ._parser import ICSVHeader, parse_header
22
+ from ._report import write_report
23
+ from .exceptions import ParseError, ValidationError
24
+
25
+ # How many data rows (excluding header) to sample for type re-inference.
26
+ _INFER_SAMPLE = 500
27
+
28
+
29
+ def _check_metadata(header: ICSVHeader) -> list[str]:
30
+ """
31
+ Return a list of issue strings (empty = clean).
32
+ geometry/srid are only flagged when spatial column names are present (Q1).
33
+ srid is only required for lat/lon columns — WKT geometry embeds its own CRS.
34
+ """
35
+ issues = []
36
+
37
+ if "field_delimiter" not in header.metadata:
38
+ issues.append("[FAIL] Missing required metadata key: field_delimiter")
39
+
40
+ fields = header.fields_meta.get("fields", [])
41
+ lat_lon_names = {"lat", "latitude", "lon", "lng", "longitude"}
42
+ wkt_names = {"geometry"}
43
+ has_lat_lon = any(f.lower() in lat_lon_names for f in fields)
44
+ has_wkt = any(f.lower() in wkt_names for f in fields)
45
+
46
+ if has_lat_lon or has_wkt:
47
+ if "geometry" not in header.metadata:
48
+ issues.append(
49
+ "[WARN] Spatial columns detected but 'geometry' metadata key is missing"
50
+ )
51
+ if has_lat_lon:
52
+ if "srid" not in header.metadata:
53
+ issues.append(
54
+ "[WARN] Spatial columns detected but 'srid' metadata key is missing"
55
+ )
56
+
57
+ return issues
58
+
59
+
60
+ def _extract_data(
61
+ icsv_path: Path,
62
+ tmp_csv: Path,
63
+ field_delimiter: str,
64
+ ) -> list[list[str]]:
65
+ """
66
+ Write the [DATA] section of the iCSV to a comma-delimited temp CSV.
67
+ Returns the first _INFER_SAMPLE data rows (excluding the header row) for type inference.
68
+
69
+ Writing with comma delimiter avoids fighting the Frictionless dialect API across v4/v5;
70
+ csv.writer quotes any values that contain a comma, so round-tripping is lossless.
71
+ """
72
+ sampled: list[list[str]] = []
73
+ in_data = False
74
+ header_done = False # first DATA row is the column header, not a data row
75
+
76
+ with open(icsv_path, "r", encoding="utf-8-sig") as src, \
77
+ open(tmp_csv, "w", encoding="utf-8", newline="") as tgt:
78
+ writer = csv.writer(tgt)
79
+ for line in src:
80
+ if line.strip() == "# [DATA]":
81
+ in_data = True
82
+ continue
83
+ if not in_data:
84
+ continue
85
+ stripped = line.strip()
86
+ if not stripped or stripped.startswith("#"):
87
+ continue
88
+ row = list(csv.reader([line.rstrip("\r\n")], delimiter=field_delimiter))[0]
89
+ writer.writerow(row)
90
+ if not header_done:
91
+ header_done = True
92
+ elif len(sampled) < _INFER_SAMPLE:
93
+ sampled.append(row)
94
+
95
+ return sampled
96
+
97
+
98
def _cross_check_types(
    declared_types: list[str],
    data_rows: list[list[str]],
    field_names: list[str],
    missing: frozenset[str] = COMMON_MISSING,
) -> list[tuple[str, str, str, bool]]:
    """
    Re-infer each column's type from ``data_rows`` and compare with the declaration.

    Returns one (col_name, declared, inferred, is_ok) tuple per declared type.
    is_ok=True means the inferred type is a subtype of or equal to the declared
    one (Option A). A column with no entry in ``field_names`` is named by its
    index. Rows shorter than the declaration contribute nothing to the columns
    they lack; cells beyond the declared width are ignored. Returns [] when
    there are no declared types or no data rows.

    Callers should merge the iCSV's own nodata sentinel into ``missing`` so it
    is not treated as a real value during re-inference.
    """
    if not (declared_types and data_rows):
        return []

    width = len(declared_types)
    columns: list[list[str]] = [
        [row[i] for row in data_rows if i < len(row)] for i in range(width)
    ]

    results = []
    for i, (declared, values) in enumerate(zip(declared_types, columns)):
        name = field_names[i] if i < len(field_names) else str(i)
        inferred = infer_type(values, missing)
        results.append((name, declared, inferred, is_subtype_or_equal(inferred, declared)))
    return results
126
+
127
+
128
def _import_frictionless_schema():
    """Import and return ``frictionless.Schema`` on demand.

    The import is deferred so frictionless is not loaded at module import
    time. Raises ValidationError when the package cannot be imported.
    """
    try:
        from frictionless import Schema
    except ImportError as exc:
        raise ValidationError(
            "The 'frictionless' package is required. Install it: pip install frictionless"
        ) from exc
    else:
        return Schema
137
+
138
+
139
def validate_icsv(
    icsv_path: str,
    schema_path: Optional[str] = None,
    outdir: str = "DEVO_output",
) -> tuple[str, bool]:
    """
    Validate an iCSV file and write a plain-text report. Returns (report_path, valid).

    valid=True only when no metadata issue is a [FAIL] AND Frictionless reports
    the data as valid. Type-consistency [WARN] entries do not affect the flag.

    Args:
        icsv_path: the iCSV file to validate.
        schema_path: Frictionless schema JSON. When omitted,
            '<stem>_schema.json' next to the iCSV is used.
        outdir: directory for the report and the temporary CSV; created if
            needed.

    Raises:
        ValidationError: frictionless is not installed, or the schema file is
            not valid JSON.
        FileNotFoundError: no schema was given and none exists alongside the
            iCSV, or the given schema path does not exist.
        ParseError: raised by parse_header for an unreadable or malformed header.
    """
    try:
        from frictionless import Resource
    except ImportError as exc:
        raise ValidationError(
            "The 'frictionless' package is required. Install it: pip install frictionless"
        ) from exc

    path = Path(icsv_path)
    out = Path(outdir)
    out.mkdir(parents=True, exist_ok=True)

    header = parse_header(path)
    metadata_issues = _check_metadata(header)
    metadata_ok = not any(line.startswith("[FAIL]") for line in metadata_issues)

    declared_types = header.fields_meta.get("types", [])
    field_names = header.fields_meta.get("fields", [])

    if not schema_path:
        candidate = path.with_name(path.stem + "_schema.json")
        if not candidate.exists():
            raise FileNotFoundError(
                f"No schema provided and none found alongside {path.name}. "
                "Run 'devo enrich' first or pass --schema."
            )
        schema_path = str(candidate)

    # Load the schema as a dict ourselves so that path resolution is ours, not
    # frictionless's. This happens before any temp file is created, so a bad
    # schema leaves nothing behind.
    schema_text = Path(schema_path).read_text(encoding="utf-8")
    try:
        schema_dict = json.loads(schema_text)
    except ValueError as exc:  # json.JSONDecodeError is a ValueError
        raise ValidationError(f"Schema {schema_path} is not valid JSON: {exc}") from exc

    Schema = _import_frictionless_schema()
    # Schema.from_descriptor exists in frictionless v5 only; v4 builds a schema
    # by passing the descriptor to the constructor. The package allows both.
    from_descriptor = getattr(Schema, "from_descriptor", None)
    schema_obj = from_descriptor(schema_dict) if from_descriptor else Schema(schema_dict)

    # Create a unique temp file so concurrent calls on different inputs do not collide.
    fd, tmp_str = tempfile.mkstemp(suffix=".csv", dir=out)
    os.close(fd)
    tmp_csv = Path(tmp_str)

    try:
        data_rows = _extract_data(path, tmp_csv, header.field_delimiter)
        nodata_val = header.metadata.get("nodata", "")
        effective_missing = (COMMON_MISSING | {nodata_val}) if nodata_val else COMMON_MISSING
        type_issues = _cross_check_types(declared_types, data_rows, field_names, effective_missing)

        # frictionless v5 rejects absolute paths outside the working directory,
        # so pass the bare file name and give its directory as basepath.
        resource = Resource(
            path=tmp_csv.name,
            basepath=str(tmp_csv.parent),
            schema=schema_obj,
        )
        report = resource.validate()
        data_valid = report.valid

    finally:
        if tmp_csv.exists():
            tmp_csv.unlink()

    is_valid = metadata_ok and data_valid
    report_path = out / f"{path.stem}_DEVO_report.txt"
    write_report(
        path=report_path,
        icsv_name=path.name,
        metadata_issues=metadata_issues,
        type_issues=type_issues,
        frictionless_report=report,
        is_valid=is_valid,
    )

    return str(report_path), is_valid
devo/webui.py ADDED
@@ -0,0 +1,46 @@
1
+ """A tiny Flask web UI which allows uploading a CSV, creating an iCSV, and running validation.
2
+
3
+ This is intentionally minimal — suitable for local testing and demonstration.
4
+ """
5
+ from flask import Flask, request, render_template_string, send_file
6
+ from pathlib import Path
7
+ from .enrich import ICSVEnricher
8
+ from .validate import validate_icsv
9
+
10
# Flask application object for the demo UI; the route handler in this module
# is registered on it via ``@app.route``.
app = Flask(__name__)

# Inline Jinja template for the single demo page: a multipart file-upload form,
# followed by a "Result" section that is rendered only when ``message`` is set.
TEMPLATE = """
<!doctype html>
<title>DEVO demo</title>
<h1>DEVO — upload CSV</h1>
<form method=post enctype=multipart/form-data>
<input type=file name=file>
<input type=submit value=Upload>
</form>
{% if message %}
<hr>
<h2>Result</h2>
<pre>{{ message }}</pre>
{% endif %}
"""
26
+
27
def _safe_upload_name(filename):
    """Reduce a client-supplied upload name to a bare file name.

    Returns the final path component of *filename*, treating both forward and
    backward slashes as separators, or ``""`` when nothing usable remains
    (missing name, empty name, ``.`` or ``..``).
    """
    # The name comes straight from the request: an absolute path or ``..``
    # segments would otherwise let ``outdir / name`` point outside outdir.
    name = Path((filename or "").replace("\\", "/")).name
    return "" if name in {"", ".", ".."} else name


@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the upload form and, on POST, enrich and validate the uploaded CSV.

    GET renders the form only. POST saves the upload into ``DEVO_output/``
    under its sanitised base name, runs ``ICSVEnricher.make_icsv`` followed by
    ``validate_icsv``, and renders the resulting paths and validity flag below
    the form. Any failure is rendered as an ``Error: ...`` message instead.
    """
    message = None
    if request.method == "POST":
        f = request.files.get("file")
        safe_name = _safe_upload_name(f.filename) if f else ""
        if not f:
            message = "No file uploaded"
        elif not safe_name:
            # A name was sent, but nothing safe is left after stripping directories.
            message = "Error: invalid file name"
        else:
            outdir = Path("DEVO_output")
            try:
                # Directory creation and the save live inside the try so that
                # filesystem errors are shown in the page like any other failure.
                outdir.mkdir(exist_ok=True)
                infile = outdir / safe_name
                f.save(infile)
                enr = ICSVEnricher()
                icsv, schema = enr.make_icsv(str(infile), str(outdir))
                report, valid = validate_icsv(icsv, schema_path=schema, outdir=str(outdir))
                message = f"iCSV: {icsv}\nSchema: {schema}\nReport: {report}\nValid: {valid}"
            except Exception as e:  # top-level demo catch-all; render any error to the UI rather than 500
                message = f"Error: {e}"
    return render_template_string(TEMPLATE, message=message)
@@ -0,0 +1,167 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-devo
3
+ Version: 0.2.0
4
+ Summary: DEVO — CSV to iCSV enrichment and Frictionless validation
5
+ License-Expression: MIT
6
+ Project-URL: Source, https://github.com/envidat/devo
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: frictionless>=4.0.0
11
+ Provides-Extra: webui
12
+ Requires-Dist: flask>=2.0.0; extra == "webui"
13
+ Dynamic: license-file
14
+
15
+ # DEVO
16
+ <img title="whip it" alt="you know you should" height="50" src="/images/DEVO_Pixels_1.webp">
17
+
18
+ **Data Enrichment and Validation Operator.** Takes a plain CSV, infers types and constraints, writes a self-documenting [iCSV](https://envidat.github.io/iCSV/) file plus a Frictionless schema, and validates the data against it.
19
+
20
+ If you give it a `.csv`, it enriches the data, builds the schema, and then validates. If you give it an `.icsv`, it skips enrichment.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install -e .
26
+ ```
27
+
28
+ For the Flask web demo:
29
+
30
+ ```bash
31
+ pip install -e ".[webui]"
32
+ ```
33
+
34
+ Requires Python 3.9+ and `frictionless` (v4 or v5).
35
+
36
+ ## Try it out
37
+
38
+ A small sample dataset lives at `examples/sample.csv` — three columns (`timestamp`, `PSUM`, `TA`) representing hourly weather observations. Use it to take DEVO for a spin without needing your own data.
39
+
40
+ ### CLI
41
+
42
+ ```bash
43
+ # Enrich, build schema, and validate in one command
44
+ devo run examples/sample.csv
45
+
46
+ # Results land in DEVO_output/ by default:
47
+ # sample.icsv — annotated iCSV
48
+ # sample_schema.json — Frictionless Table Schema
49
+ # sample_DEVO_report.txt — human-readable validation report
50
+ ```
51
+
52
+ Run `devo run examples/sample.csv --out my_output` to write to a different directory.
53
+
54
+ ### Python
55
+
56
+ ```python
57
+ from devo.enrich import ICSVEnricher
58
+ from devo.validate import validate_icsv
59
+
60
+ icsv, schema = ICSVEnricher().make_icsv("examples/sample.csv", "DEVO_output")
61
+ report_path, valid = validate_icsv(icsv, schema_path=schema)
62
+
63
+ print(f"Valid: {valid}")
64
+ print(f"Report written to: {report_path}")
65
+ ```
66
+
67
+ ### Web demo
68
+
69
+ Install the optional Flask dependency first (if you haven't already):
70
+
71
+ ```bash
72
+ pip install -e ".[webui]"
73
+ ```
74
+
75
+ Start the local server:
76
+
77
+ ```bash
78
+ flask --app devo.webui run
79
+ ```
80
+
81
+ Then open `http://127.0.0.1:5000` in your browser. Click **Choose File**, select `examples/sample.csv`, and click **Upload**. The page will display the paths to the generated iCSV, schema, and report, along with the overall `Valid` result.
82
+
83
+ > The web UI is a local demo only — do not expose it to a network.
84
+
85
+ ---
86
+
87
+ ## CLI
88
+
89
+ ```bash
90
+ devo enrich data.csv # write data.icsv + data_schema.json
91
+ devo validate data.icsv # validate against neighbouring schema
92
+ devo run data.csv # do both in one go
93
+ ```
94
+
95
+ Common flags: `--out DIR` (default `DEVO_output/`), `--delimiter CHAR`, `--nodata VALUE`, `--app PROFILE`, `--schema PATH`.
96
+
97
+ Exit codes: `0` = success, `1` = validation failed, `2` = usage or runtime error.
98
+
99
+ ## What lands on disk
100
+
101
+ For input `data.csv`, after `devo run`:
102
+
103
+ | File | What |
104
+ |---|---|
105
+ | `DEVO_output/data.icsv` | iCSV with `# [METADATA]`, `# [FIELDS]`, `# [DATA]` |
106
+ | `DEVO_output/data_schema.json` | Frictionless Table Schema JSON |
107
+ | `DEVO_output/data_DEVO_report.txt` | Validation report (read this) |
108
+
109
+ ## Python API
110
+
111
+ ```python
112
+ from devo.enrich import ICSVEnricher
113
+ from devo.validate import validate_icsv
114
+
115
+ icsv, schema = ICSVEnricher().make_icsv("data.csv", "DEVO_output")
116
+ report_path, valid = validate_icsv(icsv, schema_path=schema)
117
+ ```
118
+
119
+ ## Files
120
+
121
+ ```
122
+ devo/
123
+ ├── cli.py # argparse front-end (enrich / validate / run)
124
+ ├── enrich.py # CSV → iCSV + schema (ICSVEnricher class)
125
+ ├── validate.py # iCSV + schema → Frictionless validation + report
126
+ ├── _infer.py # pure type-inference functions (shared by enrich + validate)
127
+ ├── _parser.py # iCSV header parser (shared by enrich + validate)
128
+ ├── _schema.py # per-column statistics + Frictionless schema builder
129
+ ├── _report.py # plain-text report writer
130
+ ├── exceptions.py # DEVOError hierarchy
131
+ └── webui.py # Flask demo (optional; requires pip install -e ".[webui]")
132
+ tests/
133
+ ├── conftest.py
134
+ ├── fixtures/ # sample CSV and iCSV files
135
+ └── test_*.py
136
+ ```
137
+
138
+ ## How it works
139
+
140
+ ### Enrichment (`devo enrich`)
141
+
142
+ 1. **Read** — the CSV is read in one pass. If no `--delimiter` is given, `csv.Sniffer` detects it from the first 10 lines.
143
+ 2. **Delimiter mapping** — comma is remapped to pipe in the iCSV output (pipe is also the default fallback for non-spec delimiters). Column names that contain the output delimiter are rejected with a clear error.
144
+ 3. **Normalisation** — every row is padded or clipped to header length and stripped of leading/trailing whitespace.
145
+ 4. **Type inference** — each column is classified: `integer → number → datetime → string`. Scientific notation (`1.5e-3`, `2E10`) is recognised as `number`. Missing-value sentinels (and any custom `--nodata` value) are excluded before inference.
146
+ 5. **Statistics** — per-column `min`, `max`, and `missing_count` are computed from the normalised data and written to the iCSV `# [FIELDS]` section. They do not appear in the Frictionless schema JSON.
147
+ 6. **Geometry detection** — if the header contains `lat`/`latitude` + `lon`/`lng`/`longitude`, DEVO writes `geometry = column:lat,lon` and `srid = EPSG:4326` to metadata. A single column named `geometry` (WKT) gets `geometry = column:geometry` only — no `srid`, because WKT embeds its own CRS.
148
+ 7. **Write** — the normalised rows are written to the iCSV `# [DATA]` section, and the Frictionless schema is written to `_schema.json`.
149
+
150
+ ### Validation (`devo validate`)
151
+
152
+ 1. **Parse header** — `_parser.py` reads the `# [METADATA]` and `# [FIELDS]` sections, using `field_delimiter` from metadata to split field values.
153
+ 2. **Metadata check** — required keys are verified. `geometry` and `srid` are only checked when spatial column names are present; `srid` is only required for lat/lon columns (not WKT).
154
+ 3. **Type cross-check (Option A)** — column types are re-inferred from up to 500 data rows and compared to the declared types. The iCSV's own `nodata` sentinel is merged with the standard missing-value set before re-inference so custom sentinels are not mistaken for real data. If the inferred type is narrower than or equal to the declared type, the result is `[OK]`; if the inferred type is wider, the result is `[WARN]`.
155
+ 4. **Frictionless validation** — data is written to a temporary comma-delimited CSV and validated against the schema using `frictionless.Resource`. The temp file is always deleted in a `finally` block.
156
+ 5. **Report** — a plain-text `.txt` report is written with three sections: `METADATA`, `TYPE CONSISTENCY`, and `DATA VALIDATION`. `Valid: YES` only when metadata has no `[FAIL]` entries and Frictionless reports no data errors. Type warnings do not affect the valid flag.
157
+
158
+ ## Limitations
159
+
160
+ - Type inference is conservative: `integer → number → datetime → string`. Mixed-format columns fall back to `string`.
161
+ - Datetime detection uses `datetime.fromisoformat()` and a fixed list of common strptime formats. Unusual formats need a custom schema.
162
+ - Column descriptions are left blank in the iCSV `# [FIELDS]` section; fill them in by hand.
163
+ - The web UI (`webui.py`) is a local demo only — do not expose it to a network.
164
+
165
+ ## License
166
+
167
+ MIT. See `LICENSE`.
@@ -0,0 +1,16 @@
1
+ devo/__init__.py,sha256=bB7C_WzwA9iA3r0u17gOINgpbNFQanfii1opOtnvNkY,318
2
+ devo/_infer.py,sha256=_hUPO_4VVMmvuLYksHqbRxMb05iaW1YKNOrqSgghIVY,3719
3
+ devo/_parser.py,sha256=TzQOIeIscQIh-gGzqfyiaE_cq-9cT2dxbLqNYKNyzNY,2867
4
+ devo/_report.py,sha256=DHd58YYoM5l0SmalbVx3gKTk7ClNALP5G_34OXvkdlo,3251
5
+ devo/_schema.py,sha256=c1HDTHqbk8QHukVruAHsyHQhCoe27wrIcbqnQ45H0IQ,3904
6
+ devo/cli.py,sha256=Tb715NlmBMewejt9rDdohZKKxYYBtaw8Yiau7US1iZY,4059
7
+ devo/enrich.py,sha256=3zNa7LIeusw6jfqK1WozZJYlbawIUuSHdSVKRY4jkZc,8491
8
+ devo/exceptions.py,sha256=AtE1OEeK3CIgwWugiaWDu7jO9YpegBpWvuLu8TBH7tA,468
9
+ devo/validate.py,sha256=oN4AN8xIIys9nS1tpG7dgRPQCPHV6jgWehL93X1KgwM,7861
10
+ devo/webui.py,sha256=vemf9W-aMa0Nq69TXIfsUx349s60X5CU3dhJ7B5JqOs,1532
11
+ py_devo-0.2.0.dist-info/licenses/LICENSE,sha256=EG2ApufGa51t5fPfMM9V79YYJ3QVUDqTOR251pNEQ3s,157
12
+ py_devo-0.2.0.dist-info/METADATA,sha256=MOyDwbMwvKortJTeaBMRR8RzvCoaYfb7V4Wf4slJVLA,7343
13
+ py_devo-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ py_devo-0.2.0.dist-info/entry_points.txt,sha256=HRSFN7b01VzDUIpM9sqW-kked7vUOBkXSSAwBSFVfqQ,39
15
+ py_devo-0.2.0.dist-info/top_level.txt,sha256=_jGERvww3l96x1xDBpeKD7weHkxKWVR3UVMs7ty3Qqw,5
16
+ py_devo-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ devo = devo.cli:main
@@ -0,0 +1,6 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ ... (standard MIT text elided for brevity) ...
@@ -0,0 +1 @@
1
+ devo