interoperable-csv 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsv/__init__.py +14 -0
- icsv/acdd.py +77 -0
- icsv/application_profile.py +123 -0
- icsv/factory.py +79 -0
- icsv/header.py +174 -0
- icsv/icsv_file.py +173 -0
- icsv/utility.py +7 -0
- interoperable_csv-0.2.0.dist-info/METADATA +44 -0
- interoperable_csv-0.2.0.dist-info/RECORD +10 -0
- interoperable_csv-0.2.0.dist-info/WHEEL +4 -0
icsv/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .application_profile import append_timepoint, iCSV2DTimeseries
|
|
2
|
+
from .factory import from_smet, read
|
|
3
|
+
from .header import FieldsSection, MetaDataSection
|
|
4
|
+
from .icsv_file import iCSVFile
|
|
5
|
+
|
|
6
|
+
# Public API of the icsv package.
__all__ = [
    "iCSVFile",
    "read",
    "from_smet",
    "MetaDataSection",
    "FieldsSection",
    "iCSV2DTimeseries",
    "append_timepoint",
]
|
icsv/acdd.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
class ACDDMetadata:
    """Container for ACDD (Attribute Convention for Data Discovery) metadata.

    Known ACDD attributes live in ``attributes``; any other name is stored in
    ``unknown_attributes``, unless ``strict`` is True, in which case unknown
    names are rejected by :meth:`set_attribute`.
    """

    def __init__(self, strict: bool = False) -> None:
        # All recognized ACDD attribute names, initialized to None (= not set).
        self.attributes = dict.fromkeys([
            "title",
            "summary",
            "keywords",
            "conventions",
            "id",
            "naming_authority",
            "source",
            "history",
            "comment",
            "date_created",
            "creator_name",
            "creator_url",
            "creator_email",
            "institution",
            "processing_level",
            "project",
            "geospatial_bounds",
            "geospatial_lat_min",
            "geospatial_lat_max",
            "geospatial_lon_min",
            "geospatial_lon_max",
            "geospatial_vertical_min",
            "geospatial_vertical_max",
            "time_coverage_start",
            "time_coverage_end",
            "Wigos ID",
        ])
        self.unknown_attributes = {}
        self.strict = strict

    def set_attribute(self, attribute_name, value):
        """Store *value* under *attribute_name* (a leading ``acdd_`` prefix is dropped).

        Returns False (storing nothing) only when the name is unknown and this
        instance is strict; returns True otherwise.
        """
        # Bug fix: strip only a leading "acdd_" prefix. The previous
        # str.replace("acdd_", "") removed the substring anywhere in the
        # name, mangling names such as "x_acdd_y". removeprefix requires
        # Python >= 3.9, which matches the package's Requires-Python.
        attribute_name = attribute_name.removeprefix("acdd_")
        if attribute_name in self.attributes:
            self.attributes[attribute_name] = value
            return True
        if self.strict:
            return False
        self.unknown_attributes[attribute_name] = value
        return True

    def get_attribute(self, attribute_name):
        """Return the stored value for *attribute_name*, or None if unset/unknown."""
        if attribute_name in self.attributes:
            return self.attributes[attribute_name]
        if attribute_name in self.unknown_attributes:
            return self.unknown_attributes[attribute_name]
        return None

    @property
    def adjusted_dict(self):
        """All attributes (known and unknown) that actually have a value."""
        return {
            **{k: v for k, v in self.attributes.items() if v is not None},
            **{k: v for k, v in self.unknown_attributes.items() if v is not None},
        }

    def join(self, other: "ACDDMetadata"):
        """Fill in attributes from *other* where *self* has no value yet.

        Existing (non-None) values on *self* are never overwritten.
        """
        for attr_dict in [other.attributes, other.unknown_attributes]:
            for attribute, value in attr_dict.items():
                self_value = self.get_attribute(attribute)
                if value and self_value is None:
                    self.set_attribute(attribute, value)

    def __eq__(self, value: object) -> bool:
        """Two instances are equal when no commonly-set attribute conflicts.

        Attributes that are None on either side are ignored.
        """
        if not isinstance(value, ACDDMetadata):
            return False
        for attr in ["attributes", "unknown_attributes"]:
            self_dict = getattr(self, attr)
            value_dict = getattr(value, attr)
            common_keys = self_dict.keys() & value_dict.keys()
            for key in common_keys:
                if self_dict[key] is not None and value_dict[key] is not None:
                    if self_dict[key] != value_dict[key]:
                        return False
        return True
|
|
77
|
+
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import warnings
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import xarray as xr
|
|
7
|
+
|
|
8
|
+
from .icsv_file import VERSIONS, iCSVFile
|
|
9
|
+
|
|
10
|
+
# Valid first lines for the 2DTIMESERIES application profile, one per version.
FIRSTLINES_2DTIMESERIES = ["# iCSV {} UTF-8 2DTIMESERIES".format(v) for v in VERSIONS]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class iCSV2DTimeseries(iCSVFile):
    """iCSV "2DTIMESERIES" application profile: one data block per timestamp.

    Each block in the [DATA] section is introduced by a "# [DATE=<iso>]"
    comment line; ``self.data`` maps each parsed datetime to a DataFrame.
    """

    def __init__(self, filename: str = None):
        # Parsed "[DATE=...]" timestamps, in file order.
        self.dates = []
        # 1-based line numbers of the corresponding "[DATE=...]" lines.
        self.date_lines = []
        super().__init__(filename)

    def _parse_comment_line(self, line, section, line_number):
        """Dispatch one '#'-stripped header line; return the (possibly new) section name."""
        if line == "[METADATA]":
            return "metadata"
        if line == "[FIELDS]":
            # Entering [FIELDS] implies the metadata section is complete.
            self.metadata.check_validity()
            return "fields"
        if line == "[DATA]":
            return "data"
        return self._parse_section_line(line, section, line_number)

    def _parse_section_line(self, line, section, line_number):
        """Parse one "key = value" (or "[DATE=...]") line belonging to *section*."""
        if not section:
            raise ValueError("No section specified")
        line_vals = line.split("=")
        if len(line_vals) != 2:
            raise ValueError(f"Invalid {section} line: {line}")
        if section == "metadata":
            self.metadata.set_attribute(line_vals[0].strip(), line_vals[1].strip())
        elif section == "fields":
            fields_vec = [field.strip() for field in line_vals[1].split(self.metadata.get_attribute("field_delimiter"))]
            self.fields.set_attribute(line_vals[0].strip(), fields_vec)
        elif section == "data":
            # Inside [DATA] the only legal comment is a block separator.
            if "[DATE=" in line:
                date_str = line.split("[DATE=")[1].split("]")[0].strip()
                self.dates.append(datetime.datetime.fromisoformat(date_str))
                self.date_lines.append(line_number)
            else:
                raise ValueError(f"Invalid data line: {line}")
        return section

    def load_file(self, filename: str = None):
        """Read and parse an iCSV 2DTIMESERIES file into per-date DataFrames."""
        self.data = {}
        if filename:
            self.filename = filename
        section = ""
        with open(self.filename, "r", encoding="utf-8") as file:
            first_line = file.readline().rstrip()
            if first_line not in FIRSTLINES_2DTIMESERIES:
                raise ValueError("Not an iCSV file with the 2D timeseries application profile")
            # Counts every subsequent line (the first line was already consumed).
            line_number = 1
            for line in file:
                line_number += 1
                if line.startswith("#"):
                    line = line[1:].strip()
                    section = self._parse_comment_line(line.strip(), section, line_number)
                elif section != "data":
                    # Non-comment lines are only allowed once [DATA] was seen.
                    raise ValueError("Data section was not specified")

        # Re-read each [DATE=...] block with pandas, using the recorded line
        # numbers as block boundaries; the last block runs to the end of file.
        for i, date in enumerate(self.dates):
            first_data_line = self.date_lines[i]
            last_data_line = self.date_lines[i + 1] if i + 1 < len(self.dates) else line_number + 1
            self.data[date] = pd.read_csv(self.filename, skiprows=first_data_line, nrows=last_data_line - first_data_line - 1, header=None, sep=self.metadata.get_attribute("field_delimiter"))
            self.data[date].columns = self.fields.fields

        # NOTE(review): assumes at least one [DATE=...] block exists; an empty
        # [DATA] section would raise IndexError here — confirm intended.
        self.fields.check_validity(self.data[self.dates[0]].shape[1])
        self.parse_geometry()

    def to_xarray(self):
        """Stack the per-date DataFrames into one xarray Dataset along a "time" dim."""
        arrays = []
        for date in self.dates:
            df = self.data[date].copy()
            if "layer_index" in df.columns:
                # Use layer_index (when present) as the non-time coordinate.
                df.set_index("layer_index", inplace=True)
            arrays.append(df.to_xarray())
        ds = xr.concat(arrays, dim="time")
        ds = ds.assign_coords(time=self.dates)
        ds.attrs = self.metadata.metadata
        return ds

    def setData(self, timestamp: datetime.datetime, data: pd.DataFrame, colnames: Optional[list] = None):
        """Register *data* as the block for *timestamp*.

        NOTE(review): *colnames* is currently ignored here, unlike
        iCSVFile.setData — confirm whether fields should be set from it.
        """
        if not self.data:
            self.data = {}
        self.dates.append(timestamp)
        self.data[timestamp] = data

    def write(self, filename: str = None):
        """Serialize the header and all date blocks to self.filename (or *filename*)."""
        if filename:
            self.filename = filename
        self.metadata.check_validity()
        if "source" not in self.metadata.metadata:
            warnings.warn("source is a recommended metadata for the 2D timeseries application profile, but could not be found")
        first_key = self.dates[0]
        self.fields.check_validity(self.data[first_key].shape[1])
        if "layer_index" not in self.fields.fields:
            warnings.warn("layer_index is a recommended field for the 2D timeseries application profile, but could not be found")
        with open(self.filename, "w", encoding="utf-8") as file:
            # Always stamp the newest supported version line.
            file.write(f"{FIRSTLINES_2DTIMESERIES[-1]}\n")
            file.write("# [METADATA]\n")
            for key, val in self.metadata.metadata.items():
                file.write(f"# {key} = {val}\n")
            file.write("# [FIELDS]\n")
            for key, val in self.fields.all_fields.items():
                fields_string = self.metadata.get_attribute("field_delimiter").join(str(value) for value in val)
                file.write(f"# {key} = {fields_string}\n")
            file.write("# [DATA]\n")
            for date in self.dates:
                file.write(f"# [DATE={date.isoformat()}]\n")
                self.data[date].to_csv(file, mode="a", index=False, header=False, sep=self.metadata.get_attribute("field_delimiter"))
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def append_timepoint(filename: str, timestamp: datetime.datetime, data: pd.DataFrame, field_delimiter: str = ","):
    """Append one "[DATE=...]" block with *data* to an existing 2D timeseries file.

    The file is opened in append mode; no header validation is performed.
    """
    separator = "# [DATE={}]\n".format(timestamp.isoformat())
    with open(filename, "a", encoding="utf-8") as handle:
        handle.write(separator)
        data.to_csv(handle, mode="a", index=False, header=False, sep=field_delimiter)
|
|
123
|
+
|
icsv/factory.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from typing import Protocol
|
|
2
|
+
|
|
3
|
+
from .application_profile import FIRSTLINES_2DTIMESERIES, iCSV2DTimeseries
|
|
4
|
+
from .icsv_file import FIRSTLINES, iCSVFile
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class _SmetLike(Protocol):
    """Structural type for SMET objects accepted by :func:`from_smet`.

    Only the attributes actually read by the conversion helpers are declared.
    """

    meta_data: object  # read for .fields, .location, .nodata, .station_id
    optional_meta_data: object  # read for .adjusted_dict (optional entries)
    other_meta_data: dict  # free-form key/value metadata
    acdd_meta_data: object  # ACDD-style metadata exposing .adjusted_dict
    data: object  # tabular payload; presumably a pandas DataFrame — TODO confirm
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def read(filename: str) -> iCSVFile:
    """Open *filename* and dispatch on its first line to the right iCSV class.

    Returns an iCSV2DTimeseries for the 2DTIMESERIES application profile,
    a plain iCSVFile for the base format; raises ValueError otherwise.
    """
    # Fix: close the probe handle instead of leaking it — the original used a
    # bare open(...).readline() with no close.
    with open(filename, encoding="utf-8") as file:
        firstline = file.readline().rstrip()
    if firstline in FIRSTLINES_2DTIMESERIES:
        return iCSV2DTimeseries(filename)
    if firstline in FIRSTLINES:
        return iCSVFile(filename)
    raise ValueError("Not an iCSV file")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def from_smet(smet: _SmetLike) -> iCSVFile:
    """Convert a SMET-like object into an iCSVFile (fields, location, metadata, data)."""
    out = iCSVFile()
    _set_fields_and_location(out, smet)
    _set_metadata(out, smet)
    out.data = smet.data
    _check_validity_and_parse_geometry(out, out.data.shape[1])
    return out
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _set_fields_and_location(icsv, smet):
    """Copy the SMET field list and station location onto *icsv*."""
    station_meta = smet.meta_data
    icsv.fields.set_attribute("fields", station_meta.fields)
    _set_location_attributes(icsv, station_meta.location)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _set_location_attributes(icsv, loc):
|
|
40
|
+
if not loc.epsg and not loc.is_latlon():
|
|
41
|
+
raise ValueError("EPSG code not provided")
|
|
42
|
+
elif loc.is_latlon():
|
|
43
|
+
loc.epsg = 4326
|
|
44
|
+
x = loc.longitude
|
|
45
|
+
y = loc.latitude
|
|
46
|
+
else:
|
|
47
|
+
x = loc.easting
|
|
48
|
+
y = loc.northing
|
|
49
|
+
z = loc.altitude
|
|
50
|
+
icsv.metadata.set_attribute("geometry", f"POINTZ({x} {y} {z})")
|
|
51
|
+
icsv.metadata.set_attribute("srid", f"EPSG:{loc.epsg}")
|
|
52
|
+
icsv.metadata.set_attribute("field_delimiter", ",")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _set_metadata(icsv: iCSVFile, smet: _SmetLike):
    """Transfer SMET metadata (core, optional, free-form, ACDD) onto the iCSV header."""
    station_meta = smet.meta_data
    icsv.metadata.set_attribute("nodata", station_meta.nodata)
    icsv.metadata.set_attribute("station_id", station_meta.station_id)
    for source in (smet.optional_meta_data.adjusted_dict, smet.other_meta_data):
        _set_meta_data_attributes(icsv, source)
    for key, value in smet.acdd_meta_data.adjusted_dict.items():
        icsv.metadata.set_attribute(key, value)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _set_meta_data_attributes(icsv: iCSVFile, meta_data):
    """Route each truthy entry to the fields section or the metadata section.

    A value that is a list (or a space-separated string) with exactly one item
    per field is treated as a per-field attribute; anything else is plain
    metadata. Falsy values are dropped.
    """
    for key, value in meta_data.items():
        if not value:
            continue
        if isinstance(value, list) and len(value) == len(icsv.fields.fields):
            icsv.fields.set_attribute(key, value)
        elif isinstance(value, str) and len(value.split(" ")) == len(icsv.fields.fields):
            icsv.fields.set_attribute(key, value.split(" "))
        else:
            icsv.metadata.set_attribute(key, value)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _check_validity_and_parse_geometry(icsv, ncols: int):
|
|
76
|
+
icsv.metadata.check_validity()
|
|
77
|
+
icsv.fields.check_validity(ncols)
|
|
78
|
+
icsv.parse_geometry()
|
|
79
|
+
|
icsv/header.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
from dataclasses import dataclass as dat
|
|
2
|
+
|
|
3
|
+
from .acdd import ACDDMetadata
|
|
4
|
+
from .utility import is_number
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MetaDataSection:
    """The iCSV [METADATA] header section.

    Tracks the required and recommended iCSV attributes separately, delegates
    ACDD attributes to an ACDDMetadata container, and keeps everything the
    ACDD container rejects in ``other_metadata``.
    """

    def __init__(self):
        self.required_attributes = dict.fromkeys(["field_delimiter", "geometry", "srid"])
        self.recommended_attributes = dict.fromkeys(
            ["station_id", "nodata", "timezone", "doi", "timestamp_meaning"]
        )
        self.acdd_metadata = ACDDMetadata()
        self.other_metadata = {}

    def __eq__(self, value: object) -> bool:
        """Equal when no commonly-set (non-None) attribute conflicts."""
        if not isinstance(value, MetaDataSection):
            return False
        for attr in ("required_attributes", "recommended_attributes", "other_metadata"):
            mine = getattr(self, attr)
            theirs = getattr(value, attr)
            for key in mine.keys() & theirs.keys():
                a, b = mine[key], theirs[key]
                if a is not None and b is not None and a != b:
                    return False
        return self.acdd_metadata == value.acdd_metadata

    def check_validity(self):
        """Raise ValueError if any required attribute is still unset."""
        for key, value in self.required_attributes.items():
            if value is None:
                raise ValueError(f"Required attribute {key} is missing")

    def set_attribute(self, attribute_name, value):
        """Store *value*, coercing numeric strings to float.

        Every attribute is additionally offered to the ACDD container; only
        when ACDD rejects it (strict mode) does it land in other_metadata.
        """
        if is_number(value):
            value = float(value)
        if attribute_name in self.required_attributes:
            self.required_attributes[attribute_name] = value
        elif attribute_name in self.recommended_attributes:
            self.recommended_attributes[attribute_name] = value
        if not self.acdd_metadata.set_attribute(attribute_name, value):
            self.other_metadata[attribute_name] = value

    def get_attribute(self, attribute_name):
        """Look up an attribute across all stores; None when unknown."""
        for store in (self.required_attributes, self.recommended_attributes):
            if attribute_name in store:
                return store[attribute_name]
        acdd_value = self.acdd_metadata.get_attribute(attribute_name)
        if acdd_value is not None:
            return acdd_value
        return self.other_metadata.get(attribute_name)

    @property
    def metadata(self) -> dict:
        """Merged view of all set attributes (ACDD entries win on key clashes)."""
        merged = dict(self.required_attributes)
        merged.update({k: v for k, v in self.recommended_attributes.items() if v})
        merged.update({k: v for k, v in self.other_metadata.items() if v})
        merged.update(self.acdd_metadata.adjusted_dict)
        return merged
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class FieldsSection:
    """The iCSV [FIELDS] header section.

    ``fields`` holds the column names; per-column vectors such as units or
    long_name live in ``recommended_fields``; anything else in ``other_fields``.
    """

    def __init__(self):
        self.fields = []
        self.recommended_fields = {
            name: [] for name in ("units_multiplier", "units", "long_name", "standard_name")
        }
        self.other_fields = {}

    def __eq__(self, value: object) -> bool:
        """Equal when all three stores match exactly."""
        if not isinstance(value, FieldsSection):
            return False
        return all(
            getattr(self, attr) == getattr(value, attr)
            for attr in ("fields", "recommended_fields", "other_fields")
        )

    def check_validity(self, n_cols: int):
        """Raise ValueError unless every non-empty vector has exactly *n_cols* entries."""
        if not self.fields:
            raise ValueError("No fields provided")
        if len(self.fields) != n_cols:
            raise ValueError("Number of fields does not match the number of columns")
        for store in (self.recommended_fields, self.other_fields):
            for key, vec in store.items():
                if vec and len(vec) != n_cols:
                    raise ValueError(f"Number of {key} does not match the number of columns")

    def set_attribute(self, attribute_name, value):
        """Store a per-column vector, coercing numeric entries to float."""
        coerced = [float(item) if is_number(item) else item for item in value]
        if attribute_name == "fields":
            self.fields = coerced
        elif attribute_name in self.recommended_fields:
            self.recommended_fields[attribute_name] = coerced
        else:
            self.other_fields[attribute_name] = coerced

    def get_attribute(self, attribute_name):
        """Return the vector for *attribute_name*; None when unknown."""
        if attribute_name == "fields":
            return self.fields
        if attribute_name in self.recommended_fields:
            return self.recommended_fields[attribute_name]
        return self.other_fields.get(attribute_name)

    @property
    def all_fields(self):
        """Column names plus every non-empty per-column vector."""
        merged = {"fields": self.fields}
        merged.update({k: v for k, v in self.recommended_fields.items() if v})
        merged.update({k: v for k, v in self.other_fields.items() if v})
        return merged

    @property
    def miscalleneous_fields(self):
        """Every non-empty per-column vector, excluding the column names themselves."""
        merged = {k: v for k, v in self.recommended_fields.items() if v}
        merged.update({k: v for k, v in self.other_fields.items() if v})
        return merged
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dat
class Loc:
    """Parsed station location.

    x/y are in the units of the SRID (lon/lat for EPSG:4326,
    easting/northing for projected systems); z is the altitude.
    """

    x: float = None
    y: float = None
    z: float = None
    epsg: int = None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class Geometry:
    """WKT point geometry plus SRID parsed from the [METADATA] section."""

    def __init__(self):
        self.geometry = None  # WKT string, e.g. "POINTZ(x y z)"
        self.srid = None  # e.g. "EPSG:4326"
        self.column_name = None  # set when the geometry lives in a data column
        self.location = Loc()

    def __eq__(self, value: object) -> bool:
        """Equal when both the WKT string and the SRID match."""
        if not isinstance(value, Geometry):
            return False
        return self.geometry == value.geometry and self.srid == value.srid

    def set_location(self):
        """Parse self.geometry into self.location and attach the EPSG code.

        Supports POINTZ (x y z) and POINT (x y); raises ValueError for
        malformed coordinates or any other geometry type.
        """
        if "POINTZ" in self.geometry:
            content = self.geometry.split("(")[1].split(")")[0]
            vals = content.split(" ")
            if len(vals) != 3:
                raise ValueError("Invalid POINTZ geometry")
            self.location.x = float(vals[0])
            self.location.y = float(vals[1])
            self.location.z = float(vals[2])
        elif "POINT" in self.geometry:
            content = self.geometry.split("(")[1].split(")")[0]
            vals = content.split(" ")
            if len(vals) != 2:
                raise ValueError("Invalid POINT geometry")
            self.location.x = float(vals[0])
            self.location.y = float(vals[1])
        else:
            raise ValueError("Unsupported geometry type")
        # Fix: store the EPSG code as an int so file-parsed locations are
        # consistent with smet-derived ones (the factory sets loc.epsg = 4326
        # as an int); previously this kept the raw string from the SRID.
        self.location.epsg = int(self.srid.split(":")[1])
|
|
174
|
+
|
icsv/icsv_file.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import xarray as xr
|
|
5
|
+
|
|
6
|
+
from .header import FieldsSection, Geometry, MetaDataSection
|
|
7
|
+
|
|
8
|
+
# Supported iCSV format versions and the corresponding valid first lines.
VERSIONS = ["1.0"]
FIRSTLINES = ["# iCSV {} UTF-8".format(v) for v in VERSIONS]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class iCSVFile:
    """An iCSV file: [METADATA] and [FIELDS] header sections plus a pandas DataFrame.

    When *filename* is given, the file is parsed immediately on construction.
    """

    def __init__(self, filename: str = None):
        self.metadata = MetaDataSection()
        self.fields = FieldsSection()
        self.geometry = Geometry()
        self.data = None  # pandas DataFrame once loaded or set
        self.filename = filename
        self.skip_lines = 0  # number of header lines preceding the data block
        if self.filename:
            self.load_file()

    def __eq__(self, value: object) -> bool:
        """Equal when metadata, fields, and geometry all compare equal (data is ignored)."""
        try:
            for attr in ["metadata", "fields", "geometry"]:
                if getattr(self, attr) != getattr(value, attr):
                    return False
            return True
        except AttributeError:
            # *value* is not iCSV-like.
            return False

    def set_metadata(self, attribute_name, value):
        """Convenience forwarder to the [METADATA] section."""
        self.metadata.set_attribute(attribute_name, value)

    def set_fields(self, attribute_name, value):
        """Convenience forwarder to the [FIELDS] section."""
        self.fields.set_attribute(attribute_name, value)

    def get_metadata(self, attribute_name):
        """Convenience lookup in the [METADATA] section."""
        return self.metadata.get_attribute(attribute_name)

    def get_field(self, attribute_name):
        """Convenience lookup in the [FIELDS] section."""
        return self.fields.get_attribute(attribute_name)

    def _parse_comment_line(self, line, section):
        """Dispatch one '#'-stripped header line; return the (possibly new) section name."""
        if line == "[METADATA]":
            return "metadata"
        if line == "[FIELDS]":
            # Entering [FIELDS] implies the metadata section is complete.
            self.metadata.check_validity()
            return "fields"
        if line == "[DATA]":
            return "data"
        return self._parse_section_line(line, section)

    def _parse_section_line(self, line, section):
        """Parse one "key = value" line belonging to *section*."""
        if not section:
            raise ValueError("No section specified")
        line_vals = line.split("=")
        if len(line_vals) != 2:
            raise ValueError(f"Invalid {section} line: {line}")
        if section == "metadata":
            self.metadata.set_attribute(line_vals[0].strip(), line_vals[1].strip())
        elif section == "fields":
            fields_vec = [
                field.strip()
                for field in line_vals[1].split(
                    self.metadata.get_attribute("field_delimiter")
                )
            ]
            self.fields.set_attribute(line_vals[0].strip(), fields_vec)
        elif section == "data":
            # The base profile forbids comments once [DATA] started.
            raise TypeError("Data section should not contain any comments")
        return section

    def _update_columns(self):
        """Apply parsed field names to the DataFrame and parse time columns."""
        self.data.columns = self.fields.fields
        for field in ["time", "timestamp"]:
            if field in self.fields.fields:
                self.data[field] = pd.to_datetime(self.data[field])

    def load_file(self, filename: str = None):
        """Read and parse an iCSV file into header sections and a DataFrame."""
        if filename:
            self.filename = filename
        section = ""
        with open(self.filename, "r", encoding="utf-8") as file:
            first_line = file.readline().rstrip()
            if first_line not in FIRSTLINES:
                raise ValueError("Not an iCSV file")
            # Counts only header lines (first line + '#' comments).
            line_number = 1
            for line in file:
                if line.startswith("#"):
                    line_number += 1
                    line = line[1:].strip()
                    section = self._parse_comment_line(line.strip(), section)
                else:
                    if section != "data":
                        raise ValueError("Data section was not specified")
                    # First non-comment line: remember how many lines to skip.
                    self.skip_lines = line_number
                    break
        self.data = pd.read_csv(
            self.filename,
            skiprows=self.skip_lines,
            header=None,
            sep=self.metadata.get_attribute("field_delimiter"),
        )
        self.fields.check_validity(self.data.shape[1])
        self._update_columns()
        self.parse_geometry()

    def parse_geometry(self):
        """Resolve the geometry metadata into either a column reference or a location."""
        self.geometry.geometry = self.metadata.get_attribute("geometry")
        self.geometry.srid = self.metadata.get_attribute("srid")
        if self.metadata.get_attribute("geometry") in self.fields.get_attribute(
            "fields"
        ):
            # The geometry value names a data column rather than a WKT point.
            self.geometry.column_name = self.metadata.get_attribute("column_name")
        else:
            self.geometry.set_location()

    def to_xarray(self) -> xr.Dataset:
        """Convert the DataFrame to an xarray Dataset carrying the header metadata.

        NOTE(review): the inner loop assigns ``vec[i]`` (a single value) to
        ``arr[var].attrs`` and each vector overwrites the previous one — this
        looks like it should build a dict of per-field attributes; confirm.
        """
        arr = self.data.to_xarray()
        arr.attrs = self.metadata.metadata
        for i, var in enumerate(arr.data_vars):
            for _, vec in self.fields.miscalleneous_fields.items():
                arr[var].attrs = vec[i]
        return arr

    def setData(self, data: pd.DataFrame, colnames: Optional[list] = None):
        """Attach *data* and derive the field names (from *colnames* or the columns).

        Raises ValueError when the names are missing or do not match the
        column count. A "timestamp" column is moved to the front if present.
        """
        self.data = data
        if colnames:
            if len(colnames) != self.data.shape[1]:
                raise ValueError(
                    "Number of columns in data does not match the number of column names"
                )
            self.fields.set_attribute("fields", colnames)
        else:
            colnames = self.data.columns.to_list()
            # Default integer column labels mean no real names were provided.
            if colnames[0] == "0" or colnames[0] == 0:
                raise ValueError("Column names are not provided")
            self.fields.set_attribute("fields", colnames)
        if "timestamp" in self.data.columns:
            cols = self.data.columns.tolist()
            if cols[0] != "timestamp":
                cols.insert(0, cols.pop(cols.index("timestamp")))
                self.data = self.data[cols]
                self.fields.set_attribute("fields", self.data.columns)

    def write(self, filename: str = None):
        """Serialize header and data to self.filename (or *filename*).

        NaN values are written as the "nodata" metadata value (default -999.0).
        """
        if filename:
            self.filename = filename
        self.metadata.check_validity()
        self.fields.check_validity(self.data.shape[1])
        nodata = self.metadata.get_attribute("nodata")
        nodata = "-999.0" if nodata is None else str(nodata)
        with open(self.filename, "w", encoding="utf-8") as file:
            # Always stamp the newest supported version line.
            file.write(f"{FIRSTLINES[-1]}\n")
            file.write("# [METADATA]\n")
            for key, val in self.metadata.metadata.items():
                file.write(f"# {key} = {val}\n")
            file.write("# [FIELDS]\n")
            for key, val in self.fields.all_fields.items():
                fields_string = self.metadata.get_attribute("field_delimiter").join(
                    str(value) for value in val
                )
                file.write(f"# {key} = {fields_string}\n")
            file.write("# [DATA]\n")
        # Data is appended by path after the header handle is closed.
        self.data.to_csv(
            self.filename,
            mode="a",
            index=False,
            header=False,
            sep=self.metadata.get_attribute("field_delimiter"),
            na_rep=nodata,
        )
|
icsv/utility.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: interoperable-csv
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Read, write, and transform iCSV files.
|
|
5
|
+
License: GPL-3.0-only
|
|
6
|
+
Keywords: icsv,snow,timeseries,geospatial,climate
|
|
7
|
+
Author: Patrick Leibersperger
|
|
8
|
+
Author-email: patrick.leibersperger@slf.ch
|
|
9
|
+
Requires-Python: >=3.9,<4.0
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Dist: pandas (>=2.0,<3.0)
|
|
17
|
+
Requires-Dist: xarray (>=2024.3.0,<2025.0.0)
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# icsv
|
|
21
|
+
|
|
22
|
+
Standalone Python package for reading, writing, and transforming iCSV files.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install interoperable-csv
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quickstart
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import icsv
|
|
34
|
+
|
|
35
|
+
f = icsv.read("input.icsv")
|
|
36
|
+
print(f.fields.get_attribute("fields"))
|
|
37
|
+
f.write("copy.icsv")
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Documentation
|
|
41
|
+
|
|
42
|
+
Project documentation is built with MkDocs and deployed through the CI pages pipeline.
|
|
43
|
+
|
|
44
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
icsv/__init__.py,sha256=PdyYOdmIg3aDIs4pcBDINzWxlOiVN_ksOYQ6--oT1DM,340
|
|
2
|
+
icsv/acdd.py,sha256=eHPcX86V5XuxZAknV2fkRTaOMcdlqUNJVzfSaH42gJI,2844
|
|
3
|
+
icsv/application_profile.py,sha256=6HMXQ9iUftuqL2i8IuOp21XiVpDEe_logGGk98JqWoc,5466
|
|
4
|
+
icsv/factory.py,sha256=yOypwMjU02-0j9qK1ABdjQCW8U9-wRpfTaeUYW3Rv1Y,2619
|
|
5
|
+
icsv/header.py,sha256=ZPRpfe2GrcC3BGsDADuH49UBH1o-fsYnP4ToGLtaGRY,6285
|
|
6
|
+
icsv/icsv_file.py,sha256=PvFxJFoOFbmWL-beHq3j1L3pU075Edf9Bzmv1STe0to,6546
|
|
7
|
+
icsv/utility.py,sha256=hTRKg8NyqnX0_TaASSudIq304g3vJTZyicHyJe-vevM,109
|
|
8
|
+
interoperable_csv-0.2.0.dist-info/METADATA,sha256=wyKA_RP8skst_AURFx9Llquv9HelsnOhPf0CvRleefg,1103
|
|
9
|
+
interoperable_csv-0.2.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
10
|
+
interoperable_csv-0.2.0.dist-info/RECORD,,
|