dmsp-ssm 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dmsp_ssm-1.0.0/LICENSE +21 -0
- dmsp_ssm-1.0.0/PKG-INFO +163 -0
- dmsp_ssm-1.0.0/README.md +135 -0
- dmsp_ssm-1.0.0/pyproject.toml +53 -0
- dmsp_ssm-1.0.0/setup.cfg +4 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/__init__.py +17 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/__init__.py +0 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/assembler/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/assembler/artifact_accumulator.py +81 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/assembler/contracts.py +88 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/assembler/in_memory.py +35 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/builder/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/builder/contracts.py +18 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/builder/numpy_builder.py +145 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/builder/table_builder.py +211 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/builder/xarray_builder.py +272 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/decoder/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/decoder/decoder.py +74 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/decoder/transform.py +33 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/format/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/format/definition.py +135 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/format/definition_validator.py +29 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/format/layout.py +18 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/format/raw_field_reader.py +37 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/format/raw_trace_reader.py +61 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/collection_error_policy.py +11 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/collection_pipeline.py +89 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/collection_result_builder.py +14 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/file_parse_result.py +23 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/file_pipeline.py +160 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/missing_minutes.py +267 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/pre_parse_estimate.py +67 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/protocols.py +14 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/raw_collection_result.py +66 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/reader_parse_use_case.py +115 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/orchestration/report_aggregation.py +65 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/pipeline/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/pipeline/decoded_record.py +18 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/pipeline/field_trace.py +26 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/pipeline/field_trace_extractor.py +236 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/pipeline/raw_record.py +18 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/pipeline/record_parser.py +108 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/runtime/__init__.py +0 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/runtime/reader_runtime.py +58 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/source/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/source/data_source.py +136 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/validator/__init__.py +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/validator/contracts.py +48 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/validator/field_resolver.py +23 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/validator/policy.py +9 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/validator/validation_report_adapter.py +42 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/_internal/validator/validator.py +393 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/parse_options.py +30 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/parse_result.py +38 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm/reader.py +104 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm.egg-info/PKG-INFO +163 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm.egg-info/SOURCES.txt +59 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm.egg-info/dependency_links.txt +1 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm.egg-info/requires.txt +7 -0
- dmsp_ssm-1.0.0/src/dmsp_ssm.egg-info/top_level.txt +1 -0
dmsp_ssm-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 DMSP SSM contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dmsp_ssm-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dmsp-ssm
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Python library for parsing DMSP SSM binary files.
|
|
5
|
+
Author: DMSP SSM contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/scvarr/dmsp_ssm
|
|
8
|
+
Project-URL: Repository, https://github.com/scvarr/dmsp_ssm
|
|
9
|
+
Project-URL: Issues, https://github.com/scvarr/dmsp_ssm/issues
|
|
10
|
+
Keywords: dmsp,ssm,satellite,magnetometer,xarray
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: numpy>=1.26
|
|
22
|
+
Requires-Dist: xarray>=2024.1
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
25
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
26
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# dmsp-ssm
|
|
30
|
+
|
|
31
|
+
`dmsp-ssm` is a Python library for reading, validating, and converting DMSP SSM binary data files.
|
|
32
|
+
|
|
33
|
+
The package accepts single `.dat` files, gzip-compressed `.gz` files, and directories of supported files. It returns a `ParseResult` with parsed records and a validation report. The default output is an `xarray.Dataset`.
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install dmsp-ssm
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
For local development:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install -e ".[dev]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from dmsp_ssm import Reader
|
|
51
|
+
|
|
52
|
+
reader = Reader()
|
|
53
|
+
result = reader.parse("path/to/file_or_directory")
|
|
54
|
+
|
|
55
|
+
dataset = result.records
|
|
56
|
+
report = result.report
|
|
57
|
+
|
|
58
|
+
print(dataset)
|
|
59
|
+
print(report.summary)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Output Profiles
|
|
63
|
+
|
|
64
|
+
Select an output profile with `ParseOptions`:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from dmsp_ssm import ParseOptions, Reader
|
|
68
|
+
|
|
69
|
+
reader = Reader()
|
|
70
|
+
result = reader.parse(
|
|
71
|
+
"path/to/data",
|
|
72
|
+
options=ParseOptions(output_profile="xarray"),
|
|
73
|
+
)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Supported profiles:
|
|
77
|
+
|
|
78
|
+
- `xarray`: returns an `xarray.Dataset` with `record` and `second` dimensions.
|
|
79
|
+
- `numpy`: returns a `dict[str, numpy.ndarray]`.
|
|
80
|
+
- `table`: returns long-format trace rows as `list[dict[str, object]]`.
|
|
81
|
+
|
|
82
|
+
## xarray Output
|
|
83
|
+
|
|
84
|
+
The default `xarray` profile contains:
|
|
85
|
+
|
|
86
|
+
- dimensions: `record`, `second`
|
|
87
|
+
- coordinates: `record_time`, `second_index`
|
|
88
|
+
- second-level variables: `time`, `bx`, `by`, `bz`, `valid`
|
|
89
|
+
- record-level variables: `flight_number`, `year`, `day_of_year`, `minute_start_sec_of_day`, `latitude_deg`, `longitude_deg`, `altitude_km`
|
|
90
|
+
|
|
91
|
+
Data variables include `units` attributes when units are defined by the internal format definition.
|
|
92
|
+
|
|
93
|
+
Missing second-level measurements are detected by the `time == -1000.0` marker. For those positions, `time`, `bx`, `by`, and `bz` are normalized to `NaN`, and `valid` is set to `False`.
|
|
94
|
+
|
|
95
|
+
## Validation Report
|
|
96
|
+
|
|
97
|
+
`ParseResult.report` contains validation status, incidents, and summary counters:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
result = Reader().parse("path/to/data")
|
|
101
|
+
|
|
102
|
+
print(result.report.status)
|
|
103
|
+
print(result.report.outcome)
|
|
104
|
+
print(result.report.summary)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
To include compact missing-minute ranges:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
result = Reader().parse(
|
|
111
|
+
"path/to/data",
|
|
112
|
+
include_missing_minute_ranges=True,
|
|
113
|
+
)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
For directory inputs, the summary can also include per-file missing-minute diagnostics in `missing_minute_ranges_by_file`.
|
|
117
|
+
|
|
118
|
+
## API Overview
|
|
119
|
+
|
|
120
|
+
### Reader
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
Reader(
|
|
124
|
+
error_policy="resync",
|
|
125
|
+
pre_parse_size_warning_threshold_bytes=256 * 1024 * 1024,
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Use `Reader.parse(path, ...)` to parse a file or directory.
|
|
130
|
+
|
|
131
|
+
### ParseOptions
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
ParseOptions(
|
|
135
|
+
recursive=True,
|
|
136
|
+
error_policy=None,
|
|
137
|
+
include_missing_minute_ranges=False,
|
|
138
|
+
output_profile="xarray",
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
`output_profile` must be one of `xarray`, `numpy`, or `table`.
|
|
143
|
+
|
|
144
|
+
### ParseResult
|
|
145
|
+
|
|
146
|
+
`ParseResult.records` contains the selected output artifact.
|
|
147
|
+
|
|
148
|
+
`ParseResult.report` contains validation diagnostics.
|
|
149
|
+
|
|
150
|
+
`ParseResult.metadata` and `ParseResult.extensions` are reserved for optional metadata.
|
|
151
|
+
|
|
152
|
+
## Supported Input
|
|
153
|
+
|
|
154
|
+
- `.dat` files
|
|
155
|
+
- `.gz` files containing DMSP SSM binary data
|
|
156
|
+
- directories containing only one supported file type
|
|
157
|
+
- recursive directory traversal when `recursive=True`
|
|
158
|
+
|
|
159
|
+
Directories containing mixed `.dat` and `.gz` files are rejected.
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
This project is licensed under the MIT License.
|
dmsp_ssm-1.0.0/README.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# dmsp-ssm
|
|
2
|
+
|
|
3
|
+
`dmsp-ssm` is a Python library for reading, validating, and converting DMSP SSM binary data files.
|
|
4
|
+
|
|
5
|
+
The package accepts single `.dat` files, gzip-compressed `.gz` files, and directories of supported files. It returns a `ParseResult` with parsed records and a validation report. The default output is an `xarray.Dataset`.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install dmsp-ssm
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
For local development:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install -e ".[dev]"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from dmsp_ssm import Reader
|
|
23
|
+
|
|
24
|
+
reader = Reader()
|
|
25
|
+
result = reader.parse("path/to/file_or_directory")
|
|
26
|
+
|
|
27
|
+
dataset = result.records
|
|
28
|
+
report = result.report
|
|
29
|
+
|
|
30
|
+
print(dataset)
|
|
31
|
+
print(report.summary)
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Output Profiles
|
|
35
|
+
|
|
36
|
+
Select an output profile with `ParseOptions`:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from dmsp_ssm import ParseOptions, Reader
|
|
40
|
+
|
|
41
|
+
reader = Reader()
|
|
42
|
+
result = reader.parse(
|
|
43
|
+
"path/to/data",
|
|
44
|
+
options=ParseOptions(output_profile="xarray"),
|
|
45
|
+
)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Supported profiles:
|
|
49
|
+
|
|
50
|
+
- `xarray`: returns an `xarray.Dataset` with `record` and `second` dimensions.
|
|
51
|
+
- `numpy`: returns a `dict[str, numpy.ndarray]`.
|
|
52
|
+
- `table`: returns long-format trace rows as `list[dict[str, object]]`.
|
|
53
|
+
|
|
54
|
+
## xarray Output
|
|
55
|
+
|
|
56
|
+
The default `xarray` profile contains:
|
|
57
|
+
|
|
58
|
+
- dimensions: `record`, `second`
|
|
59
|
+
- coordinates: `record_time`, `second_index`
|
|
60
|
+
- second-level variables: `time`, `bx`, `by`, `bz`, `valid`
|
|
61
|
+
- record-level variables: `flight_number`, `year`, `day_of_year`, `minute_start_sec_of_day`, `latitude_deg`, `longitude_deg`, `altitude_km`
|
|
62
|
+
|
|
63
|
+
Data variables include `units` attributes when units are defined by the internal format definition.
|
|
64
|
+
|
|
65
|
+
Missing second-level measurements are detected by the `time == -1000.0` marker. For those positions, `time`, `bx`, `by`, and `bz` are normalized to `NaN`, and `valid` is set to `False`.
|
|
66
|
+
|
|
67
|
+
## Validation Report
|
|
68
|
+
|
|
69
|
+
`ParseResult.report` contains validation status, incidents, and summary counters:
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
result = Reader().parse("path/to/data")
|
|
73
|
+
|
|
74
|
+
print(result.report.status)
|
|
75
|
+
print(result.report.outcome)
|
|
76
|
+
print(result.report.summary)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
To include compact missing-minute ranges:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
result = Reader().parse(
|
|
83
|
+
"path/to/data",
|
|
84
|
+
include_missing_minute_ranges=True,
|
|
85
|
+
)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
For directory inputs, the summary can also include per-file missing-minute diagnostics in `missing_minute_ranges_by_file`.
|
|
89
|
+
|
|
90
|
+
## API Overview
|
|
91
|
+
|
|
92
|
+
### Reader
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
Reader(
|
|
96
|
+
error_policy="resync",
|
|
97
|
+
pre_parse_size_warning_threshold_bytes=256 * 1024 * 1024,
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Use `Reader.parse(path, ...)` to parse a file or directory.
|
|
102
|
+
|
|
103
|
+
### ParseOptions
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
ParseOptions(
|
|
107
|
+
recursive=True,
|
|
108
|
+
error_policy=None,
|
|
109
|
+
include_missing_minute_ranges=False,
|
|
110
|
+
output_profile="xarray",
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
`output_profile` must be one of `xarray`, `numpy`, or `table`.
|
|
115
|
+
|
|
116
|
+
### ParseResult
|
|
117
|
+
|
|
118
|
+
`ParseResult.records` contains the selected output artifact.
|
|
119
|
+
|
|
120
|
+
`ParseResult.report` contains validation diagnostics.
|
|
121
|
+
|
|
122
|
+
`ParseResult.metadata` and `ParseResult.extensions` are reserved for optional metadata.
|
|
123
|
+
|
|
124
|
+
## Supported Input
|
|
125
|
+
|
|
126
|
+
- `.dat` files
|
|
127
|
+
- `.gz` files containing DMSP SSM binary data
|
|
128
|
+
- directories containing only one supported file type
|
|
129
|
+
- recursive directory traversal when `recursive=True`
|
|
130
|
+
|
|
131
|
+
Directories containing mixed `.dat` and `.gz` files are rejected.
|
|
132
|
+
|
|
133
|
+
## License
|
|
134
|
+
|
|
135
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dmsp-ssm"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Python library for parsing DMSP SSM binary files."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "DMSP SSM contributors"},
|
|
14
|
+
]
|
|
15
|
+
keywords = ["dmsp", "ssm", "satellite", "magnetometer", "xarray"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 5 - Production/Stable",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Scientific/Engineering",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"numpy>=1.26",
|
|
27
|
+
"xarray>=2024.1",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = [
|
|
32
|
+
"pytest>=8.0",
|
|
33
|
+
"build>=1.2",
|
|
34
|
+
"twine>=5.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/scvarr/dmsp_ssm"
|
|
39
|
+
Repository = "https://github.com/scvarr/dmsp_ssm"
|
|
40
|
+
Issues = "https://github.com/scvarr/dmsp_ssm/issues"
|
|
41
|
+
|
|
42
|
+
[tool.pytest.ini_options]
|
|
43
|
+
testpaths = ["tests"]
|
|
44
|
+
markers = [
|
|
45
|
+
"unit: fast unit and contract tests",
|
|
46
|
+
"integration: file-based integration tests",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[tool.setuptools]
|
|
50
|
+
package-dir = {"" = "src"}
|
|
51
|
+
|
|
52
|
+
[tool.setuptools.packages.find]
|
|
53
|
+
where = ["src"]
|
dmsp_ssm-1.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Публичная поверхность API библиотеки dmsp_ssm.
|
|
2
|
+
|
|
3
|
+
Поддерживаемые пользовательские точки входа:
|
|
4
|
+
- `Reader`
|
|
5
|
+
- `ParseOptions`
|
|
6
|
+
- `ParseResult`
|
|
7
|
+
|
|
8
|
+
Остальные модули и сущности, включая `dmsp_ssm._internal.*`,
|
|
9
|
+
являются деталями реализации и не входят в публичный контракт.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from dmsp_ssm.parse_options import ParseOptions
|
|
13
|
+
from dmsp_ssm.parse_result import ParseResult
|
|
14
|
+
from dmsp_ssm.reader import Reader
|
|
15
|
+
|
|
16
|
+
__all__ = ["Reader", "ParseOptions", "ParseResult"]
|
|
17
|
+
__version__ = "1.0.0"
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Внутренний пакет компоновки результата."""
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Накопление артефактов результата по выбранному профилю вывода."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from dmsp_ssm._internal.assembler.contracts import (
|
|
8
|
+
ArtifactBundle,
|
|
9
|
+
OUTPUT_PROFILE_ARTIFACT_REQUIREMENTS
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from ..builder.numpy_builder import NumpyBuilder
|
|
13
|
+
from ..builder.table_builder import TableBuilder
|
|
14
|
+
from ..builder.xarray_builder import XArrayBuilder
|
|
15
|
+
from ..decoder.decoder import Decoder
|
|
16
|
+
from ..pipeline.decoded_record import DecodedRecord
|
|
17
|
+
from ..pipeline.field_trace import FieldTrace
|
|
18
|
+
from ..pipeline.raw_record import RawRecord
|
|
19
|
+
from ..validator.contracts import ValidationResult
|
|
20
|
+
|
|
21
|
+
# Profile names accepted by `accumulate_artifact_bundle`.
# NOTE(review): the package README lists only "xarray", "numpy" and "table" as
# user-selectable profiles; "raw" and "decoded" appear to be internal-only — confirm.
ReaderOutputProfile = Literal["raw", "decoded", "xarray", "numpy", "table"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def accumulate_artifact_bundle(
    *,
    profile: ReaderOutputProfile,
    raw_records: list[RawRecord],
    field_traces: list[FieldTrace] | None,
    report: ValidationResult,
    decoder: Decoder,
    builder: XArrayBuilder,
    numpy_builder: NumpyBuilder,
    table_builder: TableBuilder | None = None,
) -> ArtifactBundle:
    """Build an `ArtifactBundle` holding the artifacts the chosen profile requires.

    The set of required artifacts is looked up in
    `OUTPUT_PROFILE_ARTIFACT_REQUIREMENTS`; only those artifacts are produced.
    `report` and `field_traces` are always stored on the bundle.

    Args:
        profile: Name of the internal output profile.
        raw_records: Raw records to expose and/or decode.
        field_traces: Field traces stored on the bundle and passed to the
            table builder (an empty list is passed when this is ``None``).
        report: Validation result stored on the bundle.
        decoder: Object whose ``decode`` is applied to every raw record.
        builder: Builder used for the ``dataset`` artifact.
        numpy_builder: Builder used for the ``numpy_records`` artifact.
        table_builder: Builder used for the ``table_records`` artifact.

    Returns:
        The populated bundle.

    Raises:
        ValueError: If `profile` has no registered requirements, or if
            ``table_records`` is required and `table_builder` is ``None``.
    """
    spec = OUTPUT_PROFILE_ARTIFACT_REQUIREMENTS.get(profile)
    if spec is None:
        raise ValueError(f"Неподдерживаемый внутренний профиль вывода: {profile}")

    result = ArtifactBundle(
        report=report,
        field_traces=field_traces,
    )
    wanted = spec.required_artifacts

    def decode_all() -> list[DecodedRecord]:
        # Decode every raw record, preserving input order.
        return [decoder.decode(raw) for raw in raw_records]

    if "raw_records" in wanted:
        result.raw_records = raw_records

    # These three artifacts are all derived from decoded records, so decode once.
    decode_up_front = not wanted.isdisjoint(
        {"decoded_records", "dataset", "numpy_records"}
    )
    decoded: list[DecodedRecord] | None = decode_all() if decode_up_front else None

    if "decoded_records" in wanted:
        result.decoded_records = decoded

    if "dataset" in wanted:
        result.dataset = builder.build(decoded or [])

    if "numpy_records" in wanted:
        result.numpy_records = numpy_builder.build(decoded or [])

    if "table_records" not in wanted:
        return result

    if table_builder is None:
        raise ValueError(
            "Для internal profile 'table' требуется table_builder."
        )

    if decoded is None:
        decoded = decode_all()
        # NOTE(review): decoded records are stored on the bundle here even
        # though the profile did not list "decoded_records" — confirm intent.
        result.decoded_records = decoded

    result.table_records = table_builder.build(
        field_traces=field_traces or [],
        decoded_records=decoded,
    )
    return result
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Внутренние контрактные границы слоя финальной сборки результата.
|
|
2
|
+
|
|
3
|
+
Assembler-слой расположен после подготовки выходного артефакта и отвечает за
|
|
4
|
+
преобразование `ArtifactBundle` в фасадный `ParseResult`.
|
|
5
|
+
|
|
6
|
+
Слой не выполняет чтение, валидацию, разбор, декодирование или построение
|
|
7
|
+
выходных структур данных. Он получает уже подготовленные артефакты для
|
|
8
|
+
публичных профилей `xarray`, `numpy` и `table`.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import Literal, Protocol
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import xarray as xr
|
|
18
|
+
|
|
19
|
+
from ..pipeline.decoded_record import DecodedRecord
|
|
20
|
+
from ..pipeline.field_trace import FieldTrace
|
|
21
|
+
from ..pipeline.raw_record import RawRecord
|
|
22
|
+
from dmsp_ssm.parse_result import ParseResult
|
|
23
|
+
from ..validator.contracts import ValidationResult
|
|
24
|
+
|
|
25
|
+
# Profile names used as keys of `OUTPUT_PROFILE_ARTIFACT_REQUIREMENTS`.
# NOTE(review): the module docstring names only "xarray", "numpy" and "table" as
# public profiles; "raw" and "decoded" appear to be internal-only — confirm.
OutputProfile = Literal["raw", "decoded", "xarray", "numpy", "table"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(slots=True, frozen=True)
|
|
29
|
+
class ProfileArtifactRequirements:
|
|
30
|
+
"""Описание артефактов, необходимых для выбранного профиля вывода.
|
|
31
|
+
|
|
32
|
+
Используется внутренним accumulation-слоем, чтобы подготовить только те данные,
|
|
33
|
+
которые требуются для сборки результата выбранного профиля.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
required_artifacts: frozenset[str]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Registry mapping every output profile to the artifacts it requires.
# Each profile currently requires exactly one artifact.
OUTPUT_PROFILE_ARTIFACT_REQUIREMENTS: dict[
    OutputProfile,
    ProfileArtifactRequirements,
] = {
    profile_name: ProfileArtifactRequirements(
        required_artifacts=frozenset((artifact_name,)),
    )
    for profile_name, artifact_name in (
        ("raw", "raw_records"),
        ("decoded", "decoded_records"),
        ("xarray", "dataset"),
        ("numpy", "numpy_records"),
        ("table", "table_records"),
    )
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass(slots=True)
|
|
62
|
+
class ArtifactBundle:
|
|
63
|
+
"""Внутренний контейнер артефактов между use-case и assembler-слоем.
|
|
64
|
+
|
|
65
|
+
`ArtifactBundle` переносит диагностический отчет, подготовленный результат
|
|
66
|
+
выбранного профиля и дополнительные служебные данные. Контейнер не является
|
|
67
|
+
публичным API и не задает потоковый протокол обработки.
|
|
68
|
+
|
|
69
|
+
Необязательные поля отсутствуют, если соответствующий артефакт не требуется
|
|
70
|
+
для выбранного профиля вывода.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
report: ValidationResult
|
|
74
|
+
raw_records: list[RawRecord] | None = None
|
|
75
|
+
decoded_records: list[DecodedRecord] | None = None
|
|
76
|
+
dataset: xr.Dataset | None = None
|
|
77
|
+
numpy_records: dict[str, np.ndarray] | None = None
|
|
78
|
+
table_records: list[dict[str, object]] | None = None
|
|
79
|
+
field_traces: list[FieldTrace] | None = None
|
|
80
|
+
metadata: dict[str, object] | None = None
|
|
81
|
+
extensions: dict[str, object] | None = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class SupportsParseResultAssembler(Protocol):
    """Contract of an assembler component that builds the facade `ParseResult`."""

    def assemble(self, bundle: ArtifactBundle) -> ParseResult:
        """Assemble the facade result without reading, decoding, or building artifacts."""
        ...
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Assembler итогового `ParseResult` с результатом в памяти."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .contracts import ArtifactBundle
|
|
6
|
+
from dmsp_ssm.parse_result import ParseResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class InMemoryParseResultAssembler:
    """Assembler that builds a `ParseResult` from a ready `ArtifactBundle`.

    The component performs no reading, validation, parsing, decoding, or
    construction of output data structures. It picks the prepared profile
    result and places it into `ParseResult.records`.
    """

    @staticmethod
    def assemble(bundle: ArtifactBundle) -> ParseResult:
        """Assemble an in-memory `ParseResult` from the supported result artifacts.

        The first non-``None`` artifact among ``dataset``, ``numpy_records``
        and ``table_records`` (checked in that order) becomes the records.

        Raises:
            ValueError: If all three artifacts are ``None``.
        """
        for field_name in ("dataset", "numpy_records", "table_records"):
            payload = getattr(bundle, field_name)
            if payload is not None:
                break
        else:
            raise ValueError(
                "Для in-memory assembler требуется dataset, numpy_records или table_records в artifact bundle."
            )

        return ParseResult(
            records=payload,
            report=bundle.report,
            metadata=bundle.metadata,
            extensions=bundle.extensions,
        )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Внутренний пакет сборки выходных профилей."""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Внутренние контракты builder-слоя."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Protocol
|
|
6
|
+
|
|
7
|
+
from ..pipeline.decoded_record import DecodedRecord
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BuilderArtifact(Protocol):
    """Marker contract for the result produced by a builder component."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SupportsDecodedRecordBuilder(Protocol):
    """Contract of a builder component that operates on decoded records."""

    def build(self, records: list[DecodedRecord]) -> BuilderArtifact:
        """Build an output artifact from the decoded records."""
        ...
|