pyspark-fluvius 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyspark_fluvius/__init__.py +71 -0
- pyspark_fluvius/converters/__init__.py +6 -0
- pyspark_fluvius/converters/energy_converter.py +427 -0
- pyspark_fluvius/converters/mandates_converter.py +52 -0
- pyspark_fluvius/datasources/__init__.py +6 -0
- pyspark_fluvius/datasources/energy.py +100 -0
- pyspark_fluvius/datasources/mandates.py +76 -0
- pyspark_fluvius/readers/__init__.py +6 -0
- pyspark_fluvius/readers/energy_reader.py +91 -0
- pyspark_fluvius/readers/mandates_reader.py +120 -0
- pyspark_fluvius/schemas/__init__.py +6 -0
- pyspark_fluvius/schemas/energy_schema.py +62 -0
- pyspark_fluvius/schemas/mandates_schema.py +17 -0
- pyspark_fluvius/utils/__init__.py +5 -0
- pyspark_fluvius/utils/credentials.py +62 -0
- pyspark_fluvius-0.1.0.dist-info/METADATA +203 -0
- pyspark_fluvius-0.1.0.dist-info/RECORD +19 -0
- pyspark_fluvius-0.1.0.dist-info/WHEEL +4 -0
- pyspark_fluvius-0.1.0.dist-info/licenses/LICENSE +661 -0

pyspark_fluvius/datasources/mandates.py
@@ -0,0 +1,76 @@
"""Fluvius Mandates data source for PySpark."""

from __future__ import annotations

from typing import TYPE_CHECKING

from pyspark.sql.datasource import DataSource, DataSourceReader
from pyspark.sql.types import StructType

from ..readers.mandates_reader import FluviusMandatesReader
from ..schemas.mandates_schema import MANDATES_SCHEMA

if TYPE_CHECKING:
    pass


class FluviusMandatesDataSource(DataSource):
    """PySpark data source for reading Fluvius mandates.

    This data source allows you to read mandates from the Fluvius Energy API
    directly into a Spark DataFrame.

    Options:
        Credential options (if not using environment variables):
        - subscription_key: Azure API Management subscription key
        - client_id: Azure AD application (client) ID
        - tenant_id: Azure AD tenant ID
        - scope: OAuth2 scope
        - data_access_contract_number: Data access contract number
        - certificate_thumbprint: Certificate thumbprint (for cert auth)
        - private_key: Private key in PEM format (for cert auth)
        - client_secret: Client secret (for secret auth)
        - credentials_prefix: Environment variable prefix (default: "FLUVIUS")

        Environment options:
        - environment: "sandbox" (default) or "production"

        Filter options:
        - reference_number: Filter by custom reference number
        - ean: Filter by GSRN EAN-code
        - data_service_types: Comma-separated list of data service types
        - energy_type: "E" (electricity) or "G" (gas)
        - status: Mandate status (Requested, Approved, Rejected, Finished)
        - mandate_expiration_date: Filter by expiration date (ISO format)
        - renewal_status: ToBeRenewed, RenewalRequested, or Expired
        - last_updated_from: Start of last updated filter (ISO format)
        - last_updated_to: End of last updated filter (ISO format)

    Example:
        ```python
        df = spark.read.format("fluvius.mandates") \\
            .option("status", "Approved") \\
            .option("energy_type", "E") \\
            .load()
        ```
    """

    @classmethod
    def name(cls) -> str:
        """Return the short name of this data source."""
        return "fluvius.mandates"

    def schema(self) -> StructType:
        """Return the schema for mandates data."""
        return MANDATES_SCHEMA

    def reader(self, schema: StructType) -> DataSourceReader:
        """Return a reader for mandates data.

        Args:
            schema: The schema to use (typically the default MANDATES_SCHEMA).

        Returns:
            A FluviusMandatesReader instance.
        """
        return FluviusMandatesReader(schema, self.options)
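
The docstring example assumes the source is already registered with the session. In the PySpark 4 Python Data Source API that is done with `spark.dataSource.register`, which the package's `register_datasources()` helper presumably wraps. A minimal sketch:

```python
from pyspark.sql import SparkSession

from pyspark_fluvius.datasources.mandates import FluviusMandatesDataSource

spark = SparkSession.builder.appName("fluvius-demo").getOrCreate()

# Registers the class under its short name, "fluvius.mandates" (PySpark >= 4.0).
spark.dataSource.register(FluviusMandatesDataSource)

df = spark.read.format("fluvius.mandates").option("status", "Approved").load()
```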

pyspark_fluvius/readers/energy_reader.py
@@ -0,0 +1,91 @@
"""Energy data source reader."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, Iterator

from pyspark.sql.datasource import DataSourceReader, InputPartition

from fluvius_energy_api import FluviusEnergyClient
from fluvius_energy_api.models.enums import PeriodType

from ..converters.energy_converter import EnergyTuple, convert_energy_response
from ..utils.credentials import get_credentials, get_environment

if TYPE_CHECKING:
    from pyspark.sql.types import StructType


class FluviusEnergyReader(DataSourceReader):
    """Reader for Fluvius energy data."""

    def __init__(self, schema: StructType, options: dict[str, str]) -> None:
        """Initialize the energy reader.

        Args:
            schema: The Spark schema for energy data.
            options: Options passed to the data source.
        """
        self._schema = schema
        self._options = options

    def partitions(self) -> list[InputPartition]:
        """Return a single partition for energy data."""
        return [InputPartition(0)]

    def read(self, partition: InputPartition) -> Iterator[EnergyTuple]:
        """Read energy data from the Fluvius API.

        Args:
            partition: The partition to read (unused, single partition).

        Yields:
            Tuples representing energy measurement rows.

        Raises:
            ValueError: If required options (ean, period_type) are missing.
        """
        # Required options
        ean = self._options.get("ean")
        if not ean:
            raise ValueError("Option 'ean' is required for fluvius.energy data source")

        period_type_str = self._options.get("period_type")
        if not period_type_str:
            raise ValueError("Option 'period_type' is required for fluvius.energy data source")

        period_type = PeriodType(period_type_str)

        # Optional filters
        reference_number = self._options.get("reference_number")
        granularity = self._options.get("granularity")
        complex_energy_types = self._options.get("complex_energy_types")

        # Parse dates
        from_date_str = self._options.get("from_date")
        from_date: datetime | None = None
        if from_date_str:
            from_date = datetime.fromisoformat(from_date_str.replace("Z", "+00:00"))

        to_date_str = self._options.get("to_date")
        to_date: datetime | None = None
        if to_date_str:
            to_date = datetime.fromisoformat(to_date_str.replace("Z", "+00:00"))

        credentials = get_credentials(self._options)
        environment = get_environment(self._options)

        with FluviusEnergyClient(credentials=credentials, environment=environment) as client:
            response = client.get_energy(
                ean=ean,
                period_type=period_type,
                reference_number=reference_number,
                granularity=granularity,
                complex_energy_types=complex_energy_types,
                from_date=from_date,
                to_date=to_date,
            )

        rows = convert_energy_response(response)
        yield from rows
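
A note on the date handling above: `from_date`/`to_date` accept both plain ISO dates and UTC "Z" timestamps. The `.replace("Z", "+00:00")` is defensive; since Python 3.11 (and the package requires 3.13), `datetime.fromisoformat` parses a trailing "Z" on its own. A quick illustration:

```python
from datetime import datetime

# Both option forms the reader handles:
print(datetime.fromisoformat("2024-01-01"))            # date only -> naive midnight
print(datetime.fromisoformat("2024-01-31T23:00:00Z"))  # "Z" suffix parses directly on 3.11+
```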

pyspark_fluvius/readers/mandates_reader.py
@@ -0,0 +1,120 @@
"""Mandates data source reader."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, Iterator

from pyspark.sql.datasource import DataSourceReader, InputPartition

from fluvius_energy_api import FluviusEnergyClient
from fluvius_energy_api.models.enums import (
    DataServiceType,
    EnergyType,
    MandateRenewalStatus,
    MandateStatus,
)

from ..converters.mandates_converter import MandateTuple, convert_mandate
from ..utils.credentials import get_credentials, get_environment

if TYPE_CHECKING:
    from pyspark.sql.types import StructType


class FluviusMandatesReader(DataSourceReader):
    """Reader for Fluvius mandates data."""

    def __init__(self, schema: StructType, options: dict[str, str]) -> None:
        """Initialize the mandates reader.

        Args:
            schema: The Spark schema for mandates.
            options: Options passed to the data source.
        """
        self._schema = schema
        self._options = options

    def partitions(self) -> list[InputPartition]:
        """Return a single partition for mandates."""
        return [InputPartition(0)]

    def read(self, partition: InputPartition) -> Iterator[MandateTuple]:
        """Read mandates from the Fluvius API.

        Args:
            partition: The partition to read (unused, single partition).

        Yields:
            Tuples representing mandate rows.
        """
        credentials = get_credentials(self._options)
        environment = get_environment(self._options)

        # Parse filter options
        reference_number = self._options.get("reference_number")
        ean = self._options.get("ean")

        # Parse data_service_types (comma-separated string)
        data_service_types_str = self._options.get("data_service_types")
        data_service_types: list[DataServiceType] | None = None
        if data_service_types_str:
            data_service_types = [
                DataServiceType(t.strip()) for t in data_service_types_str.split(",")
            ]

        # Parse energy_type
        energy_type_str = self._options.get("energy_type")
        energy_type: EnergyType | None = None
        if energy_type_str:
            energy_type = EnergyType(energy_type_str)

        # Parse status
        status_str = self._options.get("status")
        status: MandateStatus | None = None
        if status_str:
            status = MandateStatus(status_str)

        # Parse mandate_expiration_date
        mandate_expiration_date_str = self._options.get("mandate_expiration_date")
        mandate_expiration_date: datetime | None = None
        if mandate_expiration_date_str:
            mandate_expiration_date = datetime.fromisoformat(
                mandate_expiration_date_str.replace("Z", "+00:00")
            )

        # Parse renewal_status
        renewal_status_str = self._options.get("renewal_status")
        renewal_status: MandateRenewalStatus | None = None
        if renewal_status_str:
            renewal_status = MandateRenewalStatus(renewal_status_str)

        # Parse last_updated_from/to
        last_updated_from_str = self._options.get("last_updated_from")
        last_updated_from: datetime | None = None
        if last_updated_from_str:
            last_updated_from = datetime.fromisoformat(
                last_updated_from_str.replace("Z", "+00:00")
            )

        last_updated_to_str = self._options.get("last_updated_to")
        last_updated_to: datetime | None = None
        if last_updated_to_str:
            last_updated_to = datetime.fromisoformat(last_updated_to_str.replace("Z", "+00:00"))

        with FluviusEnergyClient(credentials=credentials, environment=environment) as client:
            response = client.get_mandates(
                reference_number=reference_number,
                ean=ean,
                data_service_types=data_service_types,
                energy_type=energy_type,
                status=status,
                mandate_expiration_date=mandate_expiration_date,
                renewal_status=renewal_status,
                last_updated_from=last_updated_from,
                last_updated_to=last_updated_to,
            )

        if response.data and response.data.mandates:
            for mandate in response.data.mandates:
                yield convert_mandate(mandate)
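
Every filter arrives as a string option and is coerced to the client's enum types before the call, so an invalid value fails fast with the enum's usual ValueError. A sketch of the `data_service_types` path (the member values "VH_dag" and "VH_kwartier_uur" are taken from the README example and assumed to be valid `DataServiceType` values):

```python
from fluvius_energy_api.models.enums import DataServiceType

raw = "VH_dag, VH_kwartier_uur"  # as given via .option("data_service_types", ...)
parsed = [DataServiceType(t.strip()) for t in raw.split(",")]  # raises ValueError on unknown values
```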

pyspark_fluvius/schemas/energy_schema.py
@@ -0,0 +1,62 @@
"""Spark schema for energy data."""

from pyspark.sql.types import (
    DoubleType,
    StringType,
    StructField,
    StructType,
    TimestampType,
)


def _measurement_fields(direction: str, register: str, include_gas_factor: bool = False) -> list[StructField]:
    """Generate measurement fields for a direction/register combination."""
    prefix = f"{direction}_{register}"
    fields = [
        StructField(f"{prefix}_value", DoubleType(), nullable=True),
        StructField(f"{prefix}_unit", StringType(), nullable=True),
        StructField(f"{prefix}_validation_state", StringType(), nullable=True),
    ]
    if include_gas_factor:
        fields.append(StructField(f"{prefix}_gas_conversion_factor", StringType(), nullable=True))
    return fields


def _direction_fields(direction: str) -> list[StructField]:
    """Generate all measurement fields for a direction."""
    fields = []
    # total includes gas_conversion_factor
    fields.extend(_measurement_fields(direction, "total", include_gas_factor=True))
    # Other registers don't have gas_conversion_factor
    for register in ["day", "night", "reactive", "inductive", "capacitive"]:
        fields.extend(_measurement_fields(direction, register))
    return fields


ENERGY_SCHEMA = StructType(
    [
        # Headpoint identification
        StructField("ean", StringType(), nullable=True),
        StructField("energy_type", StringType(), nullable=True),
        StructField("metering_type", StringType(), nullable=True),
        # Time slice information
        StructField("measurement_start", TimestampType(), nullable=True),
        StructField("measurement_end", TimestampType(), nullable=True),
        StructField("granularity", StringType(), nullable=True),
        # Meter information (for metering-on-meter installations)
        StructField("meter_seq_number", StringType(), nullable=True),
        StructField("meter_id", StringType(), nullable=True),
        # Subheadpoint information (for submetering installations)
        StructField("subheadpoint_ean", StringType(), nullable=True),
        StructField("subheadpoint_type", StringType(), nullable=True),
        StructField("subheadpoint_seq_number", StringType(), nullable=True),
        # Offtake measurements (total, day, night, reactive, inductive, capacitive)
        *_direction_fields("offtake"),
        # Injection measurements
        *_direction_fields("injection"),
        # Production measurements
        *_direction_fields("production"),
        # Auxiliary measurements
        *_direction_fields("auxiliary"),
    ]
)
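
The generated schema is wide but predictable: 11 identification and time-slice columns plus, per direction, 4 fields for the "total" register and 3 for each of the five other registers. A quick sanity check:

```python
from pyspark_fluvius.schemas.energy_schema import ENERGY_SCHEMA

# 11 fixed fields + 4 directions * (4 + 5 * 3) = 11 + 76 = 87 columns
assert len(ENERGY_SCHEMA.fields) == 87
```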

pyspark_fluvius/schemas/mandates_schema.py
@@ -0,0 +1,17 @@
"""Spark schema for mandates data."""

from pyspark.sql.types import StringType, StructField, StructType, TimestampType

MANDATES_SCHEMA = StructType(
    [
        StructField("reference_number", StringType(), nullable=True),
        StructField("status", StringType(), nullable=True),
        StructField("ean", StringType(), nullable=True),
        StructField("energy_type", StringType(), nullable=True),
        StructField("data_period_from", TimestampType(), nullable=True),
        StructField("data_period_to", TimestampType(), nullable=True),
        StructField("data_service_type", StringType(), nullable=True),
        StructField("mandate_expiration_date", TimestampType(), nullable=True),
        StructField("renewal_status", StringType(), nullable=True),
    ]
)
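
Since the schema is a plain `StructType`, it can also be used on its own, for example to build an empty, correctly typed DataFrame in tests (given a SparkSession `spark`):

```python
from pyspark_fluvius.schemas.mandates_schema import MANDATES_SCHEMA

# Empty DataFrame with the mandates column types, handy for unit tests.
empty_mandates = spark.createDataFrame([], MANDATES_SCHEMA)
empty_mandates.printSchema()
```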

pyspark_fluvius/utils/credentials.py
@@ -0,0 +1,62 @@
"""Credential handling for Fluvius data sources."""

from __future__ import annotations

from fluvius_energy_api import FluviusCredentials
from fluvius_energy_api.client import Environment


def get_credentials(options: dict[str, str]) -> FluviusCredentials:
    """Get Fluvius credentials from Spark options or environment variables.

    Priority:
    1. Spark .option() parameters (if all required fields are present)
    2. Environment variables (via FluviusCredentials.from_env())

    Args:
        options: Dictionary of options from Spark DataSource.

    Returns:
        FluviusCredentials instance.

    Raises:
        ConfigurationError: If credentials cannot be loaded.
    """
    # Check if credentials are provided via options
    subscription_key = options.get("subscription_key")
    client_id = options.get("client_id")
    tenant_id = options.get("tenant_id")
    scope = options.get("scope")
    data_access_contract_number = options.get("data_access_contract_number")

    # If all required fields are present in options, use them
    if all([subscription_key, client_id, tenant_id, scope, data_access_contract_number]):
        return FluviusCredentials(
            subscription_key=subscription_key,  # type: ignore[arg-type]
            client_id=client_id,  # type: ignore[arg-type]
            tenant_id=tenant_id,  # type: ignore[arg-type]
            scope=scope,  # type: ignore[arg-type]
            data_access_contract_number=data_access_contract_number,  # type: ignore[arg-type]
            certificate_thumbprint=options.get("certificate_thumbprint"),
            private_key=options.get("private_key"),
            client_secret=options.get("client_secret"),
        )

    # Fall back to environment variables
    prefix = options.get("credentials_prefix", "FLUVIUS")
    return FluviusCredentials.from_env(prefix=prefix)


def get_environment(options: dict[str, str]) -> Environment:
    """Get the Fluvius API environment from options.

    Args:
        options: Dictionary of options from Spark DataSource.

    Returns:
        Environment enum value.
    """
    env_str = options.get("environment", "sandbox").lower()
    if env_str == "production":
        return Environment.PRODUCTION
    return Environment.SANDBOX
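
The option/environment precedence is all-or-nothing: options win only when all five required fields are present; otherwise the loader falls back to `FluviusCredentials.from_env()`, even if some options were set. A sketch of both paths:

```python
from pyspark_fluvius.utils.credentials import get_credentials, get_environment

# All five required fields present -> credentials built from the options dict.
creds = get_credentials({
    "subscription_key": "...",
    "client_id": "...",
    "tenant_id": "...",
    "scope": "...",
    "data_access_contract_number": "...",
    "client_secret": "...",
})

# Any of the five missing -> falls back to FLUVIUS_* environment variables
# (and raises ConfigurationError if those are absent too).
env_creds = get_credentials({"client_id": "only-this-option"})

assert get_environment({}).name == "SANDBOX"  # sandbox is the default
```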

pyspark_fluvius-0.1.0.dist-info/METADATA
@@ -0,0 +1,203 @@
Metadata-Version: 2.4
Name: pyspark-fluvius
Version: 0.1.0
Summary: PySpark custom data source for Fluvius Energy API
Project-URL: Homepage, https://github.com/warreee/spark-fluvius
Project-URL: Repository, https://github.com/warreee/spark-fluvius
Project-URL: Issues, https://github.com/warreee/spark-fluvius/issues
Author: Ward Schodts
License-Expression: AGPL-3.0-or-later
License-File: LICENSE
Keywords: api,data-source,energy,fluvius,pyspark,spark
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.13
Requires-Dist: fluvius-energy-api>=0.1.1
Requires-Dist: pyarrow>=15.0.0
Requires-Dist: pyspark>=4.0.0
Provides-Extra: dev
Requires-Dist: mypy>=1.9.0; extra == 'dev'
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
Requires-Dist: pytest>=8.0.0; extra == 'dev'
Requires-Dist: ruff>=0.4.0; extra == 'dev'
Description-Content-Type: text/markdown

# spark-fluvius

PySpark custom data sources for the [Fluvius Energy API](https://github.com/warreee/fluvius-energy-api).

Read energy measurements and mandates directly into Spark DataFrames.

## Installation

```bash
pip install pyspark-fluvius
```

## Quick Start

```python
from pyspark.sql import SparkSession
from pyspark_fluvius import register_datasources

# Create SparkSession
spark = SparkSession.builder.appName("MyApp").getOrCreate()

# Register Fluvius data sources
register_datasources()

# Read mandates
mandates_df = spark.read.format("fluvius.mandates") \
    .option("status", "Approved") \
    .load()

# Read energy data
energy_df = spark.read.format("fluvius.energy") \
    .option("ean", "541234567890123456") \
    .option("period_type", "readTime") \
    .option("granularity", "daily") \
    .option("from_date", "2024-01-01") \
    .option("to_date", "2024-01-31") \
    .load()
```

## Authentication

Credentials can be provided via environment variables or Spark options.

### Environment Variables

```bash
# Required
export FLUVIUS_SUBSCRIPTION_KEY="your-subscription-key"
export FLUVIUS_CLIENT_ID="your-client-id"
export FLUVIUS_TENANT_ID="your-tenant-id"
export FLUVIUS_SCOPE="your-scope"
export FLUVIUS_DATA_ACCESS_CONTRACT_NUMBER="your-contract-number"

# For sandbox (client secret auth)
export FLUVIUS_CLIENT_SECRET="your-client-secret"

# For production (certificate auth)
export FLUVIUS_CERTIFICATE_THUMBPRINT="your-thumbprint"
export FLUVIUS_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----..."
# Or use a file path:
export FLUVIUS_PRIVATE_KEY_PATH="/path/to/private_key.pem"
```

### Spark Options

```python
df = spark.read.format("fluvius.mandates") \
    .option("subscription_key", "...") \
    .option("client_id", "...") \
    .option("tenant_id", "...") \
    .option("scope", "...") \
    .option("data_access_contract_number", "...") \
    .option("client_secret", "...") \
    .load()
```

## Data Sources

### fluvius.mandates

Read mandate data from the Fluvius API.

**Options:**
| Option | Description |
|--------|-------------|
| `reference_number` | Filter by custom reference number |
| `ean` | Filter by GSRN EAN-code |
| `data_service_types` | Comma-separated list (e.g., "VH_dag,VH_kwartier_uur") |
| `energy_type` | "E" (electricity) or "G" (gas) |
| `status` | Requested, Approved, Rejected, or Finished |
| `mandate_expiration_date` | ISO format date filter |
| `renewal_status` | ToBeRenewed, RenewalRequested, or Expired |
| `last_updated_from` | ISO format datetime |
| `last_updated_to` | ISO format datetime |
| `environment` | "sandbox" (default) or "production" |

**Schema:**
| Column | Type |
|--------|------|
| reference_number | string |
| status | string |
| ean | string |
| energy_type | string |
| data_period_from | timestamp |
| data_period_to | timestamp |
| data_service_type | string |
| mandate_expiration_date | timestamp |
| renewal_status | string |
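
For example, combining the filters above to list approved electricity mandates that are due for renewal, keeping only the key columns:

```python
df = (
    spark.read.format("fluvius.mandates")
    .option("status", "Approved")
    .option("energy_type", "E")
    .option("renewal_status", "ToBeRenewed")
    .load()
)
df.select("ean", "data_service_type", "mandate_expiration_date").show()
```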

### fluvius.energy

Read energy measurement data from the Fluvius API.

**Required Options:**
| Option | Description |
|--------|-------------|
| `ean` | GSRN EAN-code (required) |
| `period_type` | "readTime" or "insertTime" (required) |

**Optional Options:**
| Option | Description |
|--------|-------------|
| `reference_number` | Custom reference number |
| `granularity` | e.g., "daily", "hourly_quarterhourly" |
| `complex_energy_types` | e.g., "active,reactive" |
| `from_date` | ISO format date (e.g., "2024-01-01") |
| `to_date` | ISO format date (e.g., "2024-01-31") |
| `environment` | "sandbox" (default) or "production" |

**Schema:**
| Column | Type | Description |
|--------|------|-------------|
| ean | string | EAN code of the installation |
| energy_type | string | "E" or "G" |
| metering_type | string | Type of metering installation |
| measurement_start | timestamp | Start of measurement period |
| measurement_end | timestamp | End of measurement period |
| granularity | string | daily, hourly, or quarter_hourly |
| meter_seq_number | string | Physical meter sequence (if applicable) |
| meter_id | string | Physical meter ID (if applicable) |
| subheadpoint_ean | string | Subheadpoint EAN (for submetering) |
| subheadpoint_type | string | auxiliary, offtake, or production |
| subheadpoint_seq_number | string | Subheadpoint sequence number |
| offtake_total_value | double | Offtake measurement value |
| offtake_total_unit | string | Measurement unit (e.g., kWh) |
| offtake_total_validation_state | string | READ, EST, VAL, or NVAL |
| offtake_total_gas_conversion_factor | string | P, D, or C (gas only) |
| offtake_day_value | double | Day tariff offtake |
| offtake_day_unit | string | Unit |
| offtake_day_validation_state | string | Validation state |
| offtake_night_value | double | Night tariff offtake |
| offtake_night_unit | string | Unit |
| offtake_night_validation_state | string | Validation state |
| injection_total_value | double | Injection measurement |
| injection_total_unit | string | Unit |
| injection_total_validation_state | string | Validation state |
| injection_day_value | double | Day tariff injection |
| injection_day_unit | string | Unit |
| injection_day_validation_state | string | Validation state |
| injection_night_value | double | Night tariff injection |
| injection_night_unit | string | Unit |
| injection_night_validation_state | string | Validation state |
| production_total_value | double | Production measurement |
| production_total_unit | string | Unit |
| production_total_validation_state | string | Validation state |
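
For example, reading one month of daily values and totalling the offtake:

```python
from pyspark.sql import functions as F

energy_df = (
    spark.read.format("fluvius.energy")
    .option("ean", "541234567890123456")
    .option("period_type", "readTime")
    .option("granularity", "daily")
    .option("from_date", "2024-01-01")
    .option("to_date", "2024-01-31")
    .load()
)
energy_df.agg(F.sum("offtake_total_value").alias("total_offtake")).show()
```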

## Requirements

- Python 3.13+
- PySpark 4.0+
- fluvius-energy-api 0.1.1+

## License

AGPL-3.0-or-later

pyspark_fluvius-0.1.0.dist-info/RECORD
@@ -0,0 +1,19 @@
pyspark_fluvius/__init__.py,sha256=MQ6vxzjHIFskaGpFlNcDsX0x3_3lLkttu8cOqzex_uM,2112
pyspark_fluvius/converters/__init__.py,sha256=HahX04i5A3gVCJ7toRJDiFHVb-DlVkMkssFHRbRMD1E,214
pyspark_fluvius/converters/energy_converter.py,sha256=oh-XswN1aC-72iR-oE3wle9xWFVyIpIldeoZjDQ_7-o,13451
pyspark_fluvius/converters/mandates_converter.py,sha256=Qj7rmUZy_sTReUbkrIz3nZehB9p0ewD5Pn5mzHFXnl0,1460
pyspark_fluvius/datasources/__init__.py,sha256=vYFUCFM15ShiKSE1X5MyI4RSMioX6zcwof8ehtOQhJM,197
pyspark_fluvius/datasources/energy.py,sha256=LrwJbKki7SWjgPxX8Syh-nIQwi3WwFMItdPeoj-dZjk,3964
pyspark_fluvius/datasources/mandates.py,sha256=6E-3XgvD6mbB6U2LNoqQBkYVMs-A2NL9hcj3_IToT6Y,2775
pyspark_fluvius/readers/__init__.py,sha256=Po9NDoudQWWJ-Z1pvEDwWfduA3W5wtrRUf6NxXAC9sk,202
pyspark_fluvius/readers/energy_reader.py,sha256=frP-5AhemjOPnfqsqLrk69FPImNXGwWd-gaBiYfe9jo,3157
pyspark_fluvius/readers/mandates_reader.py,sha256=zGAumUCvYSKrpzZ2piu7XAQTcR0DopQ7bSETUHW6-Zs,4370
pyspark_fluvius/schemas/__init__.py,sha256=vaxbrJaxXeehA84cBCLKnwlzhn5tMBG9u2TZyqClNoM,192
pyspark_fluvius/schemas/energy_schema.py,sha256=3x8HvHFTuBDOKUOU0VMD2rlgoGs-GAg_8Ahv3cw2xCQ,2585
pyspark_fluvius/schemas/mandates_schema.py,sha256=S1CFf1gap-Wav9ddmE5_F8dsWzTvxTqt6RM80fZyavw,780
pyspark_fluvius/utils/__init__.py,sha256=1YbeuzmzSXm_bF-wz_GqbJuGx2psLrHB7zk7QchbsYI,118
pyspark_fluvius/utils/credentials.py,sha256=v3OrK5ncX4YCL3LShAZZ948CIKhjcWCTpmIKeQFP72E,2266
pyspark_fluvius-0.1.0.dist-info/METADATA,sha256=Jpp92Cv7MM4PjXV3_d0Zye-sMBiF_xwN2oeue06vBYQ,6830
pyspark_fluvius-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
pyspark_fluvius-0.1.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
pyspark_fluvius-0.1.0.dist-info/RECORD,,