libadalina-core 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,46 @@
1
+ from libadalina_core.readers.readers import geopackage_to_dataframe
2
+ import pathlib
3
+ import pandas as pd
4
+
5
+ from libadalina_core.spatial_join.query_builder import spatial_join, JoinType, spatial_aggregation, AggregationType, \
6
+ AggregationFunction
7
+
8
if __name__ == "__main__":
    """Example of how to use libadalina to find hospitals in specific provinces in Italy and aggregate their data."""

    # Set pandas display options
    for option_name, option_value in (
        ('display.max_columns', None),
        ('display.width', None),
        ('display.max_colwidth', 100),
    ):
        pd.set_option(option_name, option_value)

    # Sample datasets are looked up in "tests/samples" under the third parent directory of this file
    samples_dir = pathlib.Path(__file__).parent.parent.parent / "tests" / "samples"

    # Hospitals: keep only the attributes used below
    hospitals_path = samples_dir / "healthcare" / "EU_healthcare.gpkg"
    hospitals = geopackage_to_dataframe(str(hospitals_path), "EU")
    hospitals = hospitals[["hospital_name", "geometry", "city", "cap_beds"]]

    # Administrative regions (NUTS)
    regions_path = samples_dir / "regions" / "NUTS_RG_20M_2024_4326.gpkg"
    regions = geopackage_to_dataframe(str(regions_path), "NUTS_RG_20M_2024_4326.gpkg")
    regions = regions[["LEVL_CODE", "NUTS_NAME", "CNTR_CODE", "geometry"]]

    # Select the provinces of Milan and Cremona: level-3 Italian regions whose name matches
    at_level_3 = regions['LEVL_CODE'] == 3
    in_italy = regions['CNTR_CODE'] == "IT"
    name_matches = regions['NUTS_NAME'].str.contains('Milano|Cremona', case=False)
    filtered_regions = regions[at_level_3 & in_italy & name_matches]

    # Left join with the hospitals table to get the hospitals located in these provinces
    joined = spatial_join(filtered_regions, hospitals, join_type=JoinType.LEFT)
    # The join suffixes the geometry columns with _left/_right to avoid conflicts;
    # the province geometry becomes the main `geometry` column again
    result = joined.withColumnRenamed('geometry_left', 'geometry')
    result.show(truncate=False)

    # Number of hospitals per province, plus the total and the average number of beds
    hospital_statistics = [
        AggregationFunction("hospital_name", AggregationType.COUNT, 'hospitals'),
        AggregationFunction("cap_beds", AggregationType.SUM, 'total_beds'),
        AggregationFunction("cap_beds", AggregationType.AVG, 'average_beds'),
    ]
    result = spatial_aggregation(result, aggregate_functions=hospital_statistics)
    result.show(truncate=False)
46
+
@@ -0,0 +1,42 @@
1
+ from libadalina_core.readers.readers import geopackage_to_dataframe
2
+ import pathlib
3
+ import pandas as pd
4
+
5
+ from libadalina_core.spatial_join.query_builder import spatial_join, JoinType, spatial_aggregation, AggregationType, \
6
+ AggregationFunction
7
+
8
if __name__ == "__main__":
    """Example of how to use libadalina to find the total amount of the population living in specific provinces in Italy."""

    # Set pandas display options
    for option_name, option_value in (
        ('display.max_columns', None),
        ('display.width', None),
        ('display.max_colwidth', 100),
    ):
        pd.set_option(option_name, option_value)

    # Sample datasets are looked up in "tests/samples" under the third parent directory of this file
    samples_dir = pathlib.Path(__file__).parent.parent.parent / "tests" / "samples"

    # Census layer: only the `T` column (summed below as the population) and the geometry are needed
    population_path = samples_dir / "population-north-italy" / "nord-italia.gpkg"
    population = geopackage_to_dataframe(str(population_path), "census2021")
    population = population[['T', 'geometry']]

    # Administrative regions (NUTS)
    regions_path = samples_dir / "regions" / "NUTS_RG_20M_2024_4326.gpkg"
    regions = geopackage_to_dataframe(str(regions_path), "NUTS_RG_20M_2024_4326.gpkg")
    regions = regions[["LEVL_CODE", "NUTS_NAME", "CNTR_CODE", "geometry"]]

    # Select the provinces of Milan and Cremona: level-3 Italian regions whose name matches
    at_level_3 = regions['LEVL_CODE'] == 3
    in_italy = regions['CNTR_CODE'] == "IT"
    name_matches = regions['NUTS_NAME'].str.contains('Milano|Cremona', case=False)
    filtered_regions = regions[at_level_3 & in_italy & name_matches]

    # Left join with the population table to get the census geometries intersecting these provinces
    joined = spatial_join(filtered_regions, population, join_type=JoinType.LEFT)
    # The join suffixes the geometry columns with _left/_right to avoid conflicts;
    # the province geometry becomes the main `geometry` column again
    provinces_with_population = joined.withColumnRenamed('geometry_left', 'geometry')

    # Sum `T` per province, weighting each census geometry by the share of its area inside the province
    population_total = AggregationFunction("T", AggregationType.SUM, 'population', proportional='geometry_right')
    result = spatial_aggregation(provinces_with_population, aggregate_functions=[population_total])
    result.show(truncate=False)
42
+
@@ -0,0 +1,59 @@
1
+ from libadalina_core.readers.readers import geopackage_to_dataframe
2
+ import pathlib
3
+ import pandas as pd
4
+
5
+ from libadalina_core.spatial_join.query_builder import spatial_join, JoinType, spatial_aggregation, AggregationType, \
6
+ AggregationFunction, polygonize
7
+
8
if __name__ == "__main__":
    """Example of how to use libadalina to find the amount of population living within 1km from each hospital of a specific province in Italy."""

    # Set pandas display options
    for option_name, option_value in (
        ('display.max_columns', None),
        ('display.width', None),
        ('display.max_colwidth', 100),
    ):
        pd.set_option(option_name, option_value)

    # Sample datasets are looked up in "tests/samples" under the third parent directory of this file
    samples_dir = pathlib.Path(__file__).parent.parent.parent / "tests" / "samples"

    # Census layer: only the `T` column (summed below as the population) and the geometry are needed
    population_path = samples_dir / "population-north-italy" / "nord-italia.gpkg"
    population = geopackage_to_dataframe(str(population_path), "census2021")
    population = population[['T', 'geometry']]

    # Hospitals: keep only the attributes used below
    hospitals_path = samples_dir / "healthcare" / "EU_healthcare.gpkg"
    hospitals = geopackage_to_dataframe(str(hospitals_path), "EU")
    hospitals = hospitals[["hospital_name", "geometry", "city", "cap_beds"]]

    # Administrative regions (NUTS)
    regions_path = samples_dir / "regions" / "NUTS_RG_20M_2024_4326.gpkg"
    regions = geopackage_to_dataframe(str(regions_path), "NUTS_RG_20M_2024_4326.gpkg")
    regions = regions[["LEVL_CODE", "NUTS_NAME", "CNTR_CODE", "geometry"]]

    # Select the province of Cremona: level-3 Italian regions whose name matches
    at_level_3 = regions['LEVL_CODE'] == 3
    in_italy = regions['CNTR_CODE'] == "IT"
    name_matches = regions['NUTS_NAME'].str.contains('Cremona', case=False)
    filtered_regions = regions[at_level_3 & in_italy & name_matches]

    # Left join with the hospitals table to get the hospitals located in the province
    hospitals_in_province = spatial_join(filtered_regions, hospitals, join_type=JoinType.LEFT)
    # The join suffixes the geometry columns with _left/_right to avoid conflicts;
    # from here on the hospital geometry is the main `geometry` column
    result = hospitals_in_province.withColumnRenamed('geometry_left', 'geometry_provinces')
    result = result.withColumnRenamed('geometry_right', 'geometry')
    result.show(truncate=False)

    # Turn the points representing the hospitals into circle-like polygons with a radius of 1000 meters,
    # keeping the original points in `original_geometry`
    buffered = polygonize(result, 1000)
    buffered = buffered.withColumnRenamed('geometry', 'original_geometry')
    result = buffered.withColumnRenamed('polygonized_geometry', 'geometry')

    # Inner join of the buffered hospitals with the census geometries they intersect
    served_areas = spatial_join(result, population, join_type=JoinType.INNER)
    # Again the join suffixes the geometry columns with _left/_right
    served_areas = served_areas.withColumnRenamed('geometry_left', 'geometry')

    # Sum `T` per buffered hospital, weighting each census geometry by the share of its area inside the buffer
    population_total = AggregationFunction("T", AggregationType.SUM, 'population',
                                           proportional='geometry_right')
    result = spatial_aggregation(served_areas, aggregate_functions=[population_total])
    result.show(truncate=False)
File without changes
@@ -0,0 +1,18 @@
1
+ import geopandas as gpd
2
+
3
+ from libadalina_core.sedona_utils.coordinate_formats import DEFAULT_EPSG
4
+
5
+
6
def geopackage_to_dataframe(path: str, layer: str) -> gpd.GeoDataFrame:
    """
    Load one layer of a GeoPackage file as a GeoDataFrame.

    The geometries are reprojected to the libadalina default EPSG `DEFAULT_EPSG` before being returned.

    :param path: The path to the GeoPackage file.
    :param layer: The name of the layer to read from the GeoPackage.
    :return: A GeoDataFrame with the content of the requested layer, expressed in `DEFAULT_EPSG`.
    """
    layer_frame = gpd.read_file(path, layer=layer)
    # Reproject into a new frame instead of mutating the freshly read one in place
    return layer_frame.to_crs(epsg=DEFAULT_EPSG.value)
File without changes
@@ -0,0 +1,22 @@
1
+ import logging
2
+ import os
3
+ import jdk
4
+
5
def install_jdk_if_needed():
    """
    Install a compatible JRE if the `JAVA_HOME` environment variable is not set.

    When `JAVA_HOME` is missing, a previously downloaded runtime is reused if one is found at
    `<jdk._JRE_DIR>/17`; otherwise a Java 17 JRE is installed through `jdk.install` and linked at
    that location so that later runs can find it. In both cases `JAVA_HOME` is set for the
    current process. When `JAVA_HOME` is already set nothing is installed.
    """
    if 'JAVA_HOME' not in os.environ:
        version = '17'
        # NOTE(review): `_JRE_DIR` is a private attribute of the `jdk` package and may change between releases
        jre_path = os.path.join(jdk._JRE_DIR, version)

        if os.path.exists(jre_path):
            logging.info('JAVA_HOME not set but JRE already downloaded')
            os.environ['JAVA_HOME'] = jre_path
        else:
            logging.info('JAVA_HOME not set, installing JRE...')
            java_home = jdk.install(version, jre=True)
            os.environ['JAVA_HOME'] = java_home
            # The link is only a cache marker for later runs: the runtime is already installed and
            # JAVA_HOME is already set, so a failure here (e.g. missing symlink privilege on
            # Windows) must not abort the initialization.
            try:
                if os.path.islink(jre_path):
                    # `os.path.exists` is False for a dangling link; drop it so it can be recreated
                    os.unlink(jre_path)
                os.symlink(java_home, jre_path)
            except OSError as error:
                logging.warning('Could not link %s to %s: %s', jre_path, java_home, error)

    logging.info('JAVA_HOME set to %s', os.environ.get("JAVA_HOME"))
@@ -0,0 +1,104 @@
1
+ from pyspark.sql import SparkSession
2
+ from sedona.spark import SedonaContext
3
+ import pandas as pd
4
+ from libadalina_core.sedona_configuration.jdk_installer import install_jdk_if_needed
5
+
6
# Compatibility with pandas: make `DataFrame.iteritems` available as an alias of `DataFrame.items`.
# NOTE(review): presumably needed because the pinned pyspark still calls `iteritems` when converting
# pandas frames, while the pinned pandas no longer provides it — confirm before removing.
pd.DataFrame.iteritems = pd.DataFrame.items
8
+
9
def _get_sedona_master_configuration(master_host: str) -> SparkSession:
    """
    Create a Sedona-enabled Spark session connected to the given Spark master.

    :param master_host: The Spark master URL handed to the session builder.
    :return: The session returned by `SedonaContext.create`.
    """
    settings = (
        # Sedona and GeoTools jars resolved at start-up
        ("spark.jars.packages",
         "org.apache.sedona:sedona-spark-3.3_2.12:1.7.1,"
         "org.datasyslab:geotools-wrapper:1.7.1-28.5"),
        ("spark.jars.repositories",
         "https://artifacts.unidata.ucar.edu/repository/unidata-all"),
        # A single executor with one core and 2G of memory
        ("spark.executor.instances", 1),
        ("spark.executor.cores", "1"),
        ("spark.executor.memory", "2G"),
    )

    builder = SparkSession.builder.appName("Adalina").master(master_host)
    for key, value in settings:
        builder = builder.config(key, value)

    session = builder.getOrCreate()
    return SedonaContext.create(session)
27
+
28
def _sedona_configuration() -> SparkSession:
    """
    Create the default Sedona-enabled Spark session, without an explicit Spark master.

    :return: The session returned by `SedonaContext.create`.
    """
    settings = (
        # Sedona and GeoTools jars resolved at start-up
        ("spark.jars.packages",
         "org.apache.sedona:sedona-spark-3.3_2.12:1.7.1,"
         "org.datasyslab:geotools-wrapper:1.7.1-28.5"),
        ("spark.driver.memory", "20g"),
        ("spark.jars.repositories",
         "https://artifacts.unidata.ucar.edu/repository/unidata-all"),
    )

    builder = SedonaContext.builder().appName("Adalina")
    for key, value in settings:
        builder = builder.config(key, value)

    session = builder.getOrCreate()
    return SedonaContext.create(session)
45
+
46
# Module-wide Sedona context; stays None until `init_sedona_context` assigns it.
_sedona_context: SparkSession | None = None
47
+
48
def init_sedona_context(
        spark_master: str | None = None,
        spark: SparkSession | None = None
):
    """
    Create the Sedona context used for spatial data processing and store it at module level.

    The context is built, in order of precedence:

    1. against the given Spark master, when `spark_master` is provided;
    2. on top of an existing SparkSession, when `spark` is provided;
    3. with the default configuration otherwise.

    Before the context is created, a compatible Java runtime is installed if the `JAVA_HOME`
    environment variable is not set.

    :param spark_master: The Spark master URL to connect to. Takes precedence over `spark`.
    :param spark: An existing SparkSession on which the Sedona context is created.

    Example:
        # Initialize with default configuration
        init_sedona_context()

        # Initialize with specific Spark master
        init_sedona_context(spark_master="spark://localhost:7077")

        # Initialize with existing SparkSession
        spark = SparkSession.builder.getOrCreate()
        init_sedona_context(spark=spark)
    """
    global _sedona_context

    install_jdk_if_needed()

    if spark_master is not None:
        context = _get_sedona_master_configuration(spark_master)
    elif isinstance(spark, SparkSession):
        context = SedonaContext.create(spark)
    else:
        # Neither a master URL nor a usable session: fall back to the default configuration
        context = _sedona_configuration()

    _sedona_context = context
89
+
90
def get_sedona_context() -> SparkSession:
    """
    Return the Sedona context used by all spatial operations of libadalina.

    When no context has been created yet with `init_sedona_context`, that function is called
    without arguments first, which creates the context with the default configuration.

    :return: The Sedona context as a SparkSession.
    """
    if _sedona_context is not None:
        return _sedona_context

    # Lazy initialization on first use
    init_sedona_context()
    return _sedona_context
File without changes
@@ -0,0 +1,22 @@
1
+ from enum import Enum
2
+
3
class EPSGFormats(Enum):
    """
    Coordinate reference systems known to libadalina; each member's value is its EPSG code.
    """
    EPSG4326 = 4326  # WGS84
    EPSG32632 = 32632  # UTM zone 32N

    @staticmethod
    def from_code(code: int) -> 'EPSGFormats':
        """
        Return the member whose value equals the given EPSG code.

        :param code: The EPSG code to look up.
        :return: The matching member.
        :raises ValueError: If no member has this code.
        """
        found = next((candidate for candidate in EPSGFormats if candidate.value == code), None)
        if found is None:
            raise ValueError(f"No EPSG format found for code {code}")
        return found

"""
Default EPSG format used in libadalina.

All DataFrame are converted upon reading and writing to this format.
"""
DEFAULT_EPSG = EPSGFormats.EPSG4326
@@ -0,0 +1,27 @@
1
+ import pandas as pd
2
+ import geopandas as gpd
3
+ import pyspark.sql as ps
4
+
5
+ from libadalina_core.sedona_configuration.sedona_configuration import get_sedona_context
6
+
7
def to_spark_dataframe(df: pd.DataFrame | gpd.GeoDataFrame | ps.DataFrame) -> ps.DataFrame:
    """
    Convert a pandas DataFrame or a GeoPandas GeoDataFrame to a Spark DataFrame.
    If the input is already a Spark DataFrame, it is returned as is.

    This function is useful for converting data to a format suitable for processing with Apache Sedona,
    however, each function of libadalina already converts the input DataFrame to a Spark DataFrame before processing.

    :param df: The DataFrame to convert, which can be a pandas DataFrame, a GeoPandas GeoDataFrame, or a Spark DataFrame.
    :return: A Spark DataFrame.
    :raises TypeError: If `df` is none of the supported DataFrame types.
    """
    if isinstance(df, ps.DataFrame):
        return df  # nothing to do here

    # Reject unsupported inputs before touching the Sedona context: creating it is costly
    # and pointless when the call is going to fail anyway.
    if not isinstance(df, (gpd.GeoDataFrame, pd.DataFrame)):
        raise TypeError(f"Unsupported type {type(df)}. Expected pandas, geopandas, or spark DataFrame.")

    # pandas and GeoPandas frames are converted in the same way
    return get_sedona_context().createDataFrame(df)
File without changes
@@ -0,0 +1,147 @@
1
+ import dataclasses
2
+ from enum import Enum
3
+
4
+ import pandas as pd
5
+ import geopandas as gpd
6
+ import pyspark.sql as ps
7
+ import pyspark.sql.functions as func
8
+
9
+ from libadalina_core.sedona_utils.utils import to_spark_dataframe
10
+ from sedona.sql import ST_Intersects, ST_Area, ST_Intersection, ST_Union, ST_Buffer, ST_GeometryType, ST_Dump
11
+
12
# Any tabular input accepted by the functions of this module: pandas, GeoPandas or Spark.
DataFrame = pd.DataFrame | gpd.GeoDataFrame | ps.DataFrame
13
+
14
def polygonize(df: DataFrame, radius_meters: float) -> ps.DataFrame:
    """
    Add a `polygonized_geometry` column in which points and lines are replaced by buffers around them.

    The `geometry` column of the input is inspected row by row:

    * null geometries stay null;
    * geometries whose type name contains `Point` are buffered by `radius_meters`;
    * geometries whose type name contains `LineString` become the union of a flat-capped
      left-side buffer and a flat-capped right-side buffer of `radius_meters`;
    * every other geometry is copied unchanged.

    :param df: The table to process (pandas, GeoPandas or Spark); it must have a `geometry` column.
    :param radius_meters: The buffer distance handed to `ST_Buffer`.
    :return: A Spark DataFrame with all the input columns plus `polygonized_geometry`.
    """
    table = to_spark_dataframe(df)

    # Reference the column of the converted Spark table by name. `df` may still be a
    # pandas/GeoPandas object, whose `geometry` attribute is not a Spark column.
    geometry = func.col('geometry')
    geometry_type = ST_GeometryType(geometry)

    # The third ST_Buffer argument is always passed as True, as in the other buffers below
    # NOTE(review): presumably the Sedona spheroid flag that makes the distance meters — confirm
    buffered_point = ST_Buffer(geometry, radius_meters, func.lit(True))
    buffered_line = ST_Union(
        ST_Buffer(geometry, radius_meters, func.lit(True),
                  parameters=func.lit('endcap=flat side=left')),
        ST_Buffer(geometry, radius_meters, func.lit(True),
                  parameters=func.lit('endcap=flat side=right'))
    )

    return table.select("*", func
                        .when(geometry.isNull(), None)
                        .when(geometry_type.like('%Point%'), buffered_point)
                        .when(geometry_type.like('%LineString%'), buffered_line)
                        .otherwise(geometry)
                        .alias('polygonized_geometry')
                        )
31
+
32
+
33
def explode_multi_geometry(df: DataFrame) -> ps.DataFrame:
    """
    Add a column in which geometries whose type name contains `Multi` are dumped into their parts.

    :param df: The table to process (pandas, GeoPandas or Spark); it must have a `geometry` column.
    :return: A Spark DataFrame with all the input columns plus the (unnamed) computed column.
    """
    table = to_spark_dataframe(df)

    # Reference the column of the converted Spark table by name. `df` may still be a
    # pandas/GeoPandas object, whose `geometry` attribute is not a Spark column.
    geometry = func.col('geometry')

    # NOTE(review): `func.explode` is nested inside a `when` expression and the branches mix an
    # array with plain geometries; Spark may reject this at analysis time — verify with a test
    # before relying on this function.
    return table.select("*", func
                        .when(geometry.isNull(), func.array())
                        .when(ST_GeometryType(geometry).like('%Multi%'),
                              func.explode(ST_Dump(geometry)))
                        .otherwise(geometry)
                        )
42
+
43
class AggregationType(Enum):
    """
    Aggregations that can be applied to a column; each value is the name of the aggregation.
    """
    COUNT = 'count'
    SUM = 'sum'
    AVG = 'avg'
    MIN = 'min'
    MAX = 'max'

    def to_spark_func(self):
        """
        Return the `pyspark.sql.functions` aggregate implementing this aggregation type.
        """
        spark_functions = {
            AggregationType.COUNT: func.count,
            AggregationType.SUM: func.sum,
            AggregationType.AVG: func.avg,
            AggregationType.MIN: func.min,
            AggregationType.MAX: func.max,
        }
        # Default to count if none matched
        return spark_functions.get(self, func.count)

    def __str__(self):
        return self.value

    def __repr__(self):
        return str(self)
68
+
69
@dataclasses.dataclass
class AggregationFunction:
    """
    Description of one aggregation to compute in `spatial_aggregation`.
    """
    # Name of the column whose values are aggregated
    column: str
    # Aggregation applied to the column
    aggregation_type: AggregationType
    # Name of the resulting column; when None, `<aggregation value>(<column>)` is used
    alias: str | None = None
    # Name of a geometry column: when set, each value is weighted by the share of that
    # geometry's area that intersects the `geometry` column before being aggregated
    proportional: str | None = None
75
+
76
def spatial_aggregation(table: DataFrame, aggregate_functions: list[AggregationFunction]) -> ps.DataFrame:
    """
    Group the rows of a table by their `geometry` column and aggregate the other columns.

    * Columns named by an aggregation function are aggregated as requested. Aggregations whose
      column is not present in the table are skipped.
    * When an aggregation function sets `proportional`, each value is first multiplied by the area
      of the intersection between `geometry` and the `proportional` geometry, divided by the area
      of the `proportional` geometry.
    * For every other column, except `geometry`, the first value of the group is taken.

    :param table: The table to aggregate (pandas, GeoPandas or Spark); it must have a `geometry` column.
    :param aggregate_functions: The aggregations to compute.
    :return: A Spark DataFrame with one row per distinct geometry.
    """
    table = to_spark_dataframe(table)
    available_columns = table.columns
    aggregated_column_names = [spec.column for spec in aggregate_functions]

    def result_name(spec: AggregationFunction) -> str:
        # Explicit alias if given, "<aggregation>(<column>)" otherwise
        if spec.alias is not None:
            return spec.alias
        return f"{spec.aggregation_type.value}({spec.column})"

    # Columns with no aggregation function: keep the first value of each group
    first_values = [
        func.first(name).alias(name)
        for name in available_columns
        if name != 'geometry' and name not in aggregated_column_names
    ]

    plain_aggregations = []
    proportional_aggregations = []
    for spec in aggregate_functions:
        if spec.column not in available_columns:
            continue
        spark_aggregate = spec.aggregation_type.to_spark_func()
        if spec.proportional is None:
            aggregated = spark_aggregate(func.col(spec.column))
            plain_aggregations.append(aggregated.alias(result_name(spec)))
        else:
            other_geometry = func.col(spec.proportional)
            overlap_area = ST_Area(ST_Intersection(func.col('geometry'), other_geometry))
            # value * overlapping area / total area of the proportional geometry
            weighted_value = func.col(spec.column) * overlap_area / ST_Area(other_geometry)
            proportional_aggregations.append(spark_aggregate(weighted_value).alias(result_name(spec)))

    # Group by geometry and aggregate other columns
    grouped = table.groupby(table.geometry)
    return grouped.agg(*first_values, *plain_aggregations, *proportional_aggregations)
111
+
112
class JoinType(Enum):
    """
    Kinds of join supported by `spatial_join`; each value is the join name passed to Spark.
    """
    INNER = 'inner'
    LEFT = 'left'
    RIGHT = 'right'
    FULL = 'full'

    def __str__(self):
        return self.value

    def __repr__(self):
        return str(self)
123
+
124
def spatial_join(
        left_table: DataFrame,
        right_table: DataFrame,
        join_type: JoinType = JoinType.INNER,
        aggregate: bool = False,
        aggregate_functions: list[AggregationFunction] | None = None
) -> ps.DataFrame:
    """
    Join two tables on the intersection of their geometries.

    The `geometry` columns of the two tables are renamed `geometry_left` and `geometry_right`
    to avoid conflicts, and rows are matched when the two geometries intersect.

    When `aggregate` is True the joined rows are additionally passed to `spatial_aggregation`,
    grouped by the geometry of the left table, which is then exposed as `geometry` again.

    :param left_table: The left table (pandas, GeoPandas or Spark); it must have a `geometry` column.
    :param right_table: The right table (pandas, GeoPandas or Spark); it must have a `geometry` column.
    :param join_type: The kind of join to perform.
    :param aggregate: Whether to aggregate the joined rows.
    :param aggregate_functions: The aggregations to compute; required when `aggregate` is True.
    :return: The joined (and possibly aggregated) Spark DataFrame.
    :raises ValueError: If `aggregate` is True and `aggregate_functions` is None.
    """
    left_table = to_spark_dataframe(left_table)
    right_table = to_spark_dataframe(right_table)

    result = (left_table
              .withColumnRenamed('geometry', 'geometry_left')
              .join(right_table.withColumnRenamed('geometry', 'geometry_right'),
                    on=ST_Intersects(func.col('geometry_left'), func.col('geometry_right')),
                    how=join_type.value)
              )

    if aggregate:
        if aggregate_functions is None:
            raise ValueError("aggregate_functions must be provided when aggregate is True")
        # spatial_aggregation groups by a column named `geometry`, which the join has just renamed:
        # restore the left geometry under that name, otherwise the aggregation cannot find it.
        result = spatial_aggregation(result.withColumnRenamed('geometry_left', 'geometry'),
                                     aggregate_functions)

    return result
147
+
File without changes
@@ -0,0 +1,22 @@
1
+ import pandas as pd
2
+ import geopandas as gpd
3
+ import pyspark.sql as ps
4
+ from libadalina_core.sedona_utils.coordinate_formats import DEFAULT_EPSG
5
+
6
def dataframe_to_geopackage(df: pd.DataFrame | gpd.GeoDataFrame | ps.DataFrame, path: str):
    """
    Write a DataFrame to a GeoPackage file, in a layer named `dataframe`.

    Spark and pandas DataFrames must hold their geometries in a column named `geometry`, assumed
    to be in libadalina default EPSG `DEFAULT_EPSG`. A GeoDataFrame is written with its own
    geometry column and CRS; if it has no CRS, `DEFAULT_EPSG` is assumed as well.

    :param df: The DataFrame to write, which can be a pandas DataFrame, a GeoPandas GeoDataFrame, or a Spark DataFrame.
    :param path: The path to the GeoPackage file where the DataFrame will be saved.
    :raises TypeError: If `df` is none of the supported DataFrame types.
    """
    if isinstance(df, ps.DataFrame):
        df = gpd.GeoDataFrame(df.toPandas(), geometry='geometry', crs=DEFAULT_EPSG.value)
    elif isinstance(df, gpd.GeoDataFrame):
        # Must be tested before pd.DataFrame: a GeoDataFrame is also a pandas DataFrame, and
        # re-wrapping it would force the geometry column name and the CRS onto it.
        if df.crs is None:
            df = df.set_crs(DEFAULT_EPSG.value)
    elif isinstance(df, pd.DataFrame):
        df = gpd.GeoDataFrame(df, geometry='geometry', crs=DEFAULT_EPSG.value)
    else:
        raise TypeError(f"Unsupported type {type(df)}. Expected pandas DataFrame, geopandas GeoDataFrame, or spark DataFrame.")
    df.to_file(path, layer='dataframe', driver="GPKG")
@@ -0,0 +1,67 @@
1
+ Metadata-Version: 2.4
2
+ Name: libadalina-core
3
+ Version: 1.0
4
+ Summary: A library for spatial joins of geographic data
5
+ Author-email: Marco Casazza <d.marcocasazza@gmail.com>, Alberto Ceselli <alberto.ceselli@unimi.it>, Marco Premoli <marco.premoli@unimi.it>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://gitlab.com/amelia_unimi/libadalina
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Requires-Python: ~=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: apache-sedona[spark]==1.7.1
16
+ Requires-Dist: pyspark==3.3.2
17
+ Requires-Dist: pandas==2.2.3
18
+ Requires-Dist: geopandas==1.0.1
19
+ Requires-Dist: shapely==2.1.1
20
+ Requires-Dist: install-jdk==1.1.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest==8.4.1; extra == "dev"
23
+ Requires-Dist: black; extra == "dev"
24
+ Requires-Dist: isort; extra == "dev"
25
+ Requires-Dist: sphinx; extra == "dev"
26
+ Requires-Dist: pydata-sphinx-theme; extra == "dev"
27
+ Dynamic: license-file
28
+
29
+ # libadalina-core
30
+
31
+ A Python library for spatial data processing.
32
+ It makes it easier to work with geospatial data in Python by providing a high-level interface
33
+ to Apache Sedona, a powerful geospatial processing engine, and integrates nicely with other well-known libraries
34
+ such as *geopandas* and *pandas*.
35
+
36
+ ## Installation
37
+
38
+ libadalina-core can be installed using pip:
39
+ ```
40
+ pip install libadalina-core
41
+ ```
42
+
43
+ If the `JAVA_HOME` environment variable is not set, a suitable Java runtime is downloaded to `$HOME/.jre` and used automatically.
44
+ Not every Java runtime is supported: if you encounter issues with your own installation, unset `JAVA_HOME` so that the automatically installed version is used instead.
45
+
46
+ ## Usage
47
+
48
+ You can find the documentation and examples at [libadalina-core documentation](https://libadalinacore-6b2a95.gitlab.io/).
49
+
50
+ ## Features
51
+
52
+
53
+ * Reading and writing geospatial data from various formats
54
+ * Spatial joins between datasets
55
+ * Spatial aggregations
56
+ * Utilities for working with Apache Sedona
57
+ * Configuration helpers for setting up Apache Sedona
58
+
59
+ ## Requirements
60
+
61
+ - Python 3.10
62
+ - Dependencies:
63
+ - apache-sedona[spark]
64
+ - pyspark
65
+ - pandas
66
+ - geopandas
67
+ - install-jdk
@@ -0,0 +1,22 @@
1
+ libadalina_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ libadalina_core/examples/example_hospitals_in_provinces.py,sha256=ytDOWOG6Tp0Tmu6jst-9QUmaEeYaxQfAxnm6DtSoVyM,2075
3
+ libadalina_core/examples/example_population_in_provinces.py,sha256=ost2ymqSPZYgMgjnh7f2bvlla8n9b2FmLtjrVU63Vq8,1807
4
+ libadalina_core/examples/example_population_served_by_hospitals.py,sha256=3M1F0GpQ2i6tR4ZjQ2dkm1vBq02OjvwAqZ3FQje7hP0,2833
5
+ libadalina_core/readers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ libadalina_core/readers/readers.py,sha256=PbUThxwu_kBSJMlpjjixvIG46G8i8hPpJIb6HoFOQoU,608
7
+ libadalina_core/sedona_configuration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ libadalina_core/sedona_configuration/jdk_installer.py,sha256=i59G9YiNJ9H0BGpbeGmlTiFT0vP_SfjlIFAJylknR3I,730
9
+ libadalina_core/sedona_configuration/sedona_configuration.py,sha256=qhHqL74CBEXDdol3YP-lklsCkZvuvzPXHgezfyhSiiw,3697
10
+ libadalina_core/sedona_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ libadalina_core/sedona_utils/coordinate_formats.py,sha256=Ye1npHwJT7fLST1zkwyE8867mwXK-77ZsqaSEIL8RQs,571
12
+ libadalina_core/sedona_utils/utils.py,sha256=IYBc3XSpXDxmSdndLj08Nv1SkZEh8nGKg99xWEmgv7I,1227
13
+ libadalina_core/spatial_join/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ libadalina_core/spatial_join/query_builder.py,sha256=G12nXQid0ZQbeLvytFNY78ynLmk5iKIp08kGSJ8Tk5o,5750
15
+ libadalina_core/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ libadalina_core/writers/writers.py,sha256=kFD-oZbEiirmlNMeccZ5DaYvtXd3LeBafCpGKuXFlwQ,1105
17
+ libadalina_core-1.0.dist-info/licenses/LICENSE,sha256=U4yJabEzK3cseBN2UTwArB1I1p0ExZhl2eLUcCz_pl8,1075
18
+ libadalina_core-1.0.dist-info/METADATA,sha256=afDUocHENbBj2qHhunRdtocaJe-4Q2H_twIKTdoxaJU,2179
19
+ libadalina_core-1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
+ libadalina_core-1.0.dist-info/entry_points.txt,sha256=tsKAZ3w781QnK-TreW6KdA0H0655k5dW2ayYMjsTxPk,56
21
+ libadalina_core-1.0.dist-info/top_level.txt,sha256=DspVcM_AHdB9K-Za5D4H_LHHHOB918nTuOLCtOpo-xA,16
22
+ libadalina_core-1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ libadalina = libadalina.__main__:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 University of Milan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ libadalina_core