digitalarzengine 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. digitalarzengine-0.1.0/PKG-INFO +136 -0
  2. digitalarzengine-0.1.0/digitalarzengine/__init__.py +0 -0
  3. digitalarzengine-0.1.0/digitalarzengine/adapters/__init__.py +0 -0
  4. digitalarzengine-0.1.0/digitalarzengine/adapters/data_manager.py +512 -0
  5. digitalarzengine-0.1.0/digitalarzengine/adapters/db_manager.py +112 -0
  6. digitalarzengine-0.1.0/digitalarzengine/io/__init__.py +0 -0
  7. digitalarzengine-0.1.0/digitalarzengine/io/file_io.py +54 -0
  8. digitalarzengine-0.1.0/digitalarzengine/io/gee_auth.py +61 -0
  9. digitalarzengine-0.1.0/digitalarzengine/io/gee_image.py +160 -0
  10. digitalarzengine-0.1.0/digitalarzengine/io/gee_image_collection.py +257 -0
  11. digitalarzengine-0.1.0/digitalarzengine/io/gee_region.py +204 -0
  12. digitalarzengine-0.1.0/digitalarzengine/io/gpd_vector.py +98 -0
  13. digitalarzengine-0.1.0/digitalarzengine/io/rio_raster.py +421 -0
  14. digitalarzengine-0.1.0/digitalarzengine/manage.py +92 -0
  15. digitalarzengine-0.1.0/digitalarzengine/pipeline/__init__.py +0 -0
  16. digitalarzengine-0.1.0/digitalarzengine/pipeline/gee_pipeline.py +60 -0
  17. digitalarzengine-0.1.0/digitalarzengine/pipeline/pak_data.py +25 -0
  18. digitalarzengine-0.1.0/digitalarzengine/processing/__init__.py +0 -0
  19. digitalarzengine-0.1.0/digitalarzengine/processing/band_process.py +60 -0
  20. digitalarzengine-0.1.0/digitalarzengine/processing/rio_process.py +63 -0
  21. digitalarzengine-0.1.0/digitalarzengine/settings.py +25 -0
  22. digitalarzengine-0.1.0/digitalarzengine/utils/__init__.py +0 -0
  23. digitalarzengine-0.1.0/digitalarzengine/utils/crypto.py +42 -0
  24. digitalarzengine-0.1.0/digitalarzengine/utils/date_utils.py +23 -0
  25. digitalarzengine-0.1.0/digitalarzengine.egg-info/PKG-INFO +136 -0
  26. digitalarzengine-0.1.0/digitalarzengine.egg-info/SOURCES.txt +28 -0
  27. digitalarzengine-0.1.0/digitalarzengine.egg-info/dependency_links.txt +1 -0
  28. digitalarzengine-0.1.0/digitalarzengine.egg-info/top_level.txt +1 -0
  29. digitalarzengine-0.1.0/setup.cfg +4 -0
  30. digitalarzengine-0.1.0/setup.py +55 -0
@@ -0,0 +1,136 @@
1
+ Metadata-Version: 2.4
2
+ Name: digitalarzengine
3
+ Version: 0.1.0
4
+ Summary: DigitalArzEngine for GEE, raster and vector data processing
5
+ Author: Ather Ashraf
6
+ Author-email: atherashraf@gmail.com
7
+ Keywords: raster,vector,digitalarz
8
+ Classifier: Development Status :: 1 - Planning
9
+ Classifier: Intended Audience :: Education
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: MacOS :: MacOS X
12
+ Classifier: Operating System :: Microsoft :: Windows
13
+ Requires-Python: >=3
14
+ Description-Content-Type: text/markdown
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: keywords
21
+ Dynamic: requires-python
22
+ Dynamic: summary
23
+
24
+ # DigitalArzEngine
25
+
26
+ **DigitalArzEngine** is a Python library designed to streamline raster data processing by extending the capabilities of the [`rasterio`](https://rasterio.readthedocs.io/) library. It provides a suite of tools for efficient geospatial transformations, mosaicking, and analysis, making it ideal for researchers, analysts, and developers working with geospatial raster data.
27
+
28
+ ## πŸš€ Features
29
+
30
+ - **Mosaicking:** Seamlessly merge multiple raster datasets into a unified output.
31
+ - **Summary Statistics:** Extract key metrics such as minimum, maximum, mean, and standard deviation from raster layers.
32
+ - **Reprojection & Resampling:** Transform raster datasets to different coordinate systems and resolutions with ease.
33
+ - **Clipping & Masking:** Apply geometric masks or clip rasters to specific regions of interest.
34
+ - **Pixel-wise Analysis:** Enable pixel-level operations for customized raster computations.
35
+ - **Efficient I/O Handling:** Support for reading, writing, and converting between various raster formats.
36
+
37
+ ## πŸ“¦ Installation
38
+
39
+ To install the library using pip:
40
+
41
+ ```bash
42
+ pip install digitalarzengine
43
+ ```
44
+
45
+ ---
46
+
47
+ ## πŸ“š DataManager Utility Class
48
+
49
+ The `DataManager` class is a powerful and modular tool for handling geospatial data using SQLite databases. It supports structured storage of JSON records alongside geometric data (as WKB), with metadata tracking, querying, and integration with `GeoPandas`.
50
+
51
+ ### βœ… Benefits and Usage
52
+
53
+ - **Structured Storage**: Stores geospatial records (geometry + attributes) in a portable `.db` format.
54
+ - **Metadata Management**: Tracks field names, geometry columns, and record counts.
55
+ - **Geometry Support**: Accepts and stores geometries as WKB with support for reprojection to EPSG:4326.
56
+ - **Integration with GeoPandas**: Easily convert the stored data into DataFrames and GeoDataFrames.
57
+ - **Custom Query Support**: Run filtered SQL queries and retrieve results as structured pandas objects.
58
+ - **Extendable Schema**: Dynamically add and update fields in your dataset.
59
+
60
+ ### πŸ”§ Example Use-Cases
61
+
62
+ - Saving extracted geospatial features from remote sensing workflows.
63
+ - Iteratively storing geospatial model outputs with spatial context.
64
+ - Lightweight local spatial database for machine learning input.
65
+
66
+ ### πŸ”„ Core Methods
67
+
68
+ - `add_record`, `update_record`, `delete_record`, `get_record`
69
+ - `get_data_as_df`, `get_data_as_gdf`, `get_gdf_list_under_aoi`
70
+ - `record_exists`, `change_key`, `add_column`, `update_column`
71
+ - Context manager support (`with DataManager(...) as dm:`)
72
+
73
+ This utility class is designed to complement the raster processing tools in `DigitalArzEngine`, making it easier to work with both raster and vector data consistently.
74
+
75
+ ---
76
+
77
+ ## πŸ—ƒοΈ DBManager Utility Class
78
+
79
+ The `DBManager` class provides a secure and flexible way to interact with PostgreSQL/PostGIS databases. It builds SQLAlchemy engines from settings and supports reading data directly into Pandas or GeoPandas.
80
+
81
+ ### βœ… Benefits and Usage
82
+
83
+ - **Secure Configuration**: Pulls database credentials from a centralized encrypted config using `CryptoUtils`.
84
+ - **Flexible Access**: Supports multiple environments or databases through the `from_config` method.
85
+ - **SQLAlchemy Integration**: Simplifies connection management and avoids hardcoding sensitive details.
86
+ - **Read GeoData**: Loads spatial tables directly into `GeoDataFrame` using `read_postgis`.
87
+ - **Exclude Geometry When Needed**: Supports reading attribute-only tables as plain DataFrames.
88
+
89
+ ### πŸ”§ Example Use-Cases
90
+
91
+ - Connecting securely to remote geospatial databases for querying.
92
+ - Reading and processing large PostGIS tables as GeoDataFrames.
93
+ - Integrating web dashboards or data pipelines with PostgreSQL/PostGIS backends.
94
+
95
+ ### πŸ”„ Core Methods
96
+
97
+ - `from_config(db_key)`: Load credentials and settings from `DATABASES`
98
+ - `get_engine()`: Return SQLAlchemy engine object
99
+ - `read_as_geo_dataframe()`: Read spatial data into GeoDataFrame
100
+ - `read_as_dataframe()`: Read tabular data with option to exclude geometry
101
+ - `get_geometry_columns()`: Identify spatial fields in the schema
102
+
103
+ ### βš™οΈ Configuration Example
104
+
105
+ Below is a sample `DATABASES` configuration dictionary to be placed in `digitalarzengine/settings.py`:
106
+
107
+ ```python
108
+ DATABASES = {
109
+ "drm": {
110
+ "ENGINE": "postgresql+psycopg2",
111
+ "NAME": "drm",
112
+ "USER": "dafast",
113
+ "PASSWORD": "***********************************",
114
+ "HOST": os.getenv("DB_HOST", "localhost"),
115
+ "PORT": "5432",
116
+ }
117
+ }
118
+ ```
119
+
120
+ > ⚠️ Note: The password here is shown encrypted. It should be decrypted using `CryptoUtils` at runtime.
121
+
122
+ The `DBManager` is ideal for scenarios where spatial data needs to be read or processed securely from enterprise databases, complementing local `DataManager` workflows.
123
+
124
+ For more advanced usage patterns and custom queries, see the source or documentation site (coming soon).
125
+
126
+ ## πŸ‘¨β€πŸ’» Developed by
127
+
128
+ **Ather Ashraf**
129
+ Geospatial Data Scientist and AI Specialist
130
+
131
+ * πŸ“§ Email: [atherashraf@gmail.com](mailto:atherashraf@gmail.com)
132
+ * 🌐 LinkedIn: [https://sa.linkedin.com/in/ather-ashraf](https://sa.linkedin.com/in/ather-ashraf)
133
+ * πŸ“œ Google Scholar: [View Profile](https://scholar.google.com.pk/citations?user=XbqhyrsAAAAJ&hl=en)
134
+ * πŸ“± WhatsApp: +966557252342 | +923224785104
135
+
136
+ ---
File without changes
@@ -0,0 +1,512 @@
1
+ import os
2
+ import sqlite3
3
+ import json
4
+ import logging
5
+ from datetime import date, datetime
6
+ from numbers import Number
7
+ from typing import Optional
8
+
9
+ import geopandas as gpd
10
+ import numpy as np
11
+ import pandas as pd
12
+ import pyproj
13
+
14
+ from shapely import wkb
15
+ from shapely.geometry.base import BaseGeometry
16
+ from shapely.ops import transform
17
+
18
+ from digitalarzengine.io.file_io import FileIO
19
+
20
+ logger = logging.getLogger(__name__)
21
+ logger.setLevel(logging.DEBUG)
22
+
23
+
24
class DataManager:
    """
    SQLite-backed store for keyed JSON records with optional geometry.

    Each record is a ``(key, JSON blob, WKB geometry)`` row in a single
    ``records`` table inside a ``da_*.db`` file. A JSON side-car file keeps
    lightweight metadata (known field names, record count, extra columns).
    Geometries are normalised to EPSG:4326 before storage and round-trip
    via Shapely / GeoPandas. Supports use as a context manager
    (``with DataManager(...) as dm:``).
    """

    # Class-scoped logger so every method logs consistently (the original
    # mixed print() and a module-level logger).
    _log = logging.getLogger(__name__)

    def __init__(self, folder_path: str, base_name: str, purpose: str = None):
        """
        Create (or open) the database ``<folder_path>/da_<base_name>.db``.

        Parameters
        ----------
        folder_path : str
            Directory that will contain the .db and metadata files.
        base_name : str
            Database file name; a ``da_`` prefix and ``.db`` suffix are
            added automatically when missing.
        purpose : str, optional
            Free-text note stored in the metadata file.
        """
        if not base_name.startswith("da_"):
            base_name = "da_" + base_name
        if not base_name.endswith(".db"):
            base_name = base_name + ".db"
        self.db_path = os.path.join(folder_path, base_name)
        # Ensure the parent directory exists (stdlib equivalent of the
        # original FileIO.mkdirs(file_path) call, which created the
        # containing folder for the given file path).
        os.makedirs(os.path.dirname(self.db_path) or ".", exist_ok=True)
        # NOTE: keeps the historical "da_<da_name>.db_metadata.json" naming
        # (double prefix included) so existing side-car files stay readable.
        self.metadata_file = os.path.join(folder_path, f"da_{base_name}_metadata.json")
        self.metadata = {
            "field_name": [],
            "geom_field_name": "",
            "record_count": 0,
            "purpose": purpose,
            "additional_cols": []
        }
        self.table_name = "records"
        self._initialize_db()
        self._load_metadata()

    def __enter__(self):
        """Context-manager entry; returns the manager itself."""
        return self

    def __exit__(self, exc_type, exc_value, tb):
        """Context-manager exit; releases the SQLite handles."""
        self.close()

    def get_dirname(self):
        """Return the directory containing the database file."""
        return os.path.dirname(self.db_path)

    @classmethod
    def from_file_path(cls, file_path: str) -> 'DataManager':
        """Alternate constructor: build a manager from a full .db file path."""
        # Renamed local from `dir` to avoid shadowing the builtin.
        directory = os.path.dirname(file_path)
        base_name = os.path.basename(file_path)
        return cls(directory, base_name)

    def get_database_file_path(self):
        """Return the full path of the SQLite database file."""
        return self.db_path

    def get_metadata_file_path(self):
        """Return the full path of the JSON metadata side-car file."""
        return self.metadata_file

    def _connect(self):
        """
        (Re)open the SQLite connection if it is missing or closed.

        Several methods close the connection after committing so the .db
        file is never left locked. The original implementation then reused
        the closed cursor, so every call after the first raised
        ``sqlite3.ProgrammingError``; this helper transparently reopens
        the handle before the next operation.
        """
        try:
            # Cheap probe: raises ProgrammingError on a closed handle.
            self.cursor.execute("SELECT 1")
        except (AttributeError, sqlite3.ProgrammingError):
            self.conn = sqlite3.connect(self.db_path)
            self.cursor = self.conn.cursor()

    def _initialize_db(self):
        """Create the records table if it doesn't exist, then release the handle."""
        self.conn = sqlite3.connect(self.db_path)
        self.cursor = self.conn.cursor()
        self.cursor.execute(f'''
            CREATE TABLE IF NOT EXISTS {self.table_name} (
                id INTEGER PRIMARY KEY,
                key TEXT UNIQUE,
                data JSON,
                geom BLOB
            )
        ''')
        self.conn.commit()
        self.close()

    def _load_metadata(self):
        """Load metadata from the side-car JSON file if it exists."""
        if os.path.exists(self.metadata_file):
            with open(self.metadata_file, 'r') as file:
                self.metadata = json.load(file)

    def _save_metadata(self):
        """Persist the in-memory metadata dict to the side-car JSON file."""
        with open(self.metadata_file, 'w') as file:
            json.dump(self.metadata, file, indent=4)

    @staticmethod
    def default_serializer(obj):
        """
        ``json.dumps`` fallback: ISO-format dates/datetimes, unwrap numpy
        scalars, coerce other Numbers to float.
        """
        if isinstance(obj, (date, datetime)):
            return obj.isoformat()
        # numpy scalars must be checked before the generic Number branch so
        # integer dtypes keep their integral value (np.integer/np.floating
        # are also Numbers, which made this branch unreachable originally
        # and silently cast numpy ints to float).
        if isinstance(obj, (np.integer, np.floating)):
            return obj.item()
        if isinstance(obj, Number):
            return float(obj)
        raise TypeError(f'Object of type {obj.__class__.__name__} is not JSON serializable')

    @staticmethod
    def ensure_srid_4326(geom: "BaseGeometry", original_crs) -> "BaseGeometry":
        """
        Ensure the geometry is in SRID 4326, reprojecting if necessary.

        Parameters
        ----------
        geom : BaseGeometry
            The Shapely geometry object (may be None).
        original_crs
            CRS of *geom* in any form accepted by pyproj.

        Returns
        -------
        BaseGeometry
            The geometry reprojected to SRID 4326 when needed, else as-is.
        """
        if geom is not None:
            target_crs = pyproj.CRS.from_epsg(4326)
            # Only transform if the current CRS differs from EPSG:4326.
            if original_crs != target_crs:
                project = pyproj.Transformer.from_crs(original_crs, target_crs, always_xy=True).transform
                geom = transform(project, geom)
        return geom

    def change_key(self, key: str, new_key: str) -> bool:
        """
        Rename the key of an existing record.

        Returns True on success, False if the key does not exist or the
        update fails (e.g. the new key already exists).
        """
        try:
            self._connect()
            query = f'UPDATE {self.table_name} SET key = ? WHERE key = ?'
            self.cursor.execute(query, (new_key, key))
            if self.cursor.rowcount == 0:
                self._log.warning(f"Record with key '{key}' not found to change.")
                return False
            self.conn.commit()
            self.close()
            self._log.info(f"Changed key from '{key}' to '{new_key}'.")
            return True
        except Exception as e:
            self._log.error(f"Failed to change key '{key}' to '{new_key}': {e}", exc_info=True)
            return False

    def add_record(self, key: str, record: dict,
                   geom: Optional["BaseGeometry"] = None, geom_crs=None, overwrite=False) -> bool:
        """
        Add a record to the database table with optional geometric data.

        Parameters
        ----------
        key : str
            Unique identifier used as primary key in the database.
        record : dict
            Dictionary to be serialized as JSON.
        geom : Optional[BaseGeometry]
            Shapely geometry, stored as WKB; must be in 4326 or convertible.
        geom_crs : optional
            If given, the geometry is reprojected to 4326 before storage.
        overwrite : bool
            When True, an existing record with the same key is replaced.

        Returns
        -------
        bool
            True if added successfully, False on duplicate key or error.
        """
        try:
            if overwrite and self.record_exists(key):
                self.delete_record(key)
            # Serialize the record with the custom fallback serializer.
            record_json = json.dumps(record, default=self.default_serializer)

            # Reproject and convert geometry to WKB if present.
            if geom_crs is not None:
                geom = self.ensure_srid_4326(geom, geom_crs)
            if geom is not None:
                geom = sqlite3.Binary(geom.wkb)

            self._connect()
            query = f'INSERT INTO {self.table_name} (key, data, geom) VALUES (?, ?, ?)'
            self.cursor.execute(query, (key, record_json, geom))

            # Merge any new field names into the tracked metadata.
            existing_fields = set(self.metadata.get('field_name', []))
            self.metadata['field_name'] = list(existing_fields.union(record.keys()))

            self.metadata['record_count'] += 1
            self.conn.commit()
            self._save_metadata()
            self.close()
            return True

        except sqlite3.IntegrityError:
            self._log.warning(f"Duplicate key '{key}' encountered.")
            return False
        except Exception as e:
            self._log.error(f"Failed to add record with key '{key}': {e}", exc_info=True)
            return False

    def close(self):
        """Release the cursor and connection, tolerating already-closed handles."""
        try:
            self.cursor.close()
        except Exception as e:
            self._log.warning(f"Cursor close failed: {e}")
        try:
            self.conn.close()
        except Exception as e:
            self._log.warning(f"Connection close failed: {e}")

    def delete_record(self, key: str) -> bool:
        """
        Delete a record by key.

        Parameters
        ----------
        key : str
            The unique identifier of the record to delete.

        Returns
        -------
        bool
            True if deleted, False if the key does not exist or an error occurs.
        """
        try:
            self._connect()
            query = f'DELETE FROM {self.table_name} WHERE key = ?'
            self.cursor.execute(query, (key,))

            if self.cursor.rowcount == 0:
                self._log.warning(f"Record with key '{key}' does not exist.")
                return False

            self.conn.commit()

            # Keep the cached record count in sync with the table.
            self.metadata['record_count'] -= 1
            self._save_metadata()
            self.close()
            return True

        except Exception as e:
            self._log.error(f"An error occurred while deleting the record: {e}", exc_info=True)
            return False

    def update_record(self, key: str, record: dict,
                      geom: Optional["BaseGeometry"] = None, geom_crs=None) -> bool:
        """
        Replace the data (and geometry) of an existing record.

        Parameters
        ----------
        key : str
            The unique identifier locating the record to update.
        record : dict
            New attribute data, serialized to JSON.
        geom : Optional[BaseGeometry]
            New geometry (converted to WKB), or None to clear/keep none.
        geom_crs : optional
            If given, the geometry is reprojected to 4326 before storage.

        Returns
        -------
        bool
            True if updated, False if the key does not exist or an error occurs.
        """
        try:
            record_json = json.dumps(record, default=self.default_serializer)

            # Reproject and convert Shapely geometry to WKB if present.
            if geom_crs is not None:
                geom = self.ensure_srid_4326(geom, geom_crs)
            if geom is not None:
                geom = sqlite3.Binary(geom.wkb)

            self._connect()
            query = f'UPDATE {self.table_name} SET data = ?, geom = ? WHERE key = ?'
            self.cursor.execute(query, (record_json, geom, key))

            if self.cursor.rowcount == 0:
                self._log.warning(f"Record with key '{key}' does not exist.")
                return False

            self.conn.commit()
            self.close()
            return True

        except Exception as e:
            self._log.error(f"An error occurred: {e}", exc_info=True)
            return False

    def get_record(self, key: str):
        """
        Return ``(record_dict, geometry)`` for *key*, or ``(None, None)``.

        The geometry (when present) is returned as a GeoSeries parsed from
        the stored WKB.
        """
        self._connect()
        query = f'SELECT data, geom FROM {self.table_name} WHERE key = ?'
        self.cursor.execute(query, (key,))
        result = self.cursor.fetchone()
        if result:
            record = json.loads(result[0])
            geom = gpd.GeoSeries.from_wkb(result[1]) if result[1] is not None else None
            return record, geom
        return None, None

    def get_record_value(self, key: str, column: str):
        """Return a single attribute of a record, or None if absent."""
        record, geom = self.get_record(key)
        if record:
            return record.get(column, None)
        return None

    def get_record_value_as_type(self, key: str, column: str, data_type: str):
        """
        Return the value of *column* for *key*, cast to the named type.

        Supported ``data_type`` names (case-insensitive): int/integer,
        float/real, str/text, bool, date, datetime. Unknown names return
        the value unchanged; a failed cast returns None.

        (The original implementation fetched the value but never returned
        anything — fixed here.)
        """
        value = self.get_record_value(key, column)
        if value is None:
            return None
        casters = {
            "int": int, "integer": int,
            "float": float, "real": float,
            "str": str, "text": str,
            "bool": bool,
            "date": lambda v: date.fromisoformat(str(v)),
            "datetime": lambda v: datetime.fromisoformat(str(v)),
        }
        caster = casters.get(str(data_type).lower())
        if caster is None:
            return value
        try:
            return caster(value)
        except (TypeError, ValueError) as e:
            self._log.warning(f"Could not cast value of '{column}' to {data_type}: {e}")
            return None

    def record_exists(self, key: str):
        """Return True if a record with *key* exists."""
        self._connect()
        query = f'SELECT 1 FROM {self.table_name} WHERE key = ?'
        self.cursor.execute(query, (key,))
        return self.cursor.fetchone() is not None

    def get_metadata(self):
        """Return the in-memory metadata dict."""
        return self.metadata

    def get_data_as_df(self, query: str = None) -> pd.DataFrame:
        """
        Fetch data from the database and return it as a DataFrame.

        Parameters
        ----------
        query : str, optional
            SQL query to run. When omitted, all records (plus any
            additional columns tracked in metadata) are fetched, and the
            JSON ``data`` column is flattened into top-level columns.

        Returns
        -------
        pd.DataFrame
            A DataFrame of the fetched records; columns whose name
            contains 'date' are parsed to datetimes.
        """
        if query is None:
            columns_to_select = ['key', 'data']
            # Include any columns added later via add_column().
            if "additional_cols" in self.metadata:
                columns_to_select.extend(self.metadata["additional_cols"])
            columns_str = ', '.join(columns_to_select)
            query = f'SELECT {columns_str} FROM {self.table_name}'

        self._connect()
        self.cursor.execute(query)

        rows = self.cursor.fetchall()
        column_names = [description[0] for description in self.cursor.description]

        records = []
        for row in rows:
            record_dict = {}
            for col_name, col_value in zip(column_names, row):
                if isinstance(col_value, str):
                    try:
                        json_data = json.loads(col_value)
                        if isinstance(json_data, dict):
                            # Flatten the JSON payload into top-level columns.
                            record_dict.update(json_data)
                        else:
                            record_dict[col_name] = json_data
                    except json.JSONDecodeError:
                        record_dict[col_name] = col_value
                elif isinstance(col_value, (date, datetime)):
                    # NOTE: the original also took this branch for any
                    # column whose name contained 'date', which crashed on
                    # non-date values (no .isoformat()); string dates are
                    # already handled above and parsed below.
                    record_dict[col_name] = col_value.isoformat()
                else:
                    record_dict[col_name] = col_value
            records.append(record_dict)

        df = pd.DataFrame(records)
        for col in df.columns:
            if "date" in col.lower():
                df[col] = pd.to_datetime(df[col], errors='coerce')
        return df

    def get_data_as_gdf(self, query: str = None) -> "gpd.GeoDataFrame":
        """
        Fetch data including geometry and return it as a GeoDataFrame.

        Parameters
        ----------
        query : str, optional
            SQL query to run. When omitted, all records with non-null
            geometry are fetched.

        Returns
        -------
        gpd.GeoDataFrame
            Records with their geometries (assumed EPSG:4326, matching how
            add_record stores them); empty GeoDataFrame when no rows match.
        """
        if query is None:
            columns_to_select = ['key', 'data', 'geom']
            if "additional_cols" in self.metadata:
                columns_to_select.extend(self.metadata["additional_cols"])
            columns_str = ', '.join(columns_to_select)
            query = f'SELECT {columns_str} FROM {self.table_name} WHERE geom IS NOT NULL'

        df = self.get_data_as_df(query=query)
        if not df.empty:
            # Stored geometries are WKB blobs; decode back to Shapely.
            df['geom'] = df['geom'].apply(lambda x: wkb.loads(x))
            gdf = gpd.GeoDataFrame(df, geometry='geom', crs='EPSG:4326')
            return gdf
        return gpd.GeoDataFrame()

    def add_column(self, column_name: str, data_type: str, default_value=None):
        """
        Add a new column to the table if it doesn't already exist.

        @param column_name: Name of the new column.
        @param data_type: SQLite type such as TEXT, INTEGER, BLOB,
            DATE (YYYY-MM-DD), TIME (hh:mm:ss), TIMESTAMP (YYYY-MM-DD hh:mm:ss).
        @param default_value: Default value for the new column.
        """
        try:
            self._connect()
            # Skip if the column already exists.
            self.cursor.execute(f"PRAGMA table_info({self.table_name})")
            columns = [info[1] for info in self.cursor.fetchall()]
            if column_name in columns:
                self._log.info(f"Column '{column_name}' already exists. Skipping addition.")
                return

            # DDL cannot be parameterized in SQLite; column_name/data_type
            # are interpolated and must come from trusted code, not users.
            sql = f'ALTER TABLE {self.table_name} ADD COLUMN {column_name} {data_type}'
            if default_value is not None:
                sql += f' DEFAULT {default_value}'
            self.cursor.execute(sql)

            # Track the extra column in metadata so default queries include it.
            if "additional_cols" not in self.metadata:
                self.metadata["additional_cols"] = []
            if column_name not in self.metadata["additional_cols"]:
                self.metadata["additional_cols"].append(column_name)
                self._save_metadata()

            self.conn.commit()
            self._log.info(f"Added column '{column_name}' to the records table with default value '{default_value}'.")
            self.close()
        except sqlite3.OperationalError as e:
            self._log.error(f"Error adding column '{column_name}': {e}")

    def update_column(self, key: str, column_name: str, value):
        """
        Set *column_name* to *value* for the record identified by *key*.

        Returns True on success, False when the key is missing or the
        statement fails. ``column_name`` is interpolated into SQL and must
        come from trusted code.
        """
        try:
            self._connect()
            query = f'UPDATE {self.table_name} SET {column_name} = ? WHERE key = ?'
            self.cursor.execute(query, (value, key))
            if self.cursor.rowcount == 0:
                self._log.warning(f"Record with key '{key}' does not exist.")
                return False
            self.conn.commit()
            self._log.info(f"Updated column '{column_name}' for key '{key}' with value '{value}'.")
            self.close()
            return True
        except sqlite3.OperationalError as e:
            self._log.error(f"Error updating column '{column_name}' for key '{key}': {e}")
            return False

    def get_gdf_list_under_aoi(self, aoi_gdf: "gpd.GeoDataFrame") -> "gpd.GeoDataFrame":
        """
        Return stored records whose geometry intersects the given AOI.

        The AOI is reprojected to the stored CRS when needed; only its
        geometry column participates in the spatial join.
        """
        gdf = self.get_data_as_gdf()
        if not gdf.empty:
            # Ensure both GeoDataFrames share a CRS before the join.
            if gdf.crs != aoi_gdf.crs:
                aoi_gdf = aoi_gdf.to_crs(gdf.crs)

            # Keep only the AOI geometry so no attribute columns leak in.
            aoi_gdf = aoi_gdf[[aoi_gdf.geometry.name]]
            result_gdf = gpd.sjoin(gdf, aoi_gdf, how='inner', predicate='intersects')
            return result_gdf
        return gpd.GeoDataFrame()

    def iterate_keys(self):
        """Yield all record keys in the database one by one."""
        self._connect()
        query = f"SELECT key FROM {self.table_name}"
        self.cursor.execute(query)
        for (key,) in self.cursor.fetchall():
            yield key