ecoscape-utilities 0.0.25__tar.gz → 0.0.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ecoscape-utilities might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: ecoscape-utilities
3
- Version: 0.0.25
3
+ Version: 0.0.32
4
4
  Summary: A collection of EcoScape utilities.
5
5
  Author-email: Luca de Alfaro <luca@ucsc.edu>, Coen Adler <ctadler@ucsc.edu>, Artie Nazarov <anazarov@ucsc.edu>, Natalia Ocampo-Peñuela <nocampop@ucsc.edu>, Jasmine Tai <cjtai@ucsc.edu>, Natalie Valett <nvalett@ucsc.edu>
6
6
  Project-URL: Homepage, https://github.com/ecoscape-earth/ecoscape-utilities
@@ -11,6 +11,11 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.7
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE.md
14
+ Requires-Dist: pandas>=1.0.0
15
+ Requires-Dist: scgt>=0.0.20
16
+ Requires-Dist: pyproj>=3.0.0
17
+ Requires-Dist: numpy>=1.0.0
18
+ Dynamic: license-file
14
19
 
15
20
  # EcoScape Utilities
16
21
 
@@ -1,2 +1,3 @@
1
1
  from .bird_runs import *
2
+ from .ebird_db import *
2
3
 
@@ -0,0 +1,658 @@
1
+ import hashlib
2
+ import numpy as np
3
+ import os
4
+ import pandas as pd
5
+ import sqlite3
6
+ from sqlite3 import Error
7
+
8
+ from collections import defaultdict
9
+ from pyproj import Transformer
10
+ from pyproj.crs import CRS
11
+
12
+ from scgt import GeoTiff
13
+
14
+ """
15
+ A module for interaction with a sqlite database. Contains functions for query execution,
16
+ and some common functionality we need to run on the DB
17
+ """
18
class Connection:
    """Thin wrapper around a SQLite connection.

    Errors are reported on stdout instead of being raised: a failed
    connection leaves ``self.conn`` set to None, and a failed query makes
    ``execute_query`` return None.  Instances can be used as context
    managers; the connection is closed when the ``with`` block exits.
    """

    def __init__(self, db_file):
        """Initializes the connection to the SQLite database
        @param db_file: The file path to the database file
        """
        conn = None
        try:
            conn = sqlite3.connect(db_file)
        except Error as e:
            print("Error in create_connection: ", e)
        self.conn = conn

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Nothing is returned, so an exception raised inside the with-block
        # still propagates after the connection has been closed.
        if self.conn:
            self.conn.close()

    def get_cursor(self):
        """
        Creates a cursor on the underlying connection.
        :returns: a sqlite3 cursor, or None if no cursor could be created
        """
        try:
            cur = self.conn.cursor()
        except (Error, AttributeError):
            # Error: the connection is closed or unusable.
            # AttributeError: self.conn is None because connecting failed.
            print("error connecting to db")
            cur = None
        return cur

    def execute_query(self, query, verbose=False):
        """
        executes the given query in the database
        :param query: either a sqlite query string, or a pair
            (query string, parameters) whose parameters are passed to
            cursor.execute for placeholder binding
        :param verbose (boolean): flag to print out result of the query
        :returns: result of the query as a list of rows, or None if the
            query could not be executed (the error is printed)
        """
        cur = self.get_cursor()
        if cur is None:
            print("Error executing query:\n\t", query, ".\n Error: ", "no database cursor available")
            return None
        try:
            if isinstance(query, str):
                cur.execute(query)
            else:
                cur.execute(query[0], query[1])
            self.conn.commit()
            rows = cur.fetchall()
        except Exception as e:
            # Deliberately best-effort: callers test the result against None.
            print("Error executing query:\n\t", query, ".\n Error: ", e)
            return None
        finally:
            cur.close()
        if verbose:
            for row in rows:
                print(row)
        return rows
66
+
67
+
68
class EbirdObservations(Connection):
    """Class for eBird-specific connections, includes functionality particular to the eBird database

    The queries read two tables, ``checklist`` and ``observation``, joined on
    "SAMPLING EVENT IDENTIFIER".  Every query keeps only checklists with
    "ALL SPECIES REPORTED" = 1, a "PROTOCOL TYPE" other than Incidental, and
    an "EFFORT DISTANCE KM" of at most ``max_dist``.
    """

    # Join shared by every query that needs per-species observation rows.
    _JOIN = ('from checklist join observation on '
             'checklist."SAMPLING EVENT IDENTIFIER" = observation."SAMPLING EVENT IDENTIFIER"')

    def __init__(self, db_file):
        super().__init__(db_file)

    @staticmethod
    def _bird_name(bird):
        """Returns the common name of a bird given either as a string or as
        an object exposing the name in a ``name`` attribute."""
        return getattr(bird, "name", bird)

    @staticmethod
    def _checklist_conditions(params, table=None, state=None, breeding=True,
                              date_range=None, lat_range=None, lng_range=None,
                              max_dist=2):
        """Builds the checklist filters shared by all queries.

        :param params: dict of named SQL parameters; the values needed by the
            returned conditions are added to it in place
        :param table: optional table name used to qualify the column names
        :returns: list of SQL conditions, to be joined with "and"
        """
        col = "" if table is None else table + "."
        # String literals use single quotes: SQLite treats double-quoted text
        # as an identifier and only falls back to a string as a legacy quirk.
        conditions = [
            col + '"ALL SPECIES REPORTED" = 1',
            col + "\"PROTOCOL TYPE\" != 'Incidental'",
            col + '"EFFORT DISTANCE KM" <= :dist',
        ]
        params["dist"] = max_dist
        if state is not None:
            conditions.append(col + '"STATE CODE" = :state')
            params["state"] = state
        if breeding:
            # Characters 6-7 of a "YYYY-MM-DD" date are the month; keep apr-june.
            conditions.append(
                "substr(" + col + "\"OBSERVATION DATE\", 6, 2) in ('04', '05', '06')")
        ranges = (
            ("OBSERVATION DATE", "date", date_range),
            ("LATITUDE", "lat", lat_range),
            ("LONGITUDE", "lng", lng_range),
        )
        for column, key, bounds in ranges:
            if bounds is None:
                continue
            params["min_" + key], params["max_" + key] = bounds
            conditions.append('{}"{}" >= :min_{}'.format(col, column, key))
            conditions.append('{}"{}" <= :max_{}'.format(col, column, key))
        return conditions

    @staticmethod
    def _build_query(head, conditions, verbose=False):
        """Assembles ``head where c1 and c2 ...`` and optionally prints it."""
        query = head + " where " + " and ".join(conditions)
        if verbose:
            print("Query:", query)
        return query

    def get_all_squares(self, state=None,
                        breeding=True, date_range=None,
                        lat_range=None, lng_range=None, max_dist=2,
                        verbose=False):
        """
        Gets all squares with bird (any bird) observations, for a certain state,
        and within certain lat, lng, and date ranges.
        :param state (str): state code
        :param breeding (boolean): whether to filter observations by breeding months (getting only apr-june)
        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
            (any of further distance will be too noisy, and should be disregarded)
        :param verbose (boolean): if True, the query is printed
        :returns: list of squares which fall within the query parameters
            (empty if the query fails)
        """
        params = {}
        conditions = self._checklist_conditions(
            params, state=state, breeding=breeding, date_range=date_range,
            lat_range=lat_range, lng_range=lng_range, max_dist=max_dist)
        query = self._build_query('select DISTINCT SQUARE from checklist', conditions, verbose)
        rows = self.execute_query((query, params))
        return [row[0] for row in rows or []]

    def get_square_observations(self, square, bird,
                                breeding=True, date_range=None,
                                lat_range=None, lng_range=None, max_dist=2,
                                verbose=False):
        """
        Get the number of checklists, number of checklists with a bird,
        total time, total distance, and total bird sightings, for a square.
        :param square: value matched against checklist.SQUARE (presumably the
            'lat;lng' string form such as '4406;-12131' -- confirm with callers)
        :param bird: common name of the bird, or an object with a ``name`` attribute
        :param breeding (boolean): whether to filter observations by breeding months (getting only apr-june)
        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
            (any of further distance will be too noisy, and should be disregarded)
        :param verbose (boolean): if True, the queries are printed
        :returns: dict with keys num_checklists, num_bird_checklists, num_birds,
            total_km and total_minutes for the given square.
        """
        filters = dict(breeding=breeding, date_range=date_range,
                       lat_range=lat_range, lng_range=lng_range, max_dist=max_dist)

        # Number of checklists, total distance and total time.
        params = {"square": square}
        conditions = ["SQUARE = :square"] + self._checklist_conditions(params, **filters)
        query = self._build_query(
            'select COUNT(*), SUM("EFFORT DISTANCE KM"), SUM("DURATION MINUTES") FROM checklist',
            conditions, verbose)
        num_checklists = total_km = total_minutes = 0.0
        rows = self.execute_query((query, params))
        if rows:
            # SUM() is NULL (None) when no checklist matches; count that as 0.
            num_checklists, total_km, total_minutes = (float(v or 0) for v in rows[0])

        # Number of checklists with the bird, and the total number of birds.
        params = {"square": square, "bird": self._bird_name(bird)}
        conditions = (["checklist.SQUARE = :square"]
                      + self._checklist_conditions(params, table="checklist", **filters)
                      + ['observation."COMMON NAME" = :bird'])
        query = self._build_query(
            'select COUNT(DISTINCT checklist."SAMPLING EVENT IDENTIFIER"), '
            'SUM(observation."OBSERVATION COUNT") ' + self._JOIN,
            conditions, verbose)
        num_bird_checklists = num_birds = 0.0
        rows = self.execute_query((query, params))
        if rows:
            num_bird_checklists = float(rows[0][0] or 0)
            num_birds = float(rows[0][1] or 0)
        return dict(
            num_checklists=num_checklists,
            num_bird_checklists=num_bird_checklists,
            num_birds=num_birds,
            total_km=total_km,
            total_minutes=total_minutes,
        )

    def get_square_individual_checklists(self, square, bird,
                                         breeding=True, date_range=None,
                                         lat_range=None, lng_range=None, max_dist=2,
                                         verbose=False):
        """
        Get the checklists for a square, so that statistics can be computed.
        The result is returned as a dataframe.

        :param square: value matched against checklist.SQUARE (presumably the
            'lat;lng' string form such as '4406;-12131' -- confirm with callers)
        :param bird: common name of the bird, or an object with a ``name`` attribute
        :param breeding (boolean): whether to filter observations by breeding months (getting only apr-june)
        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
            (any of further distance will be too noisy, and should be disregarded)
        :param verbose (boolean): if True, the queries are printed
        :returns: dataframe with one row per checklist of the square, holding
            the checklist identifier and a "Count" column with the
            "OBSERVATION COUNT" recorded for the bird (0 when the bird is
            not on the checklist)
        """
        filters = dict(breeding=breeding, date_range=date_range,
                       lat_range=lat_range, lng_range=lng_range, max_dist=max_dist)

        # First the checklists, with or without the bird.
        params = {"square": square}
        conditions = ["SQUARE = :square"] + self._checklist_conditions(params, **filters)
        query = self._build_query(
            'select DISTINCT("SAMPLING EVENT IDENTIFIER") FROM checklist',
            conditions, verbose)
        checklists_df = pd.read_sql_query(query, self.conn, params=params)

        # Then the count of the bird on each checklist that reports it.
        params = {"square": square, "bird": self._bird_name(bird)}
        conditions = (["checklist.SQUARE = :square"]
                      + self._checklist_conditions(params, table="checklist", **filters)
                      + ['observation."COMMON NAME" = :bird'])
        query = self._build_query(
            'select checklist."SAMPLING EVENT IDENTIFIER", '
            'observation."OBSERVATION COUNT" ' + self._JOIN,
            conditions, verbose)
        counts = {}
        for event_id, count in self.execute_query((query, params)) or []:
            # If a checklist has several rows for the bird, the last one wins.
            counts[event_id] = count
        checklists_df["Count"] = [
            counts.get(event_id, 0)
            for event_id in checklists_df["SAMPLING EVENT IDENTIFIER"]
        ]
        return checklists_df

    def get_squares_with_bird(self, bird, max_dist=1, breeding=False,
                              date_range=None, lat_range=None, lng_range=None,
                              state=None, verbose=False):
        """Gets all the squares where a bird has been sighted.  This is used
        primarily to refine the terrain resistance.
        :param bird: Common name of the bird, or an object with a ``name`` attribute
        :param max_dist: max length of the checklist in Km
        :param breeding: whether to consider only the breeding period or not
        :param date_range: date range in years, as a string tuple of yyyy-mm-dd dates
        :param lat_range: range of latitudes to consider, as number tuple, optional.
        :param lng_range: range of longitudes to consider, as number tuple, optional.
        :param state: state, to limit the query. Example: "US-CA".  When None,
            squares from every state are returned.
        :param verbose: if True, more debugging information is printed.
        :return: List of squares with the bird (empty if the query fails).
        """
        params = {"bird": self._bird_name(bird)}
        conditions = ['observation."COMMON NAME" = :bird'] + self._checklist_conditions(
            params, table="checklist", state=state, breeding=breeding,
            date_range=date_range, lat_range=lat_range, lng_range=lng_range,
            max_dist=max_dist)
        query = self._build_query(
            'select DISTINCT checklist.SQUARE ' + self._JOIN, conditions, verbose)
        rows = self.execute_query((query, params))
        return [row[0] for row in rows or []]

    def get_observation_ratios(self, bird, min_checklists, bigsquare=False,
                               max_dist=1, verbose=False,
                               state=None, breeding=True):
        """This function is not used now.  It was the old, deprecated way of
        doing validation, and we are keeping the code for reference only.

        :returns: dict mapping each square (or big square) having at least
            min_checklists checklists to the fraction of those checklists
            that contain the bird
        """
        square_column = 'BIGSQUARE' if bigsquare else 'SQUARE'

        # First, count the checklists in every square.
        params = {}
        conditions = self._checklist_conditions(
            params, state=state, breeding=breeding, max_dist=max_dist)
        query = self._build_query(
            'select "SAMPLING EVENT IDENTIFIER", ' + square_column + ' from checklist',
            conditions, verbose)
        checklists_per_square = defaultdict(int)
        for _, sq in self.execute_query((query, params)) or []:
            checklists_per_square[sq] += 1
        # Keep only the squares with a minimum of checklists.
        checklists_per_square = {sq: c for sq, c in checklists_per_square.items()
                                 if c >= min_checklists}

        # Then count, in those squares, the checklists that contain the bird.
        params = {"bird": self._bird_name(bird)}
        conditions = ['observation."COMMON NAME" = :bird'] + self._checklist_conditions(
            params, table="checklist", state=state, breeding=breeding, max_dist=max_dist)
        query = self._build_query(
            'select DISTINCT checklist."SAMPLING EVENT IDENTIFIER", checklist.'
            + square_column + ' ' + self._JOIN,
            conditions, verbose)
        good_checklists_per_square = defaultdict(int)
        for _, sq in self.execute_query((query, params)) or []:
            if sq in checklists_per_square:  # Otherwise, too few observations.
                good_checklists_per_square[sq] += 1
        for sq, total in checklists_per_square.items():
            if good_checklists_per_square[sq] > total:
                print("Too many checklists at", sq, good_checklists_per_square[sq], total)
        return {sq: (good_checklists_per_square[sq] / total)
                for sq, total in checklists_per_square.items()}
449
+
450
+
451
def format_coords(coords, bigsquare=False):
    """
    formats coords from the eBird database format '4406;-12131' to
    tuple (44.065, -121.315) for (lat, lng) in WGS84 format, i.e. the
    center of the square the database string stands for.
    :param coords (str): coordinates in eBird database format (ie '4406;-12131')
    :param bigsquare (bool): option is used in case these are big squares (one less decimal).
    :returns: tuple (lat, long)
    """
    scale, half_cell = (10, 0.05) if bigsquare else (100, 0.005)

    def cell_center(text):
        text = text.strip()
        # Integer parsing (instead of splicing a '.' into the string) keeps
        # values with fewer digits than the scale correct: '5' is 0.05.
        magnitude = int(text.lstrip('+-')) / scale + half_cell
        # The stored values were truncated towards zero (awk rounding), so
        # the center of the square lies half a cell further away from zero,
        # whatever the sign of the coordinate.
        return -magnitude if text.startswith('-') else magnitude

    lat, long = coords.split(';')
    return (cell_center(lat), cell_center(long))
469
+
470
+
471
def transform_coords(geotiff, coord):
    """
    Reprojects a WGS84 point into the coordinate reference system of a geotiff.
    :param geotiff (scgt.GeoTiff): geotiff whose CRS (geotiff.crs) is the target of the transformation
    :param coord: tuple of 2 floats (lat, lng), representing coordinates in WGS84 format
    :returns: tuple (x, y): the result of transforming (lng, lat) to the CRS of geotiff
    """
    latitude, longitude = coord
    target_crs = CRS.from_user_input(geotiff.crs)
    to_target = Transformer.from_crs("WGS84", target_crs, always_xy=True)
    # The transformer is built with always_xy=True and is therefore fed the
    # point in (lng, lat) order, the reverse of the order coord arrives in.
    projected_x, projected_y = to_target.transform(longitude, latitude)
    return (projected_x, projected_y)
482
+
483
+
484
+ """
485
+ A module for common functionality in the validation process using eBird data
486
+ """
487
class Validation(object):
    """Common functionality for validating repopulation geotiffs against
    eBird observations."""

    def __init__(self, obs_fn, geotiff_fn):
        """
        Generates a class for validation.
        It first tries to read the cached version of obs_fn for the specified geotiff_fn.
        If the cached version is not found, it is created.
        The cached version contains the coordinates pre-translated to the
        CRS of the geotiff.
        :param obs_fn: Observations filename.
        :param geotiff_fn: name of a geotiff (repopulation is preferred) used
            for translating coordinates.
        """
        self.obs_fn = obs_fn
        self.geotiff_fn = geotiff_fn
        # The cache name depends only on the two file names, not on the file
        # contents: a changed observation file does not invalidate the cache.
        h = hashlib.sha1(obs_fn.encode('utf-8'))
        h.update(geotiff_fn.encode('utf-8'))
        cached_fn = obs_fn + "." + h.hexdigest() + ".csv"
        if not os.path.exists(cached_fn):
            self._create_cached_observations(cached_fn)
        self.observations = pd.read_csv(cached_fn)

    def _create_cached_observations(self, cached_fn):
        """Creates a cached version of the observations that also contains
        the columns lat, lng, pix_x and pix_y.

        pix_x and pix_y hold the output of transform_coords for the square,
        i.e. its coordinates in the CRS of the geotiff.
        """
        # Opened as a context manager, as in plot_observations, so that the
        # geotiff is released once the coordinates have been computed.
        with GeoTiff.from_file(self.geotiff_fn) as geotiff:
            def f(row):
                square = row["Square"]
                if (isinstance(square, str)):
                    coords = format_coords(square)
                else:
                    coords = square
                lat, lng = coords
                pix_x, pix_y = transform_coords(geotiff, coords)
                return lat, lng, pix_x, pix_y
            df = pd.read_csv(self.obs_fn)
            df["lat"], df["lng"], df["pix_x"], df["pix_y"] = zip(*df.apply(f, axis=1))
        df.to_csv(cached_fn)

    @staticmethod
    def _ratio_pairs(observation_ratios):
        """Returns the observation ratios as a list of (square, ratio) pairs,
        whether they are given as a dict or as an iterable of pairs."""
        if isinstance(observation_ratios, dict):
            return list(observation_ratios.items())
        return list(observation_ratios)

    def filter_CA_rectangle(self, observation_ratios, bigsquare=False):
        """
        Filters observation ratios, keeping only the ones whose square falls
        in a latitude/longitude rectangle around California.
        :param observation_ratios: dict mapping square to observation ratio
            (an iterable of (square, observation_ratio) pairs is accepted too);
            squares are in eBird database string format
        :param bigsquare: whether the squares are big squares
        :returns: dict mapping square to observation ratio, with only the
            squares in the rectangle
        """
        # California rectangle
        ca_lng_max = -113
        ca_lng_min = -125
        ca_lat_max = 43
        ca_lat_min = 32
        result = {}
        for square, ratio in self._ratio_pairs(observation_ratios):
            lat, lng = format_coords(square, bigsquare=bigsquare)
            if ca_lat_min <= lat <= ca_lat_max and ca_lng_min <= lng <= ca_lng_max:
                result[square] = ratio
        return result

    def plot_observations(self, observation_ratios, hab_fn, output_path,
                          bigsquare=False, obs_multiplier=1):
        """
        Creates a Geotiff with the observation ratios plotted
        :param observation_ratios: list of tuples (square, observation_ratio),
            or dict mapping square to observation ratio
        :param hab_fn: file path to the habitat geotiff to clone
        :param output_path: file path to create our new geotiff
        :param bigsquare: whether the squares are big squares
        :param obs_multiplier: scalar to multiply the observation_ratios by
        """
        tile_scale = 30 if bigsquare else 3
        with GeoTiff.from_file(hab_fn) as hab_f:
            with hab_f.clone_shape(output_path, no_data_value=-1, dtype='float32') as obsTiff:
                for (square, observed) in self._ratio_pairs(observation_ratios):
                    if (isinstance(square, str)):
                        square = format_coords(square, bigsquare=bigsquare)
                    coord = transform_coords(obsTiff, square)
                    obsTiff.set_tile_from_coord(coord, observed * obs_multiplier, tile_scale)

    ### Correlation Functions ###
    def get_df_correlation(self, df):
        """Returns the pairwise correlation of the columns of df (df.corr())."""
        return df.corr()

    # Weighted correlation coefficient
    def weighted_correlation(self, df):
        '''
        :param df: dataframe with 3 columns: 'repop', 'obs_ratio', and 'weight'
        :returns: the weighted correlation coefficient of the df
        '''
        # Weighted Mean
        def m(x, w):
            return np.sum(x * w) / np.sum(w)

        # Weighted Covariance
        def cov(x, y, w):
            return np.sum(w * (x - m(x, w)) * (y - m(y, w))) / np.sum(w)

        repop, obs_ratio, weight = df['repop'], df['obs_ratio'], df['weight']
        # Weighted Correlation
        return cov(repop, obs_ratio, weight) / np.sqrt(
            cov(repop, repop, weight) * cov(obs_ratio, obs_ratio, weight))

    def weighted_repop_to_observation_ratio_df(
            self, repop_tif, hab, observation_ratios, bigsquare=False,
            tile_scale=4, weighted_tile_size=100):
        '''
        :param repop_tif: repopulation geotiff
        :param hab: habitat geotiff used to compute repop
        :param observation_ratios: list of pairs (square, observation ratio) from ebird,
            or dict mapping square to observation ratio.
        :param bigsquare: whether the squares are big squares
        :param tile_scale: size of the tile around square
        :param weighted_tile_size: size of the tile to attribute grouped weights to
        :returns: a dataframe with columns repop, hab, max_repop, max_hab,
            obs_ratio, lat, lng, x, y and weight; weight is 1 / (number of
            squares falling in the same weighted tile)
        '''
        assert repop_tif.crs == hab.crs, "Repopulation and habitat geotiffs must have the same CRS"
        assert repop_tif.size == hab.size, "Repopulation and habitat geotiffs must have the same size"
        columns = ['repop', 'hab', 'max_repop', 'max_hab', 'obs_ratio', 'lat', 'lng', 'x', 'y']
        records = []
        count = defaultdict(int)
        for (square, ratio) in self._ratio_pairs(observation_ratios):
            if (isinstance(square, str)):
                coords = format_coords(square, bigsquare=bigsquare)
            else:
                coords = square
            lat, lng = coords
            coords = transform_coords(repop_tif, coords)
            repop_tile = repop_tif.get_tile_from_coord(coords, tile_scale=tile_scale)
            hab_tile = hab.get_tile_from_coord(coords, tile_scale=tile_scale)
            if repop_tile is None or hab_tile is None:
                continue
            x, y = repop_tif.get_pixel_from_coord(coords)
            count[(x // weighted_tile_size, y // weighted_tile_size)] += 1
            # Rows are collected in a list and turned into a dataframe once,
            # instead of concatenating a one-row dataframe per square.
            records.append({
                'repop': np.average(repop_tile.m),
                'hab': np.average(hab_tile.m),
                'max_repop': np.max(repop_tile.m),
                'max_hab': np.max(hab_tile.m),
                'obs_ratio': ratio,
                'lat': lat,
                'lng': lng,
                'x': x,
                'y': y,
            })
        df = pd.DataFrame.from_records(records, columns=columns)
        # Now adds the weight column.
        df['weight'] = [
            1 / count[(rec['x'] // weighted_tile_size, rec['y'] // weighted_tile_size)]
            for rec in records
        ]
        return df

    def get_repop_ratios(self, repop_tif, hab_tif, tile_scale=3, div_by_255=False):
        """
        Returns a copy of the cached observations with the columns avg_repop,
        avg_hab, max_repop and max_hab added, computed on the tile around the
        (pix_x, pix_y) coordinates of each row.  Rows for which a tile cannot
        be obtained get pd.NA in the four columns.
        :param repop_tif: repopulation geotiff
        :param hab_tif: habitat geotiff used to compute repop
        :param tile_scale: size of the tile around square
        :param div_by_255: if True, the repopulation values (not the habitat
            ones) are divided by 255
        """
        df = self.observations.copy()
        def f(row):
            coords = (row["pix_x"], row["pix_y"])
            repop_tile = repop_tif.get_tile_from_coord(coords, tile_scale=tile_scale)
            hab_tile = hab_tif.get_tile_from_coord(coords, tile_scale=tile_scale)
            if repop_tile is None or hab_tile is None:
                return pd.NA, pd.NA, pd.NA, pd.NA
            avg_repop = np.average(repop_tile.m)
            avg_hab = np.average(hab_tile.m)
            max_repop = np.max(repop_tile.m)
            max_hab = np.max(hab_tile.m)
            if div_by_255:
                avg_repop /= 255.
                max_repop /= 255.
            return avg_repop, avg_hab, max_repop, max_hab
        df["avg_repop"], df["avg_hab"], df["max_repop"], df["max_hab"] = zip(*df.apply(f, axis=1))
        return df
658
+
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: ecoscape-utilities
3
- Version: 0.0.25
3
+ Version: 0.0.32
4
4
  Summary: A collection of EcoScape utilities.
5
5
  Author-email: Luca de Alfaro <luca@ucsc.edu>, Coen Adler <ctadler@ucsc.edu>, Artie Nazarov <anazarov@ucsc.edu>, Natalia Ocampo-Peñuela <nocampop@ucsc.edu>, Jasmine Tai <cjtai@ucsc.edu>, Natalie Valett <nvalett@ucsc.edu>
6
6
  Project-URL: Homepage, https://github.com/ecoscape-earth/ecoscape-utilities
@@ -11,6 +11,11 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.7
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE.md
14
+ Requires-Dist: pandas>=1.0.0
15
+ Requires-Dist: scgt>=0.0.20
16
+ Requires-Dist: pyproj>=3.0.0
17
+ Requires-Dist: numpy>=1.0.0
18
+ Dynamic: license-file
14
19
 
15
20
  # EcoScape Utilities
16
21
 
@@ -4,7 +4,9 @@ pyproject.toml
4
4
  requirements.txt
5
5
  ecoscape_utilities/__init__.py
6
6
  ecoscape_utilities/bird_runs.py
7
+ ecoscape_utilities/ebird_db.py
7
8
  ecoscape_utilities.egg-info/PKG-INFO
8
9
  ecoscape_utilities.egg-info/SOURCES.txt
9
10
  ecoscape_utilities.egg-info/dependency_links.txt
11
+ ecoscape_utilities.egg-info/requires.txt
10
12
  ecoscape_utilities.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ pandas>=1.0.0
2
+ scgt>=0.0.20
3
+ pyproj>=3.0.0
4
+ numpy>=1.0.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ecoscape-utilities"
7
- version = "0.0.25"
7
+ version = "0.0.32"
8
8
  authors = [
9
9
  {name="Luca de Alfaro", email="luca@ucsc.edu"},
10
10
  {name="Coen Adler", email="ctadler@ucsc.edu"},
@@ -0,0 +1,4 @@
1
+ pandas>=1.0.0
2
+ scgt>=0.0.20
3
+ pyproj>=3.0.0
4
+ numpy>=1.0.0
File without changes