ecoscape-utilities 0.0.25__py3-none-any.whl → 0.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ecoscape-utilities might be problematic. Click here for more details.
- ecoscape_utilities/__init__.py +1 -0
- ecoscape_utilities/ebird_db.py +658 -0
- {ecoscape_utilities-0.0.25.dist-info → ecoscape_utilities-0.0.32.dist-info}/METADATA +7 -2
- ecoscape_utilities-0.0.32.dist-info/RECORD +8 -0
- {ecoscape_utilities-0.0.25.dist-info → ecoscape_utilities-0.0.32.dist-info}/WHEEL +1 -1
- ecoscape_utilities-0.0.25.dist-info/RECORD +0 -7
- {ecoscape_utilities-0.0.25.dist-info → ecoscape_utilities-0.0.32.dist-info/licenses}/LICENSE.md +0 -0
- {ecoscape_utilities-0.0.25.dist-info → ecoscape_utilities-0.0.32.dist-info}/top_level.txt +0 -0
ecoscape_utilities/__init__.py
CHANGED
|
@@ -0,0 +1,658 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import numpy as np
|
|
3
|
+
import os
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import sqlite3
|
|
6
|
+
from sqlite3 import Error
|
|
7
|
+
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from pyproj import Transformer
|
|
10
|
+
from pyproj.crs import CRS
|
|
11
|
+
|
|
12
|
+
from scgt import GeoTiff
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
A module for interaction with a sqlite database. Contains functions for query execution,
|
|
16
|
+
and some common functionality we need to run on the DB
|
|
17
|
+
"""
|
|
18
|
+
class Connection:
    """Thin wrapper around a SQLite connection.

    Instances can be used as context managers; the underlying connection
    is closed when the ``with`` block exits.
    """

    def __init__(self, db_file):
        """Initializes the connection to the SQLite database
        :param db_file: The file path to the database file
        """
        conn = None
        try:
            conn = sqlite3.connect(db_file)
        except Error as e:
            # Best effort: the failure is reported and self.conn stays None,
            # in which case get_cursor() returns None as well.
            print("Error in create_connection: ", e)
        self.conn = conn

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Exceptions raised inside the with-block are not suppressed.
        if self.conn:
            self.conn.close()

    def get_cursor(self):
        """
        Creates a new cursor on the connection.
        :returns: a cursor, or None if the connection is missing or closed
        """
        try:
            cur = self.conn.cursor()
        except (AttributeError, Error):
            # AttributeError: self.conn is None because connecting failed.
            # sqlite3.Error: the connection is unusable (e.g. already closed).
            print("error connecting to db")
            cur = None
        return cur

    def execute_query(self, query, verbose=False):
        """
        executes the given query in the database
        :param query: either a sqlite query string, or a pair
            (query string, parameters) where the parameters are passed on
            to the cursor's execute method
        :param verbose (boolean): flag to print out result of the query
        :returns: result of the query as a list of rows, or None if the
            query failed (the error is printed, not raised)
        """
        try:
            cur = self.get_cursor()
            try:
                if isinstance(query, str):
                    cur.execute(query)
                else:
                    cur.execute(query[0], query[1])
                # Consume the result set before committing, so that no
                # statement is still in progress at commit time.
                rows = cur.fetchall()
                self.conn.commit()
            finally:
                if cur is not None:
                    cur.close()
        except Exception as e:
            print("Error executing query:\n\t", query, ".\n Error: ", e)
            return None
        if verbose:
            for row in rows:
                print(row)
        return rows
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class EbirdObservations(Connection):
    """Class for eBird-specific connections, includes functionality particular to the eBird database.

    All queries read the ``checklist`` table; the per-bird queries join it
    with the ``observation`` table on "SAMPLING EVENT IDENTIFIER".
    Every query keeps only checklists with "ALL SPECIES REPORTED" = 1,
    a "PROTOCOL TYPE" other than 'Incidental', and an
    "EFFORT DISTANCE KM" of at most ``max_dist``.
    """

    # Two-digit month strings of the breeding season (April through June).
    _BREEDING_MONTHS = ("04", "05", "06")

    # Join clause shared by all the queries that ask about a specific bird.
    _CHECKLIST_JOIN_OBSERVATION = (
        'from checklist join observation'
        ' on checklist."SAMPLING EVENT IDENTIFIER" = observation."SAMPLING EVENT IDENTIFIER"'
    )

    def __init__(self, db_file):
        super().__init__(db_file)

    @staticmethod
    def _bird_name(bird):
        """Returns the common name for a bird given either as a string or as an object with a ``name`` attribute."""
        return getattr(bird, "name", bird)

    @classmethod
    def _checklist_conditions(cls, params, state=None, breeding=True,
                              date_range=None, lat_range=None, lng_range=None,
                              max_dist=2):
        """
        Builds the conditions on the checklist table shared by all queries.
        :param params (dict): dictionary of named query parameters; the values
            needed by the returned conditions are added to it in place
        :param state (str): state code, or None for no state filter
        :param breeding (boolean): whether to keep only observations in the breeding months (apr-june)
        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD", or None
        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude, or None
        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude, or None
        :param max_dist: max kilometers traveled for the checklist
        :returns: list of SQL conditions (without leading 'and'), to be joined with ' and '
        """
        def col(name):
            return 'checklist."' + name + '"'

        # String literals use single quotes: double quotes denote identifiers
        # in SQL, and SQLite only accepts them as strings for compatibility.
        conditions = [
            col("ALL SPECIES REPORTED") + " = 1",
            col("PROTOCOL TYPE") + " != 'Incidental'",
            col("EFFORT DISTANCE KM") + " <= :dist",
        ]
        params["dist"] = max_dist
        if state is not None:
            conditions.append(col("STATE CODE") + " = :state")
            params["state"] = state
        if breeding:
            # Characters 6-7 of a YYYY-MM-DD date are the month.
            months = ", ".join("'" + month + "'" for month in cls._BREEDING_MONTHS)
            conditions.append(
                "substr(" + col("OBSERVATION DATE") + ", 6, 2) in (" + months + ")")
        if date_range is not None:
            conditions.append(col("OBSERVATION DATE") + " >= :min_date")
            conditions.append(col("OBSERVATION DATE") + " <= :max_date")
            params["min_date"], params["max_date"] = date_range
        if lat_range is not None:
            conditions.append(col("LATITUDE") + " >= :min_lat")
            conditions.append(col("LATITUDE") + " <= :max_lat")
            params["min_lat"], params["max_lat"] = lat_range
        if lng_range is not None:
            conditions.append(col("LONGITUDE") + " >= :min_lng")
            conditions.append(col("LONGITUDE") + " <= :max_lng")
            params["min_lng"], params["max_lng"] = lng_range
        return conditions

    def get_all_squares(self, state=None,
                        breeding=True, date_range=None,
                        lat_range=None, lng_range=None, max_dist=2,
                        verbose=False):
        """
        Gets all squares with bird (any bird) observations, for a certain state,
        and within certain lat, lng, and date ranges.
        :param state (str): state code
        :param breeding (boolean): whether to filter observations by breeding months (getting only apr-june)
        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
            (any of further distance will be too noisy, and should be disregarded)
        :param verbose (boolean): if True, the query is printed
        :returns: list of squares which fall within the query parameters
            (empty if the query fails)
        """
        d = {}
        conditions = self._checklist_conditions(
            d, state=state, breeding=breeding, date_range=date_range,
            lat_range=lat_range, lng_range=lng_range, max_dist=max_dist)
        query_string = ('select DISTINCT checklist.SQUARE from checklist where '
                        + ' and '.join(conditions))
        if verbose:
            print("Query:", query_string)
        squares_list = self.execute_query((query_string, d))
        # execute_query returns None on failure (after printing the error).
        return [sq[0] for sq in (squares_list or [])]

    def get_square_observations(self, square, bird,
                                breeding=True, date_range=None,
                                lat_range=None, lng_range=None, max_dist=2,
                                verbose=False):
        """
        Get the number of checklists, number of checklists with a bird,
        total time, total distance, and total bird sightings, for a square.
        :param square: value matched against the SQUARE column of the checklist table
            (presumably the string form handled by format_coords, e.g. '4406;-12131' - confirm)
        :param bird: the bird, either as its common name (str) or as an object
            whose ``name`` attribute is the common name
        :param breeding (boolean): whether to filter observations by breeding months (getting only apr-june)
        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
            (any of further distance will be too noisy, and should be disregarded)
        :param verbose (boolean): if True, the queries are printed
        :returns: dictionary with keys num_checklists, num_bird_checklists,
            num_birds, total_km, total_minutes for the given square.
        """
        d = {'square': square}
        conditions = ['checklist.SQUARE = :square'] + self._checklist_conditions(
            d, breeding=breeding, date_range=date_range,
            lat_range=lat_range, lng_range=lng_range, max_dist=max_dist)
        where = ' where ' + ' and '.join(conditions)

        # Gets the number of checklists, the total distance, and the total time.
        query_string = ('select COUNT(*), SUM(checklist."EFFORT DISTANCE KM"),'
                        ' SUM(checklist."DURATION MINUTES") from checklist' + where)
        if verbose:
            print("Query:", query_string)
        r = self.execute_query((query_string, d))
        num_checklists = 0
        total_km = 0
        total_minutes = 0
        if r:
            n, km, minutes = r[0]
            num_checklists = float(n)
            # SUM() is NULL (None) when no row matches or all values are NULL.
            total_km = 0 if km is None else float(km)
            total_minutes = 0 if minutes is None else float(minutes)

        # Then, the number of checklists with the bird, and the total number of birds.
        d["bird"] = self._bird_name(bird)
        query_string = ('select COUNT(DISTINCT checklist."SAMPLING EVENT IDENTIFIER"),'
                        ' SUM(observation."OBSERVATION COUNT") '
                        + self._CHECKLIST_JOIN_OBSERVATION + where
                        + ' and observation."COMMON NAME" = :bird')
        if verbose:
            print("Query:", query_string)
        r = self.execute_query((query_string, d))
        num_bird_checklists = 0
        num_birds = 0
        if r:
            n, birds = r[0]
            num_bird_checklists = float(n)
            num_birds = 0 if birds is None else float(birds)
        return dict(
            num_checklists=num_checklists,
            num_bird_checklists=num_bird_checklists,
            num_birds=num_birds,
            total_km=total_km,
            total_minutes=total_minutes,
        )

    def get_square_individual_checklists(self, square, bird,
                                         breeding=True, date_range=None,
                                         lat_range=None, lng_range=None, max_dist=2,
                                         verbose=False):
        """
        Get the checklists for a square, so that statistics can be computed.
        The result is returned as a dataframe.
        :param square: value matched against the SQUARE column of the checklist table
            (presumably the string form handled by format_coords, e.g. '4406;-12131' - confirm)
        :param bird: the bird, either as its common name (str) or as an object
            whose ``name`` attribute is the common name
        :param breeding (boolean): whether to filter observations by breeding months (getting only apr-june)
        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
            (any of further distance will be too noisy, and should be disregarded)
        :param verbose (boolean): if True, the queries are printed
        :returns: dataframe with one row per checklist of the square, with
            columns "SAMPLING EVENT IDENTIFIER" and "Count"; Count is the
            "OBSERVATION COUNT" stored for the bird in that checklist, or 0
            if the checklist has no observation of the bird.
        """
        d = {'square': square}
        conditions = ['checklist.SQUARE = :square'] + self._checklist_conditions(
            d, breeding=breeding, date_range=date_range,
            lat_range=lat_range, lng_range=lng_range, max_dist=max_dist)
        where = ' where ' + ' and '.join(conditions)

        # First the checklists, with or without the bird.
        # The alias pins the column name the dataframe is indexed with below.
        query_string = ('select DISTINCT checklist."SAMPLING EVENT IDENTIFIER"'
                        ' as "SAMPLING EVENT IDENTIFIER" from checklist' + where)
        if verbose:
            print("Query:", query_string)
        checklists_df = pd.read_sql_query(query_string, self.conn, params=d)

        # Then, for the checklists with the bird, the count of the bird.
        d["bird"] = self._bird_name(bird)
        query_string = ('select checklist."SAMPLING EVENT IDENTIFIER",'
                        ' observation."OBSERVATION COUNT" '
                        + self._CHECKLIST_JOIN_OBSERVATION + where
                        + ' and observation."COMMON NAME" = :bird')
        if verbose:
            print("Query:", query_string)
        rows = self.execute_query((query_string, d)) or []
        # If a checklist has several rows for the bird, the last one wins.
        counts = {event_id: count for event_id, count in rows}
        checklists_df["Count"] = checklists_df["SAMPLING EVENT IDENTIFIER"].map(
            lambda event_id: counts.get(event_id, 0))
        return checklists_df

    def get_squares_with_bird(self, bird, max_dist=1, breeding=False,
                              date_range=None, lat_range=None, lng_range=None,
                              state=None, verbose=False):
        """Gets all the squares where a bird has been sighted. This is used
        primarily to refine the terrain resistance.
        :param bird: Common name of the bird (an object with a ``name``
            attribute holding the common name is also accepted)
        :param max_dist: max length of the checklist in Km
        :param breeding: whether to consider only the breeding period or not
        :param date_range: date range, as a string tuple of yyyy-mm-dd dates
        :param lat_range: range of latitudes to consider, as number tuple, optional.
        :param lng_range: range of longitudes to consider, as number tuple, optional.
        :param state: state, to limit the query. Example: "US-CA".
            If None, squares of all states are returned.
        :param verbose: if True, more debugging information is printed.
        :return: List of squares with the bird (empty if the query fails).
        """
        d = {'bird': self._bird_name(bird)}
        # The state condition is only added when a state is given: comparing
        # the column with a NULL parameter would never match any row.
        conditions = ['observation."COMMON NAME" = :bird'] + self._checklist_conditions(
            d, state=state, breeding=breeding, date_range=date_range,
            lat_range=lat_range, lng_range=lng_range, max_dist=max_dist)
        query_string = ('select DISTINCT checklist.SQUARE '
                        + self._CHECKLIST_JOIN_OBSERVATION
                        + ' where ' + ' and '.join(conditions))
        if verbose:
            print("Query:", query_string)
        r = self.execute_query((query_string, d))
        return [row[0] for row in (r or [])]

    def get_observation_ratios(self, bird, min_checklists, bigsquare=False,
                               max_dist=1, verbose=False,
                               state=None, breeding=True):
        """This function is not used now. It was the old, deprecated way of
        doing validation, and we are keeping the code for reference only.
        :param bird: common name of the bird (an object with a ``name``
            attribute holding the common name is also accepted)
        :param min_checklists: minimum number of checklists a square must have to be considered
        :param bigsquare: if True, the BIGSQUARE column is used instead of SQUARE
        :param max_dist: max length of the checklist in Km
        :param verbose: if True, the queries are printed
        :param state: state code, to limit the query; None for no limit
        :param breeding: whether to consider only the breeding months (apr-june)
        :returns: dictionary mapping each square with at least min_checklists
            checklists to the fraction of its checklists that contain the bird
        """
        square_column = 'checklist.BIGSQUARE' if bigsquare else 'checklist.SQUARE'
        # First, I create a dictionary of squares to checklist counts.
        d = {}
        conditions = self._checklist_conditions(
            d, state=state, breeding=breeding, max_dist=max_dist)
        query_string = ('select checklist."SAMPLING EVENT IDENTIFIER", ' + square_column
                        + ' from checklist where ' + ' and '.join(conditions))
        if verbose:
            print("Query:", query_string)
        observations = self.execute_query((query_string, d)) or []
        checklists_per_square = defaultdict(int)
        for _, sq in observations:
            checklists_per_square[sq] += 1
        # Now I keep only the squares with a minimum of checklists.
        checklists_per_square = {sq: c for sq, c in checklists_per_square.items()
                                 if c >= min_checklists}
        # Ok, I care only about these squares.
        # Now I want to know, for each of these squares, how many checklists there are that
        # contain the bird.
        d["bird"] = self._bird_name(bird)
        query_string = ('select DISTINCT checklist."SAMPLING EVENT IDENTIFIER", ' + square_column
                        + ' ' + self._CHECKLIST_JOIN_OBSERVATION
                        + ' where observation."COMMON NAME" = :bird and '
                        + ' and '.join(conditions))
        if verbose:
            print("Query:", query_string)
        observations = self.execute_query((query_string, d)) or []
        good_checklists_per_square = defaultdict(int)
        for _, sq in observations:
            if sq in checklists_per_square:  # Otherwise, too few observations.
                good_checklists_per_square[sq] += 1
        for sq, total in checklists_per_square.items():
            if good_checklists_per_square[sq] > total:
                print("Too many checklists at", sq, good_checklists_per_square[sq], total)
        return {sq: (good_checklists_per_square[sq] / total)
                for sq, total in checklists_per_square.items()}
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def format_coords(coords, bigsquare=False):
    """
    formats coords from the eBird database format '4406;-12131' to
    tuple (44.065, -121.315) for (lat, lng) in WGS84 format: the encoded
    values are hundredths of a degree (tenths for big squares), and the
    result is moved by half a cell away from zero.
    :param coords (str): coordinates in eBird database format (ie '4406;-12131')
    :param bigsquare (bool): option is used in case these are big squares (one less decimal).
    :returns: tuple (lat, long)
    """
    lat, long = coords.split(';')
    scale, half_cell = (10, 0.05) if bigsquare else (100, 0.005)

    def cell_center(text):
        text = text.strip()
        # Parsing as an integer (rather than inserting a '.' into the string)
        # also handles codes with fewer digits than decimals, such as '6' or '-5'.
        value = int(text) / scale
        # The codes were truncated towards zero (see the note on awk in the
        # original code), so the half-cell offset points away from zero.
        # The sign is read from the text so that '-0' is handled too.
        if text.startswith('-'):
            return value - half_cell
        return value + half_cell

    return (cell_center(lat), cell_center(long))
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def transform_coords(geotiff, coord):
    """
    Projects a WGS84 coordinate pair into the CRS of the given geotiff.
    :param geotiff (scgt.GeoTiff): geotiff whose ``crs`` is the target projection
    :param coord: tuple of 2 floats (lat, lng), representing coordinates in WGS84 format
    :returns: tuple (x, y) in the CRS of geotiff
    """
    latitude, longitude = coord
    target_crs = CRS.from_user_input(geotiff.crs)
    # always_xy=True: the transformer takes (lng, lat) and returns (x, y).
    projector = Transformer.from_crs("WGS84", target_crs, always_xy=True)
    projected_x, projected_y = projector.transform(longitude, latitude)
    return (projected_x, projected_y)
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
"""
|
|
485
|
+
A module for common functionality in the validation process using eBird data
|
|
486
|
+
"""
|
|
487
|
+
class Validation(object):
|
|
488
|
+
|
|
489
|
+
def __init__(self, obs_fn, geotiff_fn):
|
|
490
|
+
"""
|
|
491
|
+
Generates a class for validation.
|
|
492
|
+
It first tries to read the cached version of obs_fn for the specified geotiff_fn.
|
|
493
|
+
If the cached version is not found, it is created.
|
|
494
|
+
The cached version contains pre-translated coordinates to pixel values.
|
|
495
|
+
:param obs_fn: Observations filename.
|
|
496
|
+
:param geotiff_fn: name of a geotiff (repopulation is preferred) used
|
|
497
|
+
for translating coordinates to pixel coordinates.
|
|
498
|
+
"""
|
|
499
|
+
self.obs_fn = obs_fn
|
|
500
|
+
self.geotiff_fn = geotiff_fn
|
|
501
|
+
h = hashlib.sha1(obs_fn.encode('utf-8'))
|
|
502
|
+
h.update(geotiff_fn.encode('utf-8'))
|
|
503
|
+
cached_fn = obs_fn + "." + h.hexdigest() + ".csv"
|
|
504
|
+
if not os.path.exists(cached_fn):
|
|
505
|
+
self._create_cached_observations(cached_fn)
|
|
506
|
+
self.observations = pd.read_csv(cached_fn)
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def _create_cached_observations(self, cached_fn):
|
|
510
|
+
"""Creates a cached version of the observations that also contains
|
|
511
|
+
pixel coordinates."""
|
|
512
|
+
geotiff = GeoTiff.from_file(self.geotiff_fn)
|
|
513
|
+
def f(row):
|
|
514
|
+
square = row["Square"]
|
|
515
|
+
if (isinstance(square, str)):
|
|
516
|
+
coords = format_coords(square)
|
|
517
|
+
else:
|
|
518
|
+
coords = square
|
|
519
|
+
lat, lng = coords
|
|
520
|
+
pix_x, pix_y = transform_coords(geotiff, coords)
|
|
521
|
+
return lat, lng, pix_x, pix_y
|
|
522
|
+
df = pd.read_csv(self.obs_fn)
|
|
523
|
+
df["lat"], df["lng"], df["pix_x"], df["pix_y"] = zip(*df.apply(f, axis=1))
|
|
524
|
+
df.to_csv(cached_fn)
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def filter_CA_rectangle(self, observation_ratios, bigsquare=False):
|
|
528
|
+
"""
|
|
529
|
+
Filters observation ratios, keeping only the ones in California.
|
|
530
|
+
:param observation_ratios: list of tuples (square, observation_ratio)
|
|
531
|
+
:returns: list of tuples (square, observation_ratio) with only squares in CA
|
|
532
|
+
"""
|
|
533
|
+
# California rectangle
|
|
534
|
+
ca_lng_max = -113
|
|
535
|
+
ca_lng_min = -125
|
|
536
|
+
ca_lat_max = 43
|
|
537
|
+
ca_lat_min = 32
|
|
538
|
+
result = {}
|
|
539
|
+
for square, ratio in observation_ratios.items():
|
|
540
|
+
lat, lng = format_coords(square, bigsquare=bigsquare)
|
|
541
|
+
if ca_lat_min <= lat <= ca_lat_max and ca_lng_min <= lng <= ca_lng_max:
|
|
542
|
+
result[square] = ratio
|
|
543
|
+
return result
|
|
544
|
+
|
|
545
|
+
def plot_observations(self, observation_ratios, hab_fn, output_path,
|
|
546
|
+
bigsquare=False, obs_multiplier=1):
|
|
547
|
+
"""
|
|
548
|
+
Creates a Geotiff with the observation ratios plotted
|
|
549
|
+
:param observation_ratios: list of tuples (square, observation_ratio)
|
|
550
|
+
:param hab_fn: file path to the habitat geotiff to clone
|
|
551
|
+
:param output_path: file path to create our new geotiff
|
|
552
|
+
:param obs_multiplier: scalar to multiply the observation_ratios by
|
|
553
|
+
"""
|
|
554
|
+
tile_scale = 30 if bigsquare else 3
|
|
555
|
+
with GeoTiff.from_file(hab_fn) as hab_f:
|
|
556
|
+
with hab_f.clone_shape(output_path, no_data_value=-1, dtype='float32') as obsTiff:
|
|
557
|
+
for (square, observed) in observation_ratios:
|
|
558
|
+
if (isinstance(square, str)):
|
|
559
|
+
square = format_coords(square, bigsquare=bigsquare)
|
|
560
|
+
coord = transform_coords(obsTiff, square)
|
|
561
|
+
obsTiff.set_tile_from_coord(coord, observed * obs_multiplier, tile_scale)
|
|
562
|
+
|
|
563
|
+
### Correlation Functions ###
|
|
564
|
+
def get_df_correlation(self, df):
|
|
565
|
+
return df.corr()
|
|
566
|
+
|
|
567
|
+
# Weighted correlation coefficent
|
|
568
|
+
def weighted_correlation(self, df):
|
|
569
|
+
'''
|
|
570
|
+
:param df: dataframe with 3 columns: 'repop', 'obs_ratio', and 'weight'
|
|
571
|
+
:returns: the weighted correlation coefficent of the df
|
|
572
|
+
'''
|
|
573
|
+
# Weighted Mean
|
|
574
|
+
def m(x, w):
|
|
575
|
+
return np.sum(x * w) / np.sum(w)
|
|
576
|
+
|
|
577
|
+
# Weighted Covariance
|
|
578
|
+
def cov(x, y, w):
|
|
579
|
+
return np.sum(w * (x - m(x, w)) * (y - m(y, w))) / np.sum(w)
|
|
580
|
+
|
|
581
|
+
# Weighted Correlation
|
|
582
|
+
return cov(df['repop'], df['obs_ratio'], df['weight']) / np.sqrt(cov(df['repop'], df['repop'], df['weight']) * cov(df['obs_ratio'], df['obs_ratio'], df['weight']))
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def weighted_repop_to_observation_ratio_df(
|
|
586
|
+
self, repop_tif, hab, observation_ratios, bigsquare=False,
|
|
587
|
+
tile_scale=4, weighted_tile_size=100):
|
|
588
|
+
'''
|
|
589
|
+
:param repop_tif: repopulation geotiff
|
|
590
|
+
:param hab: habitat geotiff used to compute repop
|
|
591
|
+
:param observation_ratios: list of pairs (square, observation ratio) from ebird.
|
|
592
|
+
:param tile_scale: percentage of habitat the tile must contain to be considered "in habitat" if being refined by hab
|
|
593
|
+
:param tile_scale: size of the tile around square
|
|
594
|
+
:param weighted_tile_size: size of the tile to attribute grouped weights to
|
|
595
|
+
:returns: a dataframe with columns repopulation, observation ratio, and weights
|
|
596
|
+
'''
|
|
597
|
+
assert repop_tif.crs == hab.crs, "Repopulation and habitat geotiffs must have the same CRS"
|
|
598
|
+
assert repop_tif.size == hab.size, "Repopulation and habitat geotiffs must have the same size"
|
|
599
|
+
df = pd.DataFrame(columns=['repop', 'hab', 'max_repop', 'max_hab', 'obs_ratio', 'lat', 'lng', 'x', 'y', ])
|
|
600
|
+
count = defaultdict(int)
|
|
601
|
+
for (square, ratio) in observation_ratios:
|
|
602
|
+
if (isinstance(square, str)):
|
|
603
|
+
coords = format_coords(square, bigsquare=bigsquare)
|
|
604
|
+
else:
|
|
605
|
+
coords = square
|
|
606
|
+
lat, lng = coords
|
|
607
|
+
coords = transform_coords(repop_tif, coords)
|
|
608
|
+
repop_tile = repop_tif.get_tile_from_coord(coords, tile_scale=tile_scale)
|
|
609
|
+
hab_tile = hab.get_tile_from_coord(hab, coords, tile_scale=tile_scale)
|
|
610
|
+
if repop_tile is None or hab_tile is None:
|
|
611
|
+
continue
|
|
612
|
+
x, y = repop_tif.get_pixel_from_coord(coords)
|
|
613
|
+
x_floor = x // weighted_tile_size
|
|
614
|
+
y_floor = y // weighted_tile_size
|
|
615
|
+
count[(x_floor, y_floor)] += 1
|
|
616
|
+
# df = df.append(
|
|
617
|
+
df = pd.concat([df, pd.DataFrame.from_records([
|
|
618
|
+
{'repop': np.average(repop_tile.m),
|
|
619
|
+
'hab': np.average(hab_tile.m),
|
|
620
|
+
'max_repop': np.max(repop_tile.m),
|
|
621
|
+
'max_hab': np.max(hab_tile.m),
|
|
622
|
+
'obs_ratio': ratio,
|
|
623
|
+
'lat': lat,
|
|
624
|
+
'lng': lng,
|
|
625
|
+
'x': x,
|
|
626
|
+
'y': y,
|
|
627
|
+
}])])
|
|
628
|
+
# Now adds the weight column.
|
|
629
|
+
df['weight'] = df.apply(lambda row:
|
|
630
|
+
1 / count[(row.x // weighted_tile_size, row.y // weighted_tile_size)], axis=1)
|
|
631
|
+
return df
|
|
632
|
+
|
|
633
|
+
def get_repop_ratios(self, repop_tif, hab_tif, tile_scale=3, div_by_255=False):
    """
    Returns a copy of self.observations with repopulation and habitat
    statistics added for each observation.

    For every row, the tile at (pix_x, pix_y) is requested from both geotiffs
    via get_tile_from_coord, and the columns avg_repop, avg_hab, max_repop and
    max_hab are filled with the average and maximum of each tile's values.
    Rows for which either geotiff returns no tile (None) get pd.NA in all
    four columns. If there are no observations, the four columns are still
    added, empty.

    :param repop_tif: repopulation geotiff
    :param hab_tif: habitat geotiff used to compute repop
    :param tile_scale: size of the tile around square
    :param div_by_255: if True, avg_repop and max_repop are divided by 255;
        the habitat columns are never rescaled
    :returns: a dataframe with the columns of self.observations plus
        avg_repop, avg_hab, max_repop, max_hab
    """
    stat_columns = ["avg_repop", "avg_hab", "max_repop", "max_hab"]
    df = self.observations.copy()

    if len(df.index) == 0:
        # DataFrame.apply on a frame without rows does not produce per-row
        # results, so the zip(*...) unpacking below would fail. Add the
        # (empty) statistic columns directly instead.
        for column in stat_columns:
            df[column] = pd.Series(index=df.index, dtype="float64")
        return df

    def tile_stats(row):
        """Computes (avg_repop, avg_hab, max_repop, max_hab) for one observation."""
        coords = (row["pix_x"], row["pix_y"])
        repop_tile = repop_tif.get_tile_from_coord(coords, tile_scale=tile_scale)
        hab_tile = hab_tif.get_tile_from_coord(coords, tile_scale=tile_scale)
        if repop_tile is None or hab_tile is None:
            # No tile could be obtained for this observation.
            return pd.NA, pd.NA, pd.NA, pd.NA
        avg_repop = np.average(repop_tile.m)
        avg_hab = np.average(hab_tile.m)
        max_repop = np.max(repop_tile.m)
        max_hab = np.max(hab_tile.m)
        if div_by_255:
            # Only the repopulation values are rescaled.
            avg_repop = avg_repop / 255.
            max_repop = max_repop / 255.
        return avg_repop, avg_hab, max_repop, max_hab

    # apply yields one 4-tuple per row; zip(*...) transposes them into one
    # sequence per statistic.
    per_row_stats = df.apply(tile_stats, axis=1)
    for column, values in zip(stat_columns, zip(*per_row_stats)):
        df[column] = values
    return df
|
|
658
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: ecoscape-utilities
|
|
3
|
-
Version: 0.0.25
|
|
3
|
+
Version: 0.0.32
|
|
4
4
|
Summary: A collection of EcoScape utilities.
|
|
5
5
|
Author-email: Luca de Alfaro <luca@ucsc.edu>, Coen Adler <ctadler@ucsc.edu>, Artie Nazarov <anazarov@ucsc.edu>, Natalia Ocampo-Peñuela <nocampop@ucsc.edu>, Jasmine Tai <cjtai@ucsc.edu>, Natalie Valett <nvalett@ucsc.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/ecoscape-earth/ecoscape-utilities
|
|
@@ -11,6 +11,11 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.7
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE.md
|
|
14
|
+
Requires-Dist: pandas>=1.0.0
|
|
15
|
+
Requires-Dist: scgt>=0.0.20
|
|
16
|
+
Requires-Dist: pyproj>=3.0.0
|
|
17
|
+
Requires-Dist: numpy>=1.0.0
|
|
18
|
+
Dynamic: license-file
|
|
14
19
|
|
|
15
20
|
# EcoScape Utilities
|
|
16
21
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
ecoscape_utilities/__init__.py,sha256=LXt1rL9JVsjnCmsNZwZ2aoElphIK-koaEDdW6ffZsMQ,50
|
|
2
|
+
ecoscape_utilities/bird_runs.py,sha256=v43PfH_4ojpkTE-EFOJxr0oOW3M9suNm_1zMjZ9P-eI,5409
|
|
3
|
+
ecoscape_utilities/ebird_db.py,sha256=A3xoeymVIZbnxhYwlS9Ex22NJdeUVJDI_PATT0BGL3k,31321
|
|
4
|
+
ecoscape_utilities-0.0.32.dist-info/licenses/LICENSE.md,sha256=3vh2mpA_XIR3FJot6a5F9DqktAoq45sEGIRkYjvAEeU,1304
|
|
5
|
+
ecoscape_utilities-0.0.32.dist-info/METADATA,sha256=7o3bGeSdY49LAM57i-Cknxwt3hHBGMdAHZNTfszgMw0,1382
|
|
6
|
+
ecoscape_utilities-0.0.32.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
7
|
+
ecoscape_utilities-0.0.32.dist-info/top_level.txt,sha256=jLf7iMlySaJg0Vh8z4lbAaqOc5W5ruMgKFvp797CryQ,19
|
|
8
|
+
ecoscape_utilities-0.0.32.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
ecoscape_utilities/__init__.py,sha256=Zk3uTyqbLTdAUiTEutTKG9vI82XD-DQ7SvEB-6h4pcM,26
|
|
2
|
-
ecoscape_utilities/bird_runs.py,sha256=v43PfH_4ojpkTE-EFOJxr0oOW3M9suNm_1zMjZ9P-eI,5409
|
|
3
|
-
ecoscape_utilities-0.0.25.dist-info/LICENSE.md,sha256=3vh2mpA_XIR3FJot6a5F9DqktAoq45sEGIRkYjvAEeU,1304
|
|
4
|
-
ecoscape_utilities-0.0.25.dist-info/METADATA,sha256=8ilvkRRwtKfmhQgmVmpGf-9v4ALdjEQesZkMLig-ycs,1246
|
|
5
|
-
ecoscape_utilities-0.0.25.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
6
|
-
ecoscape_utilities-0.0.25.dist-info/top_level.txt,sha256=jLf7iMlySaJg0Vh8z4lbAaqOc5W5ruMgKFvp797CryQ,19
|
|
7
|
-
ecoscape_utilities-0.0.25.dist-info/RECORD,,
|
{ecoscape_utilities-0.0.25.dist-info → ecoscape_utilities-0.0.32.dist-info/licenses}/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|