ecoscape-utilities 0.0.43__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ecoscape_utilities-0.0.43/LICENSE.md +10 -0
- ecoscape_utilities-0.0.43/PKG-INFO +36 -0
- ecoscape_utilities-0.0.43/README.md +17 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities/__init__.py +3 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities/bird_runs.py +108 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities/ebird_db.py +734 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/PKG-INFO +36 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/SOURCES.txt +12 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/dependency_links.txt +1 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/requires.txt +4 -0
- ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/top_level.txt +1 -0
- ecoscape_utilities-0.0.43/pyproject.toml +35 -0
- ecoscape_utilities-0.0.43/requirements.txt +4 -0
- ecoscape_utilities-0.0.43/setup.cfg +4 -0
ecoscape_utilities-0.0.43/LICENSE.md
@@ -0,0 +1,10 @@
+Copyright 2023, The Regents of the University of California.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ecoscape_utilities-0.0.43/PKG-INFO
@@ -0,0 +1,36 @@
+Metadata-Version: 2.4
+Name: ecoscape-utilities
+Version: 0.0.43
+Summary: A collection of EcoScape utilities.
+Author-email: Luca de Alfaro <luca@ucsc.edu>, Coen Adler <ctadler@ucsc.edu>, Artie Nazarov <anazarov@ucsc.edu>, Natalia Ocampo-Peñuela <nocampop@ucsc.edu>, Jasmine Tai <cjtai@ucsc.edu>, Natalie Valett <nvalett@ucsc.edu>
+Project-URL: Homepage, https://github.com/ecoscape-earth/ecoscape-utilities
+Project-URL: Bug Tracker, https://github.com/ecoscape-earth/ecoscape-utilities/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+License-File: LICENSE.md
+Requires-Dist: pandas>=1.0.0
+Requires-Dist: scgt>=0.0.20
+Requires-Dist: pyproj>=3.0.0
+Requires-Dist: numpy>=1.0.0
+Dynamic: license-file
+
+# EcoScape Utilities
+
+This package is simply a collection of utilities that are useful for running
+Colab notebooks and other code.
+These are not packages distributed with pip. To install on Colab, simply do:
+
+!pip install git+https://github.com/ecoscape-earth/ecoscape-utilities.git
+
+## Authors
+
+* Luca de Alfaro (luca@ucsc.edu)
+* Natalia Ocampo-Peñuela (nocampop@ucsc.edu)
+* Coen Adler (ctadler@ucsc.edu)
+* Artie Nazarov (anazarov@ucsc.edu)
+* Natalie Valett (nvalett@ucsc.edu)
+* Jasmine Tai (cjtai@ucsc.edu)
+
ecoscape_utilities-0.0.43/README.md
@@ -0,0 +1,17 @@
+# EcoScape Utilities
+
+This package is simply a collection of utilities that are useful for running
+Colab notebooks and other code.
+These are not packages distributed with pip. To install on Colab, simply do:
+
+!pip install git+https://github.com/ecoscape-earth/ecoscape-utilities.git
+
+## Authors
+
+* Luca de Alfaro (luca@ucsc.edu)
+* Natalia Ocampo-Peñuela (nocampop@ucsc.edu)
+* Coen Adler (ctadler@ucsc.edu)
+* Artie Nazarov (anazarov@ucsc.edu)
+* Natalie Valett (nvalett@ucsc.edu)
+* Jasmine Tai (cjtai@ucsc.edu)
+
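Once the pip line above has run, the package's modules are importable. A minimal smoke test, using only names that appear in the files shown below in this diff:

```python
# Quick check that the install worked; module and field names come from bird_runs.py below.
from ecoscape_utilities import bird_runs

print(bird_runs.BirdRunInfo._fields[:4])  # ('nickname', 'name', 'state', 'run_name')
```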
ecoscape_utilities-0.0.43/ecoscape_utilities/bird_runs.py
@@ -0,0 +1,108 @@
+import os
+from collections import namedtuple
+import shutil
+
+BirdRunInfo = namedtuple('BirdRunInfo', [
+    'nickname',  # 6-letter ebird name.
+    'name',  # official name.
+    'state',  # State for the analysis.
+    'run_name',  # Name of the run.
+    'terrain_fn',  # File for the terrain
+    'habitat_fn',  # File for the habitat.
+    'transmission_fn',  # Terrain transmission.
+    'resistance_original_fn',  # Original terrain resistance, unrefined.
+    'terrain_histogram_json_fn',  # File name for terrain histogram.
+    'terrain_histogram_csv_fn',  # File name for terrain histogram.
+    'repopulation_fn', 'gradient_fn', 'log_fn',
+    'validation_fn',
+    'obs_path',
+    'obs_csv_path',
+])
+
+def delete_run(base_path, nickname, state, run_name="Standard"):
+    """Deletes the files for the given run."""
+    p = os.path.join(base_path, f"{nickname}/{state}/Output/{run_name}")
+    print("Deleting", p)
+    shutil.rmtree(p, ignore_errors=True)
+
+class BirdRun(object):
+
+    def __init__(self, data_path):
+        """Initializes a bird run, given a data path"""
+        self.files_path = data_path
+
+    def get_bird_run(self, nickname, bird_name, run_name=None, state="US-CA"):
+        """Given a bird name in 6-letter ebird format, returns the BirdRun object for the bird."""
+        d = {"bird": nickname,
+             "run_name": run_name or "Standard",
+             "state": state}
+        self.createdir(os.path.join(self.files_path, "{bird}/{state}/Output/{run_name}".format(**d)))
+        return BirdRunInfo(
+            nickname = nickname,
+            name = bird_name,
+            state = state,
+            run_name = run_name or "Standard",
+            # Input ,
+            terrain_fn = os.path.join(self.files_path, "{bird}/{state}/terrain.tif".format(**d)),
+            habitat_fn = os.path.join(self.files_path, "{bird}/{state}/habitat.tif".format(**d)),
+            transmission_fn = os.path.join(self.files_path, "{bird}/{state}/transmission_refined_1.csv".format(**d)),
+            resistance_original_fn = os.path.join(self.files_path, "{bird}/resistance.csv".format(**d)),
+            terrain_histogram_json_fn = os.path.join(self.files_path, "{bird}/{state}/terrain_hist.json".format(**d)),
+            terrain_histogram_csv_fn = os.path.join(self.files_path, "{bird}/{state}/terrain_hist.csv".format(**d)),
+            # Validation files.
+            validation_fn = os.path.join(self.files_path, "{bird}/{state}/Ratios".format(**d)),
+            # Output files
+            repopulation_fn = os.path.join(self.files_path, "{bird}/{state}/Output/{run_name}/repopulation.tif".format(**d)),
+            gradient_fn = os.path.join(self.files_path, "{bird}/{state}/Output/{run_name}/gradient.tif".format(**d)),
+            log_fn = os.path.join(self.files_path, "{bird}/{state}/Output/{run_name}/log.json".format(**d)),
+            obs_path = os.path.join(self.files_path, "{bird}/{state}/Observations".format(**d)),
+            obs_csv_path = os.path.join(self.files_path, "{bird}/{state}/Output/{run_name}/observations.csv".format(**d)),
+        )
+
+    def get_observations_fn(self, obs_path, bigsquare=False, **kwargs):
+        """Completes the name of an observation ratio file, adding the information on minimum number of observations,
+        and maximum length walked.
+        """
+        d = dict(**kwargs)
+        d["isbig"] = "_big" if bigsquare else ""
+        return os.path.join(obs_path, "OBS_min_{min_checklists}_len_{max_distance}{isbig}.json".format(**d))
+
+    def get_observations_display_fn(self, obs_path, bigsquare=False, **kwargs):
+        """Completes the name of an observation ratio tif file, adding the information on minimum number of observations,
+        and maximum length walked.
+        """
+        d = dict(**kwargs)
+        d["isbig"] = "_big" if bigsquare else ""
+        return os.path.join(obs_path, "OBS_min_{min_checklists}_len_{max_distance}{isbig}.tif".format(**d))
+
+    def get_observations_all_fn(self, obs_path, **kwargs):
+        """Completes the name of an observation ratio file, adding the information on minimum number of observations,
+        and maximum length walked.
+        """
+        d = dict(**kwargs)
+        return os.path.join(obs_path, "OBS_all_len_{max_distance}_{date_range}_{num_squares}.csv".format(**d))
+
+    def get_terrain_occurrences_fn(self, obs_path, **kwargs):
+        """Completes the name of an observation ratio file, adding the information on minimum number of observations,
+        and maximum length walked.
+        """
+        d = dict(**kwargs)
+        return os.path.join(obs_path, "TEROBS_all_len_{max_distance}_{date_range}_{num_squares}.csv".format(**d))
+
+    def get_observations_all_display_fn(self, obs_path, **kwargs):
+        """Completes the name of an observation ratio tif file, adding the information on minimum number of observations,
+        and maximum length walked.
+        """
+        d = dict(**kwargs)
+        return os.path.join(obs_path, "OBS_all_len_{max_distance}_{date_range}_{num_squares}.tif".format(**d))
+
+    def createdir_for_file(self, fn):
+        """Ensures that the path to a file exists."""
+        dirs, ffn = os.path.split(fn)
+        # print("Creating", dirs)
+        os.makedirs(dirs, exist_ok=True)
+
+    def createdir(self, dir_path):
+        """Ensures that a folder exists."""
+        # print("Creating", dir_path)
+        os.makedirs(dir_path, exist_ok=True)
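As orientation for bird_runs.py above: `get_bird_run` only composes paths under a fixed `<nickname>/<state>/...` layout and creates the output folder; nothing is read from disk. A minimal usage sketch, assuming the package is installed; the data root `/data/ecoscape` and the nickname/run name are hypothetical:

```python
from ecoscape_utilities.bird_runs import BirdRun

# Hypothetical, writable data root; get_bird_run only builds paths and creates the Output folder.
bird_run = BirdRun("/data/ecoscape")
info = bird_run.get_bird_run("acowoo", "Acorn Woodpecker", run_name="Test", state="US-CA")

print(info.habitat_fn)       # /data/ecoscape/acowoo/US-CA/habitat.tif
print(info.repopulation_fn)  # /data/ecoscape/acowoo/US-CA/Output/Test/repopulation.tif

# Observation file names are completed from keyword arguments.
obs_fn = bird_run.get_observations_fn(info.obs_path, min_checklists=5, max_distance=2)
print(obs_fn)                # /data/ecoscape/acowoo/US-CA/Observations/OBS_min_5_len_2.json
```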
ecoscape_utilities-0.0.43/ecoscape_utilities/ebird_db.py
@@ -0,0 +1,734 @@
+import hashlib
+import numpy as np
+import os
+import pandas as pd
+import sqlite3
+from sqlite3 import Error
+import warnings
+
+from collections import defaultdict
+from pyproj import Transformer
+from pyproj.crs import CRS
+
+from scgt import GeoTiff
+
+def expand_sqlite_query(query, params):
+    """
+    Expands an SQLite query string with named parameters from a dictionary.
+
+    Args:
+        query (str): The SQL query string with :variable placeholders.
+        params (dict): A dictionary mapping variable names to their values.
+
+    Returns:
+        str: The expanded SQL query string.
+    """
+    expanded_query = query
+    for key, value in params.items():
+        placeholder = f":{key}"
+        if isinstance(value, str):
+            # Escape any single quotes within the string
+            formatted_value = f"'{value.replace("'", "''")}'"
+        elif value is None:
+            formatted_value = 'NULL'
+        else:
+            # For integers, floats, and other types
+            formatted_value = str(value)
+
+        expanded_query = expanded_query.replace(placeholder, formatted_value)
+
+    return expanded_query
+
+
+"""
+A module for interaction with a sqlite database. Contains functions for query execution,
+and some common functionality we need to run on the DB
+"""
+class Connection:
+    def __init__(self, db_file):
+        """Initializes the connection to the SQLite database
+        @param db_file: The file path to the database file
+        """
+        conn = None
+        try:
+            conn = sqlite3.connect(db_file)
+        except Error as e:
+            print("Error in create_connection: ", e)
+        self.conn = conn
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        if self.conn:
+            self.conn.close()
+
+    def get_cursor(self):
+        try:
+            cur = self.conn.cursor()
+        except:
+            print("error connecting to db")
+            cur = None
+        return cur
+
+    def execute_query(self, query, verbose=False):
+        """
+        executes the given query in the database
+        :param query (str): a sqlite query to the database
+        :param verbose (boolean): flag to print out result of the query
+        :returns: result of the query as a list of rows
+        """
+        try:
+            cur = self.get_cursor()
+            if isinstance(query, str):
+                cur.execute(query)
+            else:
+                cur.execute(query[0], query[1])
+            self.conn.commit()
+            rows = cur.fetchall()
+            if verbose:
+                for row in rows:
+                    print(row)
+            return rows
+        except Exception as e:
+            print("Error executing query:\n\t", query, ".\n Error: ", e)
+
+
+class EbirdObservations(Connection):
+    """Class for eBird-specific connections, includes functionality particular to the eBird database"""
+
+    def __init__(self, db_file):
+        super().__init__(db_file)
+
+    def get_all_squares(self, state=None,
+                        breeding=None, date_range=None,
+                        lat_range=None, lng_range=None, max_dist=2, min_time=None,
+                        verbose=False):
+        """
+        Gets all squares with bird (any bird) observations, for a certain state,
+        and within certain lat, lng, and date ranges.
+        :param state (str): state code
+        :param breeding: None, or pair of months delimiting the breeding season, e.g. ("04", "06").
+        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
+        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
+        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
+        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
+            (any of further distance will be too noisy, and should be disregarded)
+        :param min_time (int): minimum time in minutes for the checklist for any observation we consider
+        :returns: list of squares which fall within the query parameters
+        """
+        query_string = ['select DISTINCT SQUARE from checklist where "ALL SPECIES REPORTED" = 1']
+        query_string.append('and "PROTOCOL TYPE" != "Incidental"')
+        query_string.append('and "EFFORT DISTANCE KM" <= :dist')
+        d = {"dist": max_dist}
+        if min_time is not None:
+            query_string.append('and "DURATION MINUTES" >= :min_time')
+            d["min_time"] = min_time
+        if state is not None:
+            query_string.append('and "STATE CODE" = :state')
+            d['state'] = state
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr("OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr("OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if date_range is not None:
+            query_string.append('and "OBSERVATION DATE" >= :min_date')
+            query_string.append('and "OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if lat_range is not None:
+            query_string.append('and "LATITUDE" >= :min_lat')
+            query_string.append('and "LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and "LONGITUDE" >= :min_lng')
+            query_string.append('and "LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+            print("Expanded query:", expand_sqlite_query(query_string, d))
+        squares_list = self.execute_query((query_string, d))
+        return [sq[0] for sq in squares_list]
+
+    def get_square_observations(self, square, bird,
+                                breeding=None, date_range=None,
+                                lat_range=None, lng_range=None, max_dist=2, min_time=None,
+                                verbose=False):
+        """
+        Get the number of checklists, number of checklists with a bird,
+        total time, total distance, and total bird sightings, for a square.
+        :param square: tuple of 2 floats, representing (lat, lng) of the square
+        :param bird: bird
+        :param breeding: pair of months delimiting breeding season, or None (e.g., ("04", "06")).
+        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
+        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
+        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
+        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
+            (any of further distance will be too noisy, and should be disregarded)
+        :param min_time (int): minimum time in minutes for the checklist for any observation we consider
+        :returns: num_checklists, num_bird_checklists, num_birds for the given square.
+        """
+        # Adds deprecation warning.
+        warnings.warn("This function is deprecated. Use get_square_checklists instead.", DeprecationWarning)
+        # Gets the number of checklists, the total time, the total distance, and the total number of birds.
+        query_string = ['select COUNT(*), SUM("EFFORT DISTANCE KM"), SUM("DURATION MINUTES")',
+                        'FROM checklist where SQUARE = :square']
+        d = {'square': square}
+        query_string.append('and "ALL SPECIES REPORTED" = 1')
+        query_string.append('and "PROTOCOL TYPE" != "Incidental"')
+        query_string.append('and "EFFORT DISTANCE KM" <= :dist')
+        d["dist"] = max_dist
+        if min_time is not None:
+            query_string.append('and "DURATION MINUTES" >= :min_time')
+            d["min_time"] = min_time
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr("OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr("OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if date_range is not None:
+            query_string.append('and "OBSERVATION DATE" >= :min_date')
+            query_string.append('and "OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if lat_range is not None:
+            query_string.append('and "LATITUDE" >= :min_lat')
+            query_string.append('and "LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and "LONGITUDE" >= :min_lng')
+            query_string.append('and "LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+        r = self.execute_query((query_string, d))
+        if r is not None:
+            num_checklists = float(r[0][0])
+            total_km = float(r[0][1])
+            total_minutes = float(r[0][2])
+        else:
+            num_checklists = 0
+            total_km = 0
+            total_minutes = 0
+        # Then, the number of checklists with the bird, and the total number of birds.
+        query_string = ['select COUNT(DISTINCT checklist."SAMPLING EVENT IDENTIFIER"),',
+                        'SUM(observation."OBSERVATION COUNT")',
+                        'from checklist join observation',
+                        'on checklist."SAMPLING EVENT IDENTIFIER" = observation."SAMPLING EVENT IDENTIFIER"',
+                        ]
+        query_string.append('where checklist.SQUARE = :square')
+        query_string.append('and checklist."ALL SPECIES REPORTED" = 1')
+        query_string.append('and checklist."PROTOCOL TYPE" != "Incidental"')
+        query_string.append('and checklist."EFFORT DISTANCE KM" <= :dist')
+        d["dist"] = max_dist
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr(checklist."OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr(checklist."OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if min_time is not None:
+            query_string.append('and "checklist.DURATION MINUTES" >= :min_time')
+            d["min_time"] = min_time
+        if date_range is not None:
+            query_string.append('and checklist."OBSERVATION DATE" >= :min_date')
+            query_string.append('and checklist."OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if lat_range is not None:
+            query_string.append('and checklist."LATITUDE" >= :min_lat')
+            query_string.append('and checklist."LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and checklist."LONGITUDE" >= :min_lng')
+            query_string.append('and checklist."LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        # Ask about the bird.
+        query_string.append('and observation."COMMON NAME" = :bird')
+        d["bird"] = bird.name
+        # Runs the query.
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+            print("Expanded query:", expand_sqlite_query(query_string, d))
+        r = self.execute_query((query_string, d))
+        if r is None:
+            num_birds = 0
+            num_bird_checklists = 0
+        else:
+            r = r[0]
+            num_bird_checklists = float(r[0])
+            num_birds = 0 if r[1] is None else float(r[1])
+        return dict(
+            num_checklists=num_checklists,
+            num_bird_checklists=num_bird_checklists,
+            num_birds=num_birds,
+            total_km=total_km,
+            total_minutes=total_minutes,
+        )
+
+    def get_state_checklists(self, state, bird,
+                             breeding=None, date_range=None,
+                             lat_range=None, lng_range=None, max_dist=2,
+                             verbose=False):
+        """Returns a dataframe consisting of all checklists in a square, with data
+        on a (possible) occurrence of a bird."""
+        query_string = [
+            'SELECT checklist."SQUARE", ',
+            'checklist."SAMPLING EVENT IDENTIFIER", ',
+            'checklist."PROTOCOL TYPE", ',
+            'checklist."EFFORT DISTANCE KM", ',
+            'checklist."DURATION MINUTES", ',
+            'checklist."OBSERVATION DATE", ',
+            'checklist."TIME OBSERVATIONS STARTED", ',
+            'checklist."OBSERVER ID", ',
+            'checklist."LATITUDE", ',
+            'checklist."LONGITUDE", ',
+            'observation."OBSERVATION COUNT" ',
+            'FROM checklist LEFT JOIN observation ',
+            'ON checklist."SAMPLING EVENT IDENTIFIER" = observation."SAMPLING EVENT IDENTIFIER" ',
+            'AND observation."COMMON NAME" = :bird ',
+            'WHERE ',
+            'checklist."STATE CODE" = :state',
+            'and checklist."ALL SPECIES REPORTED" = 1',
+            'and checklist."PROTOCOL TYPE" IN ("Traveling", "Stationary") ',
+            'and checklist."EFFORT DISTANCE KM" <= :dist',
+        ]
+        # Main query parameters
+        d = {"state": state, "dist": max_dist, "bird": bird.name}
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr(checklist."OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr(checklist."OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if date_range is not None:
+            query_string.append('and checklist."OBSERVATION DATE" >= :min_date')
+            query_string.append('and checklist."OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if lat_range is not None:
+            query_string.append('and checklist."LATITUDE" >= :min_lat')
+            query_string.append('and checklist."LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and checklist."LONGITUDE" >= :min_lng')
+            query_string.append('and checklist."LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        # Submits the query.
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+            print("Expanded query:", expand_sqlite_query(query_string, d))
+        checklists_df = pd.read_sql_query(query_string, self.conn, params=d)
+        return checklists_df
+
+
+    def get_square_checklists(self, square, bird,
+                              breeding=None, date_range=None,
+                              lat_range=None, lng_range=None, max_dist=2,
+                              verbose=False):
+        """Returns a dataframe consisting of all checklists in a square, with data
+        on a (possible) occurrence of a bird."""
+        query_string = [
+            'SELECT checklist."SQUARE", ',
+            'checklist."SAMPLING EVENT IDENTIFIER", ',
+            'checklist."PROTOCOL TYPE", ',
+            'checklist."EFFORT DISTANCE KM", ',
+            'checklist."DURATION MINUTES", ',
+            'checklist."OBSERVATION DATE", ',
+            'checklist."TIME OBSERVATIONS STARTED", ',
+            'checklist."OBSERVER ID", ',
+            'checklist."LATITUDE", ',
+            'checklist."LONGITUDE", ',
+            'observation."OBSERVATION COUNT" ',
+            'FROM checklist LEFT JOIN observation ',
+            'ON checklist."SAMPLING EVENT IDENTIFIER" = observation."SAMPLING EVENT IDENTIFIER" ',
+            'AND observation."COMMON NAME" = :bird ',
+            'WHERE ',
+            'checklist.SQUARE = :square',
+            'and checklist."ALL SPECIES REPORTED" = 1',
+            'and checklist."PROTOCOL TYPE" != "Incidental" ',
+            'and checklist."EFFORT DISTANCE KM" <= :dist',
+        ]
+        # Main query parameters
+        d = {"square": square, "dist": max_dist, "bird": bird.name}
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr(checklist."OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr(checklist."OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if date_range is not None:
+            query_string.append('and checklist."OBSERVATION DATE" >= :min_date')
+            query_string.append('and checklist."OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if lat_range is not None:
+            query_string.append('and checklist."LATITUDE" >= :min_lat')
+            query_string.append('and checklist."LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and checklist."LONGITUDE" >= :min_lng')
+            query_string.append('and checklist."LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        # Submits the query.
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+            print("Expanded query:", expand_sqlite_query(query_string, d))
+        checklists_df = pd.read_sql_query(query_string, self.conn, params=d)
+        return checklists_df
+
+    def get_square_individual_checklists(self, square, bird,
+                                         breeding=None, date_range=None, min_time=None,
+                                         lat_range=None, lng_range=None, max_dist=2,
+                                         verbose=False):
+        """
+        Get the checklists for a square, so that statistics can be computed.
+        The result is returned as a dataframe.
+
+        and total bird sightings, for a square.
+        :param square: tuple of 2 floats, representing (lat, lng) of the square
+        :param bird (str): name of bird
+        :param breeding: None, or pair of months delimiting breeding season ("04", "06").
+        :param date_range: tuple of 2 date-strings in format "YYYY-MM-DD" to get only observations in this date range
+        :param lat_range: tuple of 2 floats for the lower and upper bounds for latitude
+        :param lng_range: tuple of 2 floats for the lower and upper bounds for longitude
+        :param max_dist (int): max kilometers traveled for the checklist for any observation we consider
+            (any of further distance will be too noisy, and should be disregarded)
+        :returns: list of squares which fall within the query parameters
+        """
+        # Adds deprecation warning.
+        warnings.warn("This function is deprecated. Use get_square_checklists instead.", DeprecationWarning)
+        # First the checklists, with or without the bird.
+        query_string = ['select DISTINCT("SAMPLING EVENT IDENTIFIER")',
+                        'FROM checklist where SQUARE = :square']
+        d = {'square': square}
+        query_string.append('and "ALL SPECIES REPORTED" = 1')
+        query_string.append('and "PROTOCOL TYPE" != "Incidental"')
+        query_string.append('and "EFFORT DISTANCE KM" <= :dist')
+        d["dist"] = max_dist
+        if min_time is not None:
+            query_string.append('and "DURATION MINUTES" >= :min_time')
+            d["min_time"] = min_time
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr("OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr("OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if date_range is not None:
+            query_string.append('and "OBSERVATION DATE" >= :min_date')
+            query_string.append('and "OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if lat_range is not None:
+            query_string.append('and "LATITUDE" >= :min_lat')
+            query_string.append('and "LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and "LONGITUDE" >= :min_lng')
+            query_string.append('and "LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+        checklists_df = pd.read_sql_query(query_string, self.conn, params=d)
+
+        # Then, the number of checklists with the bird, and the total number of birds.
+        query_string = ['select checklist."SAMPLING EVENT IDENTIFIER", ',
+                        'observation."OBSERVATION COUNT"',
+                        'from checklist join observation',
+                        'on checklist."SAMPLING EVENT IDENTIFIER" = observation."SAMPLING EVENT IDENTIFIER"',
+                        ]
+        query_string.append('where checklist.SQUARE = :square')
+        query_string.append('and checklist."ALL SPECIES REPORTED" = 1')
+        query_string.append('and checklist."PROTOCOL TYPE" != "Incidental"')
+        query_string.append('and checklist."EFFORT DISTANCE KM" <= :dist')
+        d["dist"] = max_dist
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr(checklist."OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr(checklist."OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if date_range is not None:
+            query_string.append('and checklist."OBSERVATION DATE" >= :min_date')
+            query_string.append('and checklist."OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if min_time is not None:
+            query_string.append('and checklist."DURATION MINUTES" >= :min_time')
+            d["min_time"] = min_time
+        if lat_range is not None:
+            query_string.append('and checklist."LATITUDE" >= :min_lat')
+            query_string.append('and checklist."LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and checklist."LONGITUDE" >= :min_lng')
+            query_string.append('and checklist."LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        # Ask about the bird.
+        query_string.append('and observation."COMMON NAME" = :bird')
+        d["bird"] = bird.name
+        # Runs the query.
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+            print("Expanded query:", expand_sqlite_query(query_string, d))
+        rows = self.execute_query((query_string, d))
+        counts = defaultdict(int)
+        for r in rows:
+            counts[r[0]] = r[1]
+        checklists_df["Count"] = checklists_df.apply(lambda row : counts[row["SAMPLING EVENT IDENTIFIER"]], axis=1)
+        return checklists_df
+
+    def get_squares_with_bird(self, bird, max_dist=1, breeding=None, min_time=None,
+                              date_range=None, lat_range=None, lng_range=None,
+                              state=None, verbose=False):
+        """Gets all the squares where a bird has been sighted. This is used
+        primarily to refine the terrain resistance.
+        :param bird: Common name of the bird
+        :param max_dist: max length of the checklist in Km
+        :param breeding: pair of months delimiting breeding season, or None.
+        :param date_range: date range in years, as a string tuple of yyyy-mm-dd dates
+        :param lat_range: range of latitudes to consider, as number tuple, optional.
+        :param lng_range: range of longitudes to consider, as number tuple, optional.
+        :param state: state, to limit the query. Example: "US-CA"
+        :param verbose: if True, more debugging information is printed.
+        :return: List of squares with the bird.
+        """
+        query_string = [
+            'select DISTINCT checklist.SQUARE',
+            'from checklist join observation on',
+            'checklist."SAMPLING EVENT IDENTIFIER" = observation."SAMPLING EVENT IDENTIFIER"',
+            'where observation."COMMON NAME" = :bird',
+            'and checklist."STATE CODE" = :state',
+            'and checklist."ALL SPECIES REPORTED" = 1',
+        ]
+        d = {'dist': max_dist, 'bird': bird, 'state': state}
+        query_string.append('and checklist."PROTOCOL TYPE" != "Incidental"')
+        query_string.append('and checklist."EFFORT DISTANCE KM" <= :dist')
+        # Adds breeding portion
+        if breeding is not None:
+            query_string.extend([
+                'and substr("OBSERVATION DATE", 6, 2) >= :br1',
+                'and substr("OBSERVATION DATE", 6, 2) <= :br2',
+            ])
+            d['br1'], d['br2'] = breeding
+        if min_time is not None:
+            query_string.append('and "DURATION MINUTES" >= :min_time')
+            d["min_time"] = min_time
+        if date_range is not None:
+            query_string.append('and checklist."OBSERVATION DATE" >= :min_date')
+            query_string.append('and checklist."OBSERVATION DATE" <= :max_date')
+            d["min_date"], d["max_date"] = date_range
+        if lat_range is not None:
+            query_string.append('and checklist."LATITUDE" >= :min_lat')
+            query_string.append('and checklist."LATITUDE" <= :max_lat')
+            d["min_lat"], d["max_lat"] = lat_range
+        if lng_range is not None:
+            query_string.append('and checklist."LONGITUDE" >= :min_lng')
+            query_string.append('and checklist."LONGITUDE" <= :max_lng')
+            d["min_lng"], d["max_lng"] = lng_range
+        # Runs the query.
+        query_string = " ".join(query_string)
+        if verbose:
+            print("Query:", query_string)
+            print("Expanded query:", expand_sqlite_query(query_string, d))
+        r = self.execute_query((query_string, d))
+        return [row[0] for row in r]
+
+
+def format_coords(coords, bigsquare=False):
+    """
+    formats coords from the eBird database format '4406;-12131' to
+    tuple (44.06, -121.31) for (lat, lng) in WGS84 format
+    :param coords (str): coordinates in eBird database format (ie '4406;-12131')
+    :param bigsquare (bool): option is used in case these are big squares (one less decimal).
+    :returns: tuple (lat, long)
+    """
+    lat, long = coords.split(';')
+    # Note that we have to use a - sign here for longitude, since these are negative
+    # numbers, and since awk rounds towards zero, rather than towards negative infinity.
+    if bigsquare:
+        lat = float(lat[:-1] + '.' + lat[-1:]) + 0.05
+        long = float(long[:-1] + '.' + long[-1:]) - 0.05
+    else:
+        lat = float(lat[:-2] + '.' + lat[-2:]) + 0.005
+        long = float(long[:-2] + '.' + long[-2:]) - 0.005
+    return (lat, long)
+
+
+def transform_coords(geotiff, coord):
+    """
+    transforms WGS84 coordinates to the same projection as the given geotiff
+    :param geotiff (scgt.GeoTiff): geotiff which we want our coordinates to map to
+    :param coords: tuple of 2 floats (lat, lng), representing coordinates in WGS84 format
+    :returns: tuple (lat, long) in the CRS of geotiff
+    """
+    lat, long = coord
+    transformer = Transformer.from_crs("WGS84", CRS.from_user_input(geotiff.crs), always_xy=True)
+    xx, yy = transformer.transform(long, lat)
+    return (xx, yy)
+
+
+"""
+A module for common functionality in the validation process using ebird data
+"""
+class Validation(object):
+
+    def __init__(self, obs_fn):
+        """
+        Generates a class for validation.
+        :param obs_fn: Observations filename.
+        """
+        self.obs_fn = obs_fn
+
+
+    def filter_CA_rectangle(self, observation_ratios, bigsquare=False):
+        """
+        Filters observation ratios, keeping only the ones in California.
+        :param observation_ratios: list of tuples (square, observation_ratio)
+        :returns: list of tuples (square, observation_ratio) with only squares in CA
+        """
+        # California rectangle
+        ca_lng_max = -113
+        ca_lng_min = -125
+        ca_lat_max = 43
+        ca_lat_min = 32
+        result = {}
+        for square, ratio in observation_ratios.items():
+            lat, lng = format_coords(square, bigsquare=bigsquare)
+            if ca_lat_min <= lat <= ca_lat_max and ca_lng_min <= lng <= ca_lng_max:
+                result[square] = ratio
+        return result
+
+    def plot_observations(self, observation_ratios, hab_fn, output_path,
+                          bigsquare=False, obs_multiplier=1):
+        """
+        Creates a Geotiff with the observation ratios plotted
+        :param observation_ratios: list of tuples (square, observation_ratio)
+        :param hab_fn: file path to the habitat geotiff to clone
+        :param output_path: file path to create our new geotiff
+        :param obs_multiplier: scalar to multiply the observation_ratios by
+        """
+        tile_scale = 30 if bigsquare else 3
+        with GeoTiff.from_file(hab_fn) as hab_f:
+            with hab_f.clone_shape(output_path, no_data_value=-1, dtype='float32') as obsTiff:
+                for (square, observed) in observation_ratios:
+                    if (isinstance(square, str)):
+                        square = format_coords(square, bigsquare=bigsquare)
+                    coord = transform_coords(obsTiff, square)
+                    obsTiff.set_tile_from_coord(coord, observed * obs_multiplier, tile_scale)
+
+    ### Correlation Functions ###
+    def get_df_correlation(self, df):
+        return df.corr()
+
+    # Weighted correlation coefficient
+    def weighted_correlation(self, df):
+        '''
+        :param df: dataframe with 3 columns: 'repop', 'obs_ratio', and 'weight'
+        :returns: the weighted correlation coefficient of the df
+        '''
+        # Weighted Mean
+        def m(x, w):
+            return np.sum(x * w) / np.sum(w)
+
+        # Weighted Covariance
+        def cov(x, y, w):
+            return np.sum(w * (x - m(x, w)) * (y - m(y, w))) / np.sum(w)
+
+        # Weighted Correlation
+        return cov(df['repop'], df['obs_ratio'], df['weight']) / np.sqrt(cov(df['repop'], df['repop'], df['weight']) * cov(df['obs_ratio'], df['obs_ratio'], df['weight']))
+
+
+    def weighted_repop_to_observation_ratio_df(
+            self, repop_tif, hab, observation_ratios, bigsquare=False,
+            tile_scale=4, weighted_tile_size=100):
+        '''
+        :param repop_tif: repopulation geotiff
+        :param hab: habitat geotiff used to compute repop
+        :param observation_ratios: list of pairs (square, observation ratio) from ebird.
+        :param tile_scale: percentage of habitat the tile must contain to be considered "in habitat" if being refined by hab
+        :param tile_scale: size of the tile around square
+        :param weighted_tile_size: size of the tile to attribute grouped weights to
+        :returns: a dataframe with columns repopulation, observation ratio, and weights
+        '''
+        df = pd.DataFrame(columns=['repop', 'hab', 'max_repop', 'max_hab', 'obs_ratio', 'lat', 'lng', 'x', 'y', ])
+        count = defaultdict(int)
+        for (square, ratio) in observation_ratios:
+            if (isinstance(square, str)):
+                coords = format_coords(square, bigsquare=bigsquare)
+            else:
+                coords = square
+            lat, lng = coords
+            repop_pix_coords = transform_coords(repop_tif, coords)
+            hab_pix_coords = transform_coords(hab, coords)
+            repop_tile = repop_tif.get_tile_from_coord(repop_pix_coords, tile_scale=tile_scale)
+            hab_tile = hab.get_tile_from_coord(hab_pix_coords, tile_scale=tile_scale)
+            if repop_tile is None or hab_tile is None:
+                continue
+            x, y = repop_tif.get_pixel_from_coord(coords)
+            x_floor = x // weighted_tile_size
+            y_floor = y // weighted_tile_size
+            count[(x_floor, y_floor)] += 1
+            # df = df.append(
+            df = pd.concat([df, pd.DataFrame.from_records([
+                {'repop': np.average(repop_tile.m),
+                 'hab': np.average(hab_tile.m),
+                 'max_repop': np.max(repop_tile.m),
+                 'max_hab': np.max(hab_tile.m),
+                 'obs_ratio': ratio,
+                 'lat': lat,
+                 'lng': lng,
+                 'x': x,
+                 'y': y,
+                 }])])
+        # Now adds the weight column.
+        df['weight'] = df.apply(lambda row:
+            1 / count[(row.x // weighted_tile_size, row.y // weighted_tile_size)], axis=1)
+        return df
+
+    def get_repop_ratios(self, repop_tif, hab_tif, tile_scale=3, div_by_255=False):
+        """
+        Takes as input a dataframe containing columns Square (and possibly other columns), and
+        adds to it columns for the total repopulation and amount of habitat.
+        :param repop_tif: repopulation geotiff
+        :param hab_tif: habitat geotiff used to compute repop
+        :param tile_scale: size of the tile around square
+        """
+        df = pd.read_csv(self.obs_fn)
+        def f(row):
+            square = row["Square"]
+            if (isinstance(square, str)):
+                coords = format_coords(square)
+            else:
+                coords = square
+            lat, lng = coords
+            repop_pix_coords = transform_coords(repop_tif, coords)
+            hab_pix_coords = transform_coords(hab_tif, coords)
+            repop_tile = repop_tif.get_tile_from_coord(repop_pix_coords, tile_scale=tile_scale)
+            hab_tile = hab_tif.get_tile_from_coord(hab_pix_coords, tile_scale=tile_scale)
+            if repop_tile is None or hab_tile is None:
+                return pd.NA, pd.NA, pd.NA, pd.NA, lat, lng
+            avg_repop = np.average(repop_tile.m)
+            avg_hab = np.average(hab_tile.m)
+            max_repop = np.max(repop_tile.m)
+            max_hab = np.max(hab_tile.m)
+            avg_repop_in_hab = np.average(repop_tile.m * hab_tile.m > 0)
+            if div_by_255:
+                avg_repop /= 255.
+                max_repop /= 255.
+                avg_repop_in_hab /= 255.
+            return avg_repop, avg_hab, max_repop, max_hab, avg_repop_in_hab, lat, lng
+        df["avg_repop"], df["avg_hab"], df["max_repop"], df["max_hab"], df["avg_repop_in_hab"], df["lat"], df["lng"] = zip(*df.apply(f, axis=1))
+        return df
+
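A sketch of how the pieces of ebird_db.py above fit together: `EbirdObservations` wraps an SQLite file that holds `checklist` and `observation` tables, the query builders read the eBird common name from a `bird` object's `.name` attribute, and `format_coords` turns square identifiers such as '4406;-12131' back into (lat, lng). The database path, species, and date range below are hypothetical, and the snippet assumes the module imports cleanly in your environment:

```python
from collections import namedtuple
from ecoscape_utilities.ebird_db import EbirdObservations, format_coords

# get_square_checklists reads bird.name, so any object with a .name attribute works here.
Bird = namedtuple("Bird", ["name"])
bird = Bird(name="Acorn Woodpecker")  # hypothetical species

# Hypothetical SQLite file containing the checklist and observation tables used above.
with EbirdObservations("ebird_US-CA.db") as db:
    squares = db.get_all_squares(state="US-CA", max_dist=1,
                                 date_range=("2017-01-01", "2022-12-31"))
    if squares:
        df = db.get_square_checklists(squares[0], bird, max_dist=1)
        print(len(df), "checklists at", format_coords(squares[0]))
```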
ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/PKG-INFO
@@ -0,0 +1,36 @@
+Metadata-Version: 2.4
+Name: ecoscape-utilities
+Version: 0.0.43
+Summary: A collection of EcoScape utilities.
+Author-email: Luca de Alfaro <luca@ucsc.edu>, Coen Adler <ctadler@ucsc.edu>, Artie Nazarov <anazarov@ucsc.edu>, Natalia Ocampo-Peñuela <nocampop@ucsc.edu>, Jasmine Tai <cjtai@ucsc.edu>, Natalie Valett <nvalett@ucsc.edu>
+Project-URL: Homepage, https://github.com/ecoscape-earth/ecoscape-utilities
+Project-URL: Bug Tracker, https://github.com/ecoscape-earth/ecoscape-utilities/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+License-File: LICENSE.md
+Requires-Dist: pandas>=1.0.0
+Requires-Dist: scgt>=0.0.20
+Requires-Dist: pyproj>=3.0.0
+Requires-Dist: numpy>=1.0.0
+Dynamic: license-file
+
+# EcoScape Utilities
+
+This package is simply a collection of utilities that are useful for running
+Colab notebooks and other code.
+These are not packages distributed with pip. To install on Colab, simply do:
+
+!pip install git+https://github.com/ecoscape-earth/ecoscape-utilities.git
+
+## Authors
+
+* Luca de Alfaro (luca@ucsc.edu)
+* Natalia Ocampo-Peñuela (nocampop@ucsc.edu)
+* Coen Adler (ctadler@ucsc.edu)
+* Artie Nazarov (anazarov@ucsc.edu)
+* Natalie Valett (nvalett@ucsc.edu)
+* Jasmine Tai (cjtai@ucsc.edu)
+
ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/SOURCES.txt
@@ -0,0 +1,12 @@
+LICENSE.md
+README.md
+pyproject.toml
+requirements.txt
+ecoscape_utilities/__init__.py
+ecoscape_utilities/bird_runs.py
+ecoscape_utilities/ebird_db.py
+ecoscape_utilities.egg-info/PKG-INFO
+ecoscape_utilities.egg-info/SOURCES.txt
+ecoscape_utilities.egg-info/dependency_links.txt
+ecoscape_utilities.egg-info/requires.txt
+ecoscape_utilities.egg-info/top_level.txt
ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
ecoscape_utilities-0.0.43/ecoscape_utilities.egg-info/top_level.txt
@@ -0,0 +1 @@
+ecoscape_utilities
ecoscape_utilities-0.0.43/pyproject.toml
@@ -0,0 +1,35 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ecoscape-utilities"
+version = "0.0.43"
+authors = [
+    {name="Luca de Alfaro", email="luca@ucsc.edu"},
+    {name="Coen Adler", email="ctadler@ucsc.edu"},
+    {name="Artie Nazarov", email="anazarov@ucsc.edu"},
+    {name="Natalia Ocampo-Peñuela", email="nocampop@ucsc.edu"},
+    {name="Jasmine Tai", email="cjtai@ucsc.edu"},
+    {name="Natalie Valett", email="nvalett@ucsc.edu"}
+]
+description = "A collection of EcoScape utilities."
+readme = "README.md"
+requires-python = ">=3.7"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+]
+dynamic = ["dependencies"]
+
+[tool.setuptools.dynamic]
+dependencies = {file = ["requirements.txt"]}
+
+[tool.setuptools]
+packages = ["ecoscape_utilities"]
+
+
+[project.urls]
+"Homepage" = "https://github.com/ecoscape-earth/ecoscape-utilities"
+"Bug Tracker" = "https://github.com/ecoscape-earth/ecoscape-utilities/issues"