eodag 3.0.1__py3-none-any.whl → 3.1.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +164 -127
- eodag/api/product/_assets.py +11 -11
- eodag/api/product/_product.py +45 -30
- eodag/api/product/drivers/__init__.py +81 -4
- eodag/api/product/drivers/base.py +65 -4
- eodag/api/product/drivers/generic.py +65 -0
- eodag/api/product/drivers/sentinel1.py +97 -0
- eodag/api/product/drivers/sentinel2.py +95 -0
- eodag/api/product/metadata_mapping.py +101 -85
- eodag/api/search_result.py +13 -23
- eodag/cli.py +26 -5
- eodag/config.py +78 -81
- eodag/plugins/apis/base.py +1 -1
- eodag/plugins/apis/ecmwf.py +46 -22
- eodag/plugins/apis/usgs.py +16 -15
- eodag/plugins/authentication/aws_auth.py +16 -13
- eodag/plugins/authentication/base.py +5 -3
- eodag/plugins/authentication/header.py +3 -3
- eodag/plugins/authentication/keycloak.py +4 -4
- eodag/plugins/authentication/oauth.py +7 -3
- eodag/plugins/authentication/openid_connect.py +16 -16
- eodag/plugins/authentication/sas_auth.py +4 -4
- eodag/plugins/authentication/token.py +41 -10
- eodag/plugins/authentication/token_exchange.py +1 -1
- eodag/plugins/base.py +4 -4
- eodag/plugins/crunch/base.py +4 -4
- eodag/plugins/crunch/filter_date.py +4 -4
- eodag/plugins/crunch/filter_latest_intersect.py +6 -6
- eodag/plugins/crunch/filter_latest_tpl_name.py +7 -7
- eodag/plugins/crunch/filter_overlap.py +4 -4
- eodag/plugins/crunch/filter_property.py +6 -7
- eodag/plugins/download/aws.py +58 -78
- eodag/plugins/download/base.py +38 -56
- eodag/plugins/download/creodias_s3.py +29 -0
- eodag/plugins/download/http.py +173 -183
- eodag/plugins/download/s3rest.py +10 -11
- eodag/plugins/manager.py +10 -20
- eodag/plugins/search/__init__.py +6 -5
- eodag/plugins/search/base.py +87 -44
- eodag/plugins/search/build_search_result.py +1067 -329
- eodag/plugins/search/cop_marine.py +22 -12
- eodag/plugins/search/creodias_s3.py +9 -73
- eodag/plugins/search/csw.py +11 -11
- eodag/plugins/search/data_request_search.py +16 -15
- eodag/plugins/search/qssearch.py +103 -187
- eodag/plugins/search/stac_list_assets.py +85 -0
- eodag/plugins/search/static_stac_search.py +3 -3
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +663 -304
- eodag/resources/providers.yml +823 -1749
- eodag/resources/stac_api.yml +2 -2
- eodag/resources/user_conf_template.yml +11 -0
- eodag/rest/cache.py +2 -2
- eodag/rest/config.py +3 -3
- eodag/rest/core.py +112 -82
- eodag/rest/errors.py +5 -5
- eodag/rest/server.py +33 -14
- eodag/rest/stac.py +40 -38
- eodag/rest/types/collections_search.py +3 -3
- eodag/rest/types/eodag_search.py +29 -23
- eodag/rest/types/queryables.py +15 -16
- eodag/rest/types/stac_search.py +15 -25
- eodag/rest/utils/__init__.py +14 -21
- eodag/rest/utils/cql_evaluate.py +6 -6
- eodag/rest/utils/rfc3339.py +2 -2
- eodag/types/__init__.py +75 -28
- eodag/types/bbox.py +2 -2
- eodag/types/download_args.py +3 -3
- eodag/types/queryables.py +183 -72
- eodag/types/search_args.py +4 -4
- eodag/types/whoosh.py +127 -3
- eodag/utils/__init__.py +152 -50
- eodag/utils/exceptions.py +28 -21
- eodag/utils/import_system.py +2 -2
- eodag/utils/repr.py +65 -6
- eodag/utils/requests.py +13 -13
- eodag/utils/rest.py +2 -2
- eodag/utils/s3.py +208 -0
- eodag/utils/stac_reader.py +10 -10
- {eodag-3.0.1.dist-info → eodag-3.1.0b2.dist-info}/METADATA +77 -76
- eodag-3.1.0b2.dist-info/RECORD +113 -0
- {eodag-3.0.1.dist-info → eodag-3.1.0b2.dist-info}/WHEEL +1 -1
- {eodag-3.0.1.dist-info → eodag-3.1.0b2.dist-info}/entry_points.txt +4 -2
- eodag/utils/constraints.py +0 -244
- eodag-3.0.1.dist-info/RECORD +0 -109
- {eodag-3.0.1.dist-info → eodag-3.1.0b2.dist-info}/LICENSE +0 -0
- {eodag-3.0.1.dist-info → eodag-3.1.0b2.dist-info}/top_level.txt +0 -0
eodag/types/queryables.py
CHANGED
|
@@ -15,11 +15,20 @@
|
|
|
15
15
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
16
|
# See the License for the specific language governing permissions and
|
|
17
17
|
# limitations under the License.
|
|
18
|
-
from
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from collections import UserDict
|
|
21
|
+
from datetime import date, datetime
|
|
22
|
+
from typing import Annotated, Any, Optional, Union
|
|
19
23
|
|
|
20
24
|
from annotated_types import Lt
|
|
21
25
|
from pydantic import BaseModel, Field
|
|
26
|
+
from pydantic.fields import FieldInfo
|
|
22
27
|
from pydantic.types import PositiveInt
|
|
28
|
+
from pydantic_core import PydanticUndefined
|
|
29
|
+
|
|
30
|
+
from eodag.types import annotated_dict_to_model, model_fields_to_annotated
|
|
31
|
+
from eodag.utils.repr import remove_class_repr, shorter_type_repr
|
|
23
32
|
|
|
24
33
|
Percentage = Annotated[PositiveInt, Lt(100)]
|
|
25
34
|
|
|
@@ -28,17 +37,11 @@ class CommonQueryables(BaseModel):
|
|
|
28
37
|
"""A class representing search common queryable properties."""
|
|
29
38
|
|
|
30
39
|
productType: Annotated[str, Field()]
|
|
31
|
-
id: Annotated[Optional[str], Field(None)]
|
|
32
|
-
start: Annotated[Optional[str], Field(None, alias="startTimeFromAscendingNode")]
|
|
33
|
-
end: Annotated[Optional[str], Field(None, alias="completionTimeFromAscendingNode")]
|
|
34
|
-
geom: Annotated[Optional[str], Field(None, alias="geometry")]
|
|
35
40
|
|
|
36
41
|
@classmethod
|
|
37
42
|
def get_queryable_from_alias(cls, value: str) -> str:
|
|
38
43
|
"""Get queryable parameter from alias
|
|
39
44
|
|
|
40
|
-
>>> CommonQueryables.get_queryable_from_alias('startTimeFromAscendingNode')
|
|
41
|
-
'start'
|
|
42
45
|
>>> CommonQueryables.get_queryable_from_alias('productType')
|
|
43
46
|
'productType'
|
|
44
47
|
"""
|
|
@@ -49,75 +52,183 @@ class CommonQueryables(BaseModel):
|
|
|
49
52
|
}
|
|
50
53
|
return alias_map.get(value, value)
|
|
51
54
|
|
|
55
|
+
@classmethod
|
|
56
|
+
def get_with_default(
|
|
57
|
+
cls, field: str, default: Optional[Any]
|
|
58
|
+
) -> Annotated[Any, FieldInfo]:
|
|
59
|
+
"""Get field and set default value."""
|
|
60
|
+
annotated_fields = model_fields_to_annotated(cls.model_fields)
|
|
61
|
+
f = annotated_fields[field]
|
|
62
|
+
if default is None:
|
|
63
|
+
return f
|
|
64
|
+
f.__metadata__[0].default = default
|
|
65
|
+
return f
|
|
66
|
+
|
|
52
67
|
|
|
53
68
|
class Queryables(CommonQueryables):
|
|
54
|
-
"""A class representing all search queryable properties.
|
|
69
|
+
"""A class representing all search queryable properties.
|
|
55
70
|
|
|
56
|
-
|
|
71
|
+
Parameters default value is set to ``None`` to have them not required.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
start: Annotated[
|
|
75
|
+
Union[datetime, date], Field(None, alias="startTimeFromAscendingNode")
|
|
76
|
+
]
|
|
77
|
+
end: Annotated[
|
|
78
|
+
Union[datetime, date], Field(None, alias="completionTimeFromAscendingNode")
|
|
79
|
+
]
|
|
80
|
+
geom: Annotated[str, Field(None, alias="geometry")]
|
|
81
|
+
uid: Annotated[str, Field(None)]
|
|
57
82
|
# OpenSearch Parameters for Collection Search (Table 3)
|
|
58
|
-
doi: Annotated[
|
|
59
|
-
platform: Annotated[
|
|
60
|
-
platformSerialIdentifier: Annotated[
|
|
61
|
-
instrument: Annotated[
|
|
62
|
-
sensorType: Annotated[
|
|
63
|
-
compositeType: Annotated[
|
|
64
|
-
processingLevel: Annotated[
|
|
65
|
-
orbitType: Annotated[
|
|
66
|
-
spectralRange: Annotated[
|
|
67
|
-
wavelengths: Annotated[
|
|
68
|
-
hasSecurityConstraints: Annotated[
|
|
69
|
-
dissemination: Annotated[
|
|
83
|
+
doi: Annotated[str, Field(None)]
|
|
84
|
+
platform: Annotated[str, Field(None)]
|
|
85
|
+
platformSerialIdentifier: Annotated[str, Field(None)]
|
|
86
|
+
instrument: Annotated[str, Field(None)]
|
|
87
|
+
sensorType: Annotated[str, Field(None)]
|
|
88
|
+
compositeType: Annotated[str, Field(None)]
|
|
89
|
+
processingLevel: Annotated[str, Field(None)]
|
|
90
|
+
orbitType: Annotated[str, Field(None)]
|
|
91
|
+
spectralRange: Annotated[str, Field(None)]
|
|
92
|
+
wavelengths: Annotated[str, Field(None)]
|
|
93
|
+
hasSecurityConstraints: Annotated[str, Field(None)]
|
|
94
|
+
dissemination: Annotated[str, Field(None)]
|
|
70
95
|
# INSPIRE obligated OpenSearch Parameters for Collection Search (Table 4)
|
|
71
|
-
title: Annotated[
|
|
72
|
-
topicCategory: Annotated[
|
|
73
|
-
keyword: Annotated[
|
|
74
|
-
abstract: Annotated[
|
|
75
|
-
resolution: Annotated[
|
|
76
|
-
organisationName: Annotated[
|
|
77
|
-
organisationRole: Annotated[
|
|
78
|
-
publicationDate: Annotated[
|
|
79
|
-
lineage: Annotated[
|
|
80
|
-
useLimitation: Annotated[
|
|
81
|
-
accessConstraint: Annotated[
|
|
82
|
-
otherConstraint: Annotated[
|
|
83
|
-
classification: Annotated[
|
|
84
|
-
language: Annotated[
|
|
85
|
-
specification: Annotated[
|
|
96
|
+
title: Annotated[str, Field(None)]
|
|
97
|
+
topicCategory: Annotated[str, Field(None)]
|
|
98
|
+
keyword: Annotated[str, Field(None)]
|
|
99
|
+
abstract: Annotated[str, Field(None)]
|
|
100
|
+
resolution: Annotated[int, Field(None)]
|
|
101
|
+
organisationName: Annotated[str, Field(None)]
|
|
102
|
+
organisationRole: Annotated[str, Field(None)]
|
|
103
|
+
publicationDate: Annotated[str, Field(None)]
|
|
104
|
+
lineage: Annotated[str, Field(None)]
|
|
105
|
+
useLimitation: Annotated[str, Field(None)]
|
|
106
|
+
accessConstraint: Annotated[str, Field(None)]
|
|
107
|
+
otherConstraint: Annotated[str, Field(None)]
|
|
108
|
+
classification: Annotated[str, Field(None)]
|
|
109
|
+
language: Annotated[str, Field(None)]
|
|
110
|
+
specification: Annotated[str, Field(None)]
|
|
86
111
|
# OpenSearch Parameters for Product Search (Table 5)
|
|
87
|
-
parentIdentifier: Annotated[
|
|
88
|
-
productionStatus: Annotated[
|
|
89
|
-
acquisitionType: Annotated[
|
|
90
|
-
orbitNumber: Annotated[
|
|
91
|
-
orbitDirection: Annotated[
|
|
92
|
-
track: Annotated[
|
|
93
|
-
frame: Annotated[
|
|
94
|
-
swathIdentifier: Annotated[
|
|
95
|
-
cloudCover: Annotated[
|
|
96
|
-
snowCover: Annotated[
|
|
97
|
-
lowestLocation: Annotated[
|
|
98
|
-
highestLocation: Annotated[
|
|
99
|
-
productVersion: Annotated[
|
|
100
|
-
productQualityStatus: Annotated[
|
|
101
|
-
productQualityDegradationTag: Annotated[
|
|
102
|
-
processorName: Annotated[
|
|
103
|
-
processingCenter: Annotated[
|
|
104
|
-
creationDate: Annotated[
|
|
105
|
-
modificationDate: Annotated[
|
|
106
|
-
processingDate: Annotated[
|
|
107
|
-
sensorMode: Annotated[
|
|
108
|
-
archivingCenter: Annotated[
|
|
109
|
-
processingMode: Annotated[
|
|
112
|
+
parentIdentifier: Annotated[str, Field(None)]
|
|
113
|
+
productionStatus: Annotated[str, Field(None)]
|
|
114
|
+
acquisitionType: Annotated[str, Field(None)]
|
|
115
|
+
orbitNumber: Annotated[int, Field(None)]
|
|
116
|
+
orbitDirection: Annotated[str, Field(None)]
|
|
117
|
+
track: Annotated[str, Field(None)]
|
|
118
|
+
frame: Annotated[str, Field(None)]
|
|
119
|
+
swathIdentifier: Annotated[str, Field(None)]
|
|
120
|
+
cloudCover: Annotated[Percentage, Field(None)]
|
|
121
|
+
snowCover: Annotated[Percentage, Field(None)]
|
|
122
|
+
lowestLocation: Annotated[str, Field(None)]
|
|
123
|
+
highestLocation: Annotated[str, Field(None)]
|
|
124
|
+
productVersion: Annotated[str, Field(None)]
|
|
125
|
+
productQualityStatus: Annotated[str, Field(None)]
|
|
126
|
+
productQualityDegradationTag: Annotated[str, Field(None)]
|
|
127
|
+
processorName: Annotated[str, Field(None)]
|
|
128
|
+
processingCenter: Annotated[str, Field(None)]
|
|
129
|
+
creationDate: Annotated[str, Field(None)]
|
|
130
|
+
modificationDate: Annotated[str, Field(None)]
|
|
131
|
+
processingDate: Annotated[str, Field(None)]
|
|
132
|
+
sensorMode: Annotated[str, Field(None)]
|
|
133
|
+
archivingCenter: Annotated[str, Field(None)]
|
|
134
|
+
processingMode: Annotated[str, Field(None)]
|
|
110
135
|
# OpenSearch Parameters for Acquistion Parameters Search (Table 6)
|
|
111
|
-
availabilityTime: Annotated[
|
|
112
|
-
acquisitionStation: Annotated[
|
|
113
|
-
acquisitionSubType: Annotated[
|
|
114
|
-
illuminationAzimuthAngle: Annotated[
|
|
115
|
-
illuminationZenithAngle: Annotated[
|
|
116
|
-
illuminationElevationAngle: Annotated[
|
|
117
|
-
polarizationMode: Annotated[
|
|
118
|
-
polarizationChannels: Annotated[
|
|
119
|
-
antennaLookDirection: Annotated[
|
|
120
|
-
minimumIncidenceAngle: Annotated[
|
|
121
|
-
maximumIncidenceAngle: Annotated[
|
|
122
|
-
dopplerFrequency: Annotated[
|
|
123
|
-
incidenceAngleVariation: Annotated[
|
|
136
|
+
availabilityTime: Annotated[str, Field(None)]
|
|
137
|
+
acquisitionStation: Annotated[str, Field(None)]
|
|
138
|
+
acquisitionSubType: Annotated[str, Field(None)]
|
|
139
|
+
illuminationAzimuthAngle: Annotated[str, Field(None)]
|
|
140
|
+
illuminationZenithAngle: Annotated[str, Field(None)]
|
|
141
|
+
illuminationElevationAngle: Annotated[str, Field(None)]
|
|
142
|
+
polarizationMode: Annotated[str, Field(None)]
|
|
143
|
+
polarizationChannels: Annotated[str, Field(None)]
|
|
144
|
+
antennaLookDirection: Annotated[str, Field(None)]
|
|
145
|
+
minimumIncidenceAngle: Annotated[float, Field(None)]
|
|
146
|
+
maximumIncidenceAngle: Annotated[float, Field(None)]
|
|
147
|
+
dopplerFrequency: Annotated[float, Field(None)]
|
|
148
|
+
incidenceAngleVariation: Annotated[float, Field(None)]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class QueryablesDict(UserDict[str, Any]):
|
|
152
|
+
"""Class inheriting from UserDict which contains queryables with their annotated type;
|
|
153
|
+
|
|
154
|
+
:param additional_properties: if additional properties (properties not given in EODAG config)
|
|
155
|
+
are allowed
|
|
156
|
+
:param kwargs: named arguments to initialise the dict (queryable keys + annotated types)
|
|
157
|
+
"""
|
|
158
|
+
|
|
159
|
+
additional_properties: bool = Field(True)
|
|
160
|
+
additional_information: str = Field("")
|
|
161
|
+
|
|
162
|
+
def __init__(
|
|
163
|
+
self,
|
|
164
|
+
additional_properties: bool = True,
|
|
165
|
+
additional_information: str = "",
|
|
166
|
+
**kwargs: Any,
|
|
167
|
+
):
|
|
168
|
+
self.additional_properties = additional_properties
|
|
169
|
+
self.additional_information = additional_information
|
|
170
|
+
super().__init__(kwargs)
|
|
171
|
+
|
|
172
|
+
def _repr_html_(self, embedded: bool = False) -> str:
|
|
173
|
+
add_info = (
|
|
174
|
+
f" additional_information={self.additional_information}"
|
|
175
|
+
if self.additional_information
|
|
176
|
+
else ""
|
|
177
|
+
)
|
|
178
|
+
thead = (
|
|
179
|
+
f"""<thead><tr><td style='text-align: left; color: grey;'>
|
|
180
|
+
{type(self).__name__} ({len(self)}) - additional_properties={
|
|
181
|
+
self.additional_properties}
|
|
182
|
+
"""
|
|
183
|
+
+ add_info
|
|
184
|
+
+ "</td></tr></thead>"
|
|
185
|
+
if not embedded
|
|
186
|
+
else ""
|
|
187
|
+
)
|
|
188
|
+
tr_style = "style='background-color: transparent;'" if embedded else ""
|
|
189
|
+
return (
|
|
190
|
+
f"<table>{thead}<tbody>"
|
|
191
|
+
+ "".join(
|
|
192
|
+
[
|
|
193
|
+
f"""<tr {tr_style}><td style='text-align: left;'>
|
|
194
|
+
<details><summary style='color: grey;'>
|
|
195
|
+
<span style='color: black'>'{k}'</span>: 
|
|
196
|
+
typing.Annotated[{
|
|
197
|
+
"<span style='color: black'>" + shorter_type_repr(v.__args__[0]) + "</span>, "
|
|
198
|
+
}
|
|
199
|
+
FieldInfo({"'default': '<span style='color: black'>"
|
|
200
|
+
+ str(v.__metadata__[0].get_default()) + "</span>', "
|
|
201
|
+
if v.__metadata__[0].get_default()
|
|
202
|
+
and v.__metadata__[0].get_default() != PydanticUndefined else ""}
|
|
203
|
+
{"'required': <span style='color: black'>"
|
|
204
|
+
+ str(v.__metadata__[0].is_required()) + "</span>,"}
|
|
205
|
+
...
|
|
206
|
+
)]
|
|
207
|
+
</summary>
|
|
208
|
+
<span style='color: grey'>typing.Annotated[</span><table style='margin: 0;'>
|
|
209
|
+
<tr style='background-color: transparent;'>
|
|
210
|
+
<td style='padding: 5px 0 0 10px; text-align: left; vertical-align:top;'>
|
|
211
|
+
{remove_class_repr(str(v.__args__[0]))},</td>
|
|
212
|
+
</tr><tr style='background-color: transparent;'>
|
|
213
|
+
<td style='padding: 5px 0 0 10px; text-align: left; vertical-align:top;'>
|
|
214
|
+
{v.__metadata__[0].__repr__()}</td>
|
|
215
|
+
</tr>
|
|
216
|
+
</table><span style='color: grey'>]</span>
|
|
217
|
+
</details>
|
|
218
|
+
</td></tr>
|
|
219
|
+
"""
|
|
220
|
+
for k, v in self.items()
|
|
221
|
+
]
|
|
222
|
+
)
|
|
223
|
+
+ "</tbody></table>"
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
def get_model(self, model_name: str = "Queryables") -> BaseModel:
|
|
227
|
+
"""
|
|
228
|
+
Converts object from :class:`eodag.api.product.QueryablesDict` to :class:`pydantic.BaseModel`
|
|
229
|
+
so that validation can be performed
|
|
230
|
+
|
|
231
|
+
:param model_name: name used for :class:`pydantic.BaseModel` creation
|
|
232
|
+
:return: pydantic BaseModel of the queryables dict
|
|
233
|
+
"""
|
|
234
|
+
return annotated_dict_to_model(model_name, self.data)
|
eodag/types/search_args.py
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# limitations under the License.
|
|
18
18
|
import re
|
|
19
19
|
from datetime import datetime
|
|
20
|
-
from typing import Annotated, Any,
|
|
20
|
+
from typing import Annotated, Any, Optional, Union, cast
|
|
21
21
|
|
|
22
22
|
from annotated_types import MinLen
|
|
23
23
|
from pydantic import BaseModel, ConfigDict, Field, conint, field_validator
|
|
@@ -31,10 +31,10 @@ from eodag.utils import DEFAULT_ITEMS_PER_PAGE, DEFAULT_PAGE
|
|
|
31
31
|
from eodag.utils.exceptions import ValidationError
|
|
32
32
|
|
|
33
33
|
NumType = Union[float, int]
|
|
34
|
-
GeomArgs = Union[
|
|
34
|
+
GeomArgs = Union[list[NumType], tuple[NumType], dict[str, NumType], str, BaseGeometry]
|
|
35
35
|
|
|
36
36
|
PositiveInt = conint(gt=0)
|
|
37
|
-
SortByList = Annotated[
|
|
37
|
+
SortByList = Annotated[list[tuple[str, str]], MinLen(1)]
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
class SearchArgs(BaseModel):
|
|
@@ -48,7 +48,7 @@ class SearchArgs(BaseModel):
|
|
|
48
48
|
start: Optional[str] = Field(None)
|
|
49
49
|
end: Optional[str] = Field(None)
|
|
50
50
|
geom: Optional[BaseGeometry] = Field(None)
|
|
51
|
-
locations: Optional[
|
|
51
|
+
locations: Optional[dict[str, str]] = Field(None)
|
|
52
52
|
page: Optional[int] = Field(DEFAULT_PAGE, gt=0) # type: ignore
|
|
53
53
|
items_per_page: Optional[PositiveInt] = Field(DEFAULT_ITEMS_PER_PAGE) # type: ignore
|
|
54
54
|
sort_by: Optional[SortByList] = Field(None) # type: ignore
|
eodag/types/whoosh.py
CHANGED
|
@@ -15,13 +15,14 @@
|
|
|
15
15
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
16
|
# See the License for the specific language governing permissions and
|
|
17
17
|
# limitations under the License.
|
|
18
|
-
from typing import List
|
|
19
|
-
|
|
20
18
|
from whoosh.fields import Schema
|
|
19
|
+
from whoosh.index import _DEF_INDEX_NAME, FileIndex
|
|
21
20
|
from whoosh.matching import NullMatcher
|
|
22
21
|
from whoosh.qparser import OrGroup, QueryParser, plugins
|
|
23
22
|
from whoosh.query.positional import Phrase
|
|
24
23
|
from whoosh.query.qcore import QueryError
|
|
24
|
+
from whoosh.util.text import utf8encode
|
|
25
|
+
from whoosh.writing import SegmentWriter
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
class RobustPhrase(Phrase):
|
|
@@ -49,7 +50,7 @@ class EODAGQueryParser(QueryParser):
|
|
|
49
50
|
|
|
50
51
|
def __init__(
|
|
51
52
|
self,
|
|
52
|
-
filters:
|
|
53
|
+
filters: list[str],
|
|
53
54
|
schema: Schema,
|
|
54
55
|
):
|
|
55
56
|
"""
|
|
@@ -77,3 +78,126 @@ class EODAGQueryParser(QueryParser):
|
|
|
77
78
|
phraseclass=RobustPhrase,
|
|
78
79
|
group=OrGroup,
|
|
79
80
|
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class CleanSegmentWriter(SegmentWriter):
|
|
84
|
+
"""Override to clean up writer for failed document add when exceptions were absorbed
|
|
85
|
+
cf: https://github.com/whoosh-community/whoosh/pull/543
|
|
86
|
+
"""
|
|
87
|
+
|
|
88
|
+
def add_document(self, **fields):
|
|
89
|
+
"""Add document"""
|
|
90
|
+
self._check_state()
|
|
91
|
+
perdocwriter = self.perdocwriter
|
|
92
|
+
schema = self.schema
|
|
93
|
+
docnum = self.docnum
|
|
94
|
+
add_post = self.pool.add
|
|
95
|
+
|
|
96
|
+
docboost = self._doc_boost(fields)
|
|
97
|
+
fieldnames = sorted(
|
|
98
|
+
[name for name in fields.keys() if not name.startswith("_")]
|
|
99
|
+
)
|
|
100
|
+
self._check_fields(schema, fieldnames)
|
|
101
|
+
|
|
102
|
+
perdocwriter.start_doc(docnum)
|
|
103
|
+
|
|
104
|
+
try:
|
|
105
|
+
for fieldname in fieldnames:
|
|
106
|
+
value = fields.get(fieldname)
|
|
107
|
+
if value is None:
|
|
108
|
+
continue
|
|
109
|
+
field = schema[fieldname]
|
|
110
|
+
|
|
111
|
+
length = 0
|
|
112
|
+
if field.indexed:
|
|
113
|
+
# TODO: Method for adding progressive field values, ie
|
|
114
|
+
# setting start_pos/start_char?
|
|
115
|
+
fieldboost = self._field_boost(fields, fieldname, docboost)
|
|
116
|
+
# Ask the field to return a list of (text, weight, vbytes)
|
|
117
|
+
# tuples
|
|
118
|
+
items = field.index(value)
|
|
119
|
+
# Only store the length if the field is marked scorable
|
|
120
|
+
scorable = field.scorable
|
|
121
|
+
# Add the terms to the pool
|
|
122
|
+
for tbytes, freq, weight, vbytes in items:
|
|
123
|
+
weight *= fieldboost
|
|
124
|
+
if scorable:
|
|
125
|
+
length += freq
|
|
126
|
+
add_post((fieldname, tbytes, docnum, weight, vbytes))
|
|
127
|
+
|
|
128
|
+
if field.separate_spelling():
|
|
129
|
+
spellfield = field.spelling_fieldname(fieldname)
|
|
130
|
+
for word in field.spellable_words(value):
|
|
131
|
+
word = utf8encode(word)[0]
|
|
132
|
+
add_post((spellfield, word, 0, 1, vbytes))
|
|
133
|
+
|
|
134
|
+
vformat = field.vector
|
|
135
|
+
if vformat:
|
|
136
|
+
analyzer = field.analyzer
|
|
137
|
+
# Call the format's word_values method to get posting values
|
|
138
|
+
vitems = vformat.word_values(value, analyzer, mode="index")
|
|
139
|
+
# Remove unused frequency field from the tuple
|
|
140
|
+
vitems = sorted(
|
|
141
|
+
(text, weight, vbytes) for text, _, weight, vbytes in vitems
|
|
142
|
+
)
|
|
143
|
+
perdocwriter.add_vector_items(fieldname, field, vitems)
|
|
144
|
+
|
|
145
|
+
# Allow a custom value for stored field/column
|
|
146
|
+
customval = fields.get("_stored_%s" % fieldname, value)
|
|
147
|
+
|
|
148
|
+
# Add the stored value and length for this field to the per-
|
|
149
|
+
# document writer
|
|
150
|
+
sv = customval if field.stored else None
|
|
151
|
+
perdocwriter.add_field(fieldname, field, sv, length)
|
|
152
|
+
|
|
153
|
+
column = field.column_type
|
|
154
|
+
if column and customval is not None:
|
|
155
|
+
cv = field.to_column_value(customval)
|
|
156
|
+
perdocwriter.add_column_value(fieldname, column, cv)
|
|
157
|
+
except Exception as ex:
|
|
158
|
+
# cancel doc
|
|
159
|
+
perdocwriter._doccount -= 1
|
|
160
|
+
perdocwriter._indoc = False
|
|
161
|
+
raise ex
|
|
162
|
+
|
|
163
|
+
perdocwriter.finish_doc()
|
|
164
|
+
self._added = True
|
|
165
|
+
self.docnum += 1
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class CleanFileIndex(FileIndex):
|
|
169
|
+
"""Override to call CleanSegmentWriter"""
|
|
170
|
+
|
|
171
|
+
def writer(self, procs=1, **kwargs):
|
|
172
|
+
"""file index writer"""
|
|
173
|
+
if procs > 1:
|
|
174
|
+
from whoosh.multiproc import MpWriter
|
|
175
|
+
|
|
176
|
+
return MpWriter(self, procs=procs, **kwargs)
|
|
177
|
+
else:
|
|
178
|
+
return CleanSegmentWriter(self, **kwargs)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def create_in(dirname, schema, indexname=None):
|
|
182
|
+
"""
|
|
183
|
+
Override to call the CleanFileIndex.
|
|
184
|
+
|
|
185
|
+
Convenience function to create an index in a directory. Takes care of
|
|
186
|
+
creating a FileStorage object for you.
|
|
187
|
+
|
|
188
|
+
:param dirname: the path string of the directory in which to create the
|
|
189
|
+
index.
|
|
190
|
+
:param schema: a :class:`whoosh.fields.Schema` object describing the
|
|
191
|
+
index's fields.
|
|
192
|
+
:param indexname: the name of the index to create; you only need to specify
|
|
193
|
+
this if you are creating multiple indexes within the same storage
|
|
194
|
+
object.
|
|
195
|
+
:returns: :class:`Index`
|
|
196
|
+
"""
|
|
197
|
+
|
|
198
|
+
from whoosh.filedb.filestore import FileStorage
|
|
199
|
+
|
|
200
|
+
if not indexname:
|
|
201
|
+
indexname = _DEF_INDEX_NAME
|
|
202
|
+
storage = FileStorage(dirname)
|
|
203
|
+
return CleanFileIndex.create(storage, schema, indexname)
|