eodag 3.6.0__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- eodag/api/core.py +110 -189
- eodag/api/product/metadata_mapping.py +42 -3
- eodag/cli.py +6 -3
- eodag/config.py +7 -1
- eodag/plugins/authentication/openid_connect.py +1 -2
- eodag/plugins/download/aws.py +145 -178
- eodag/plugins/download/base.py +3 -2
- eodag/plugins/download/creodias_s3.py +10 -5
- eodag/plugins/download/http.py +14 -6
- eodag/plugins/download/s3rest.py +7 -3
- eodag/plugins/manager.py +1 -1
- eodag/plugins/search/base.py +34 -4
- eodag/plugins/search/build_search_result.py +3 -0
- eodag/plugins/search/cop_marine.py +2 -0
- eodag/plugins/search/data_request_search.py +6 -1
- eodag/plugins/search/qssearch.py +64 -25
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +30 -171
- eodag/resources/providers.yml +87 -328
- eodag/resources/stac.yml +1 -2
- eodag/resources/stac_provider.yml +1 -1
- eodag/resources/user_conf_template.yml +0 -11
- eodag/rest/core.py +5 -16
- eodag/rest/stac.py +0 -4
- eodag/utils/__init__.py +41 -27
- eodag/utils/exceptions.py +4 -0
- eodag/utils/free_text_search.py +229 -0
- eodag/utils/s3.py +605 -65
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/METADATA +7 -9
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/RECORD +34 -34
- eodag/types/whoosh.py +0 -203
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/WHEEL +0 -0
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/entry_points.txt +0 -0
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/licenses/LICENSE +0 -0
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/top_level.txt +0 -0
eodag/rest/core.py
CHANGED
@@ -23,6 +23,7 @@ import os
 import re
 from typing import TYPE_CHECKING, cast
 from unittest.mock import Mock
+from urllib.parse import urlencode

 import dateutil
 from cachetools.func import lru_cache
@@ -67,8 +68,6 @@ from eodag.utils import (
     deepcopy,
     dict_items_recursive_apply,
     format_dict_items,
-    obj_md5sum,
-    urlencode,
 )
 from eodag.utils.exceptions import (
     MisconfiguredError,
@@ -123,7 +122,7 @@ def format_product_types(product_types: list[dict[str, Any]]) -> str:
     """
     result: list[str] = []
     for pt in product_types:
-        result.append(f
+        result.append(f"* *__{pt['ID']}__*: {pt['abstract']}")
     return "\n".join(sorted(result))


@@ -266,11 +265,7 @@ def download_stac_item(
         _order_and_update(product, auth, kwargs)

         download_stream = product.downloader._stream_download_dict(
-            product,
-            auth=auth,
-            asset=asset,
-            wait=-1,
-            timeout=-1,
+            product, auth=auth, asset=asset, wait=-1, timeout=-1
         )
     except NotImplementedError:
         logger.warning(
@@ -686,7 +681,7 @@ def crunch_products(
     cruncher = crunchers.get(cruncher_name)
     if not cruncher:
         raise ValidationError(
-            f
+            f"Unknown crunch name. Use one of: {', '.join(crunchers.keys())}"
         )

     cruncher_config: dict[str, Any] = {}
@@ -696,7 +691,7 @@ def crunch_products(
            raise ValidationError(
                (
                    f"cruncher {cruncher} require additional parameters:"
-                    f
+                    f" {', '.join(cruncher.config_params)}"
                )
            )
        cruncher_config[config_param] = config_param_value
@@ -756,12 +751,6 @@ def eodag_api_init() -> None:
         clean = {k: v for k, v in update_fields.items() if v}
         p_f.update(clean)

-    eodag_api.product_types_config_md5 = obj_md5sum(
-        eodag_api.product_types_config.source
-    )
-
-    eodag_api.build_index()
-
     # pre-build search plugins
     for provider in eodag_api.available_providers():
         next(eodag_api._plugins_manager.get_search_plugins(provider=provider))
eodag/rest/stac.py
CHANGED
@@ -790,10 +790,6 @@ class StacCollection(StacCommon):
             f"Product type keywords: {str(product_type_collection['keywords'])}",
         )

-        # merge providers
-        if "providers" in ext_stac_collection:
-            ext_stac_collection["providers"] += product_type_collection["providers"]
-
         product_type_collection.update(ext_stac_collection)

         # parse f-strings
eodag/utils/__init__.py
CHANGED
@@ -36,6 +36,7 @@ import re
 import shutil
 import ssl
 import string
+import struct
 import sys
 import types
 import unicodedata
@@ -61,18 +62,7 @@ from typing import (
     Union,
     cast,
 )
-
-# All modules using these should import them from utils package
-from urllib.parse import (  # noqa; noqa
-    parse_qs,
-    parse_qsl,
-    quote,
-    unquote,
-    urlencode,
-    urljoin,
-    urlparse,
-    urlsplit,
-)
+from urllib.parse import urlparse, urlsplit
 from urllib.request import url2pathname

 if sys.version_info >= (3, 12):
@@ -80,7 +70,6 @@ if sys.version_info >= (3, 12):
 else:
     from typing_extensions import Unpack  # noqa

-
 import click
 import orjson
 import shapefile
@@ -224,14 +213,13 @@ class FloatRange(click.types.FloatParamType):
         ):
             if self.min is None:
                 self.fail(
-                    "%s is bigger than the maximum valid value
+                    "%s is bigger than the maximum valid value %s." % (rv, self.max),
                     param,
                     ctx,
                 )
             elif self.max is None:
                 self.fail(
-                    "%s is smaller than the minimum valid value "
-                    "%s." % (rv, self.min),
+                    "%s is smaller than the minimum valid value %s." % (rv, self.min),
                     param,
                     ctx,
                 )
@@ -387,27 +375,29 @@ def merge_mappings(mapping1: dict[Any, Any], mapping2: dict[Any, Any]) -> None:
         # `m1_keys_lowercase.get(key, key)`
         current_value = mapping1.get(m1_keys_lowercase.get(key, key))
         if current_value is not None:
-            current_value_type = type(current_value)
-            new_value_type = type(value)
             try:
                 # If current or new value is a list (search queryable parameter), simply replace current with new
                 if (
-
-                    and
-                    or
-                    and
+                    isinstance(value, list)
+                    and not isinstance(current_value, list)
+                    or not isinstance(value, list)
+                    and isinstance(current_value, list)
                 ):
                     mapping1[m1_keys_lowercase.get(key, key)] = value
                 else:
                     mapping1[m1_keys_lowercase.get(key, key)] = cast_scalar_value(
-                        value,
+                        value, type(current_value)
                     )
             except (TypeError, ValueError):
                 # Ignore any override value that does not have the same type
                 # as the default value
                 logger.debug(
-
-
+                    "Ignored '%s' setting override from '%s' to '%s', (could not cast %s to %s)",
+                    key,
+                    current_value,
+                    value,
+                    type(value),
+                    type(current_value),
                 )
                 pass
             else:
@@ -1451,8 +1441,7 @@ def cast_scalar_value(value: Any, new_type: Any) -> Any:
         # case
         if value.capitalize() not in ("True", "False"):
             raise ValueError(
-                "Only true or false strings (case insensitive) are "
-                "allowed for booleans"
+                "Only true or false strings (case insensitive) are allowed for booleans"
             )
         # Get the real Python value of the boolean. e.g: value='tRuE'
         # => eval(value.capitalize())=True.
@@ -1505,6 +1494,7 @@ def guess_extension(type: str) -> Optional[str]:
     return mimetypes.guess_extension(type, strict=False)


+@functools.lru_cache(maxsize=2)
 def get_ssl_context(ssl_verify: bool) -> ssl.SSLContext:
     """
     Returns an SSL context based on ``ssl_verify`` argument.
@@ -1572,3 +1562,27 @@ def remove_str_array_quotes(input_str: str) -> str:
             continue
         output_str += input_str[i]
     return output_str
+
+
+def parse_le_uint32(data: bytes) -> int:
+    """
+    Parse little-endian unsigned 4-byte integer.
+
+    >>> parse_le_uint32(b'\\x01\\x00\\x00\\x00')
+    1
+    >>> parse_le_uint32(b'\\xff\\xff\\xff\\xff')
+    4294967295
+    """
+    return struct.unpack("<I", data)[0]
+
+
+def parse_le_uint16(data: bytes) -> int:
+    """
+    Parse little-endian unsigned 2-byte integer.
+
+    >>> parse_le_uint16(b'\\x01\\x00')
+    1
+    >>> parse_le_uint16(b'\\xff\\xff')
+    65535
+    """
+    return struct.unpack("<H", data)[0]
eodag/utils/exceptions.py
CHANGED
@@ -79,6 +79,10 @@ class STACOpenerError(EodagError):
     """An error indicating that a STAC file could not be opened"""


+class InvalidDataError(EodagError):
+    """Raised when data is invalid, malformed, or corrupt and cannot be processed as expected."""
+
+
 class RequestError(EodagError):
     """An error indicating that a request has failed. Usually eodag functions
     and methods should catch and skip this"""
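The new InvalidDataError subclasses EodagError and adds no behaviour of its own. A minimal sketch of how calling code could raise it; check_zip_signature is a hypothetical helper, not part of eodag:

    from eodag.utils.exceptions import InvalidDataError

    def check_zip_signature(data: bytes) -> None:
        # Hypothetical check: ZIP local file headers start with the b"PK\x03\x04" magic.
        if not data.startswith(b"PK\x03\x04"):
            raise InvalidDataError("Data does not look like a valid ZIP archive")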
eodag/utils/free_text_search.py
ADDED
@@ -0,0 +1,229 @@
+# -*- coding: utf-8 -*-
+# Copyright 2025, CS GROUP - France, https://www.csgroup.eu/
+#
+# This file is part of EODAG project
+# https://www.github.com/CS-SI/EODAG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Callable
+
+from eodag.utils.exceptions import ValidationError
+
+
+def _tokenize(expr: str) -> list[str]:
+    """
+    Tokenizes a search expression into words, logical operators, and quoted phrases.
+
+    Handles:
+    - Logical operators: AND, OR, NOT
+    - Quoted phrases: "exact phrase"
+    - Wildcards: * and ? inside words
+    - Parentheses: (, )
+
+    :param expr: The search string (e.g., '("foo" OR bar) AND baz')
+    :return: A list of tokens (e.g., ['(', '"foo"', 'OR', 'BAR', ')', 'AND', 'BAZ'])
+
+    >>> _tokenize('("foo* bar?" OR baz) AND qux')
+    ['(', '"foo* bar?"', 'OR', 'BAZ', ')', 'AND', 'QUX']
+    """
+    # Match quoted phrases or unquoted tokens (including * and ?), or parentheses
+    pattern = r'"[^"]*"|AND|OR|NOT|\(|\)|[^\s()"]+'
+    raw_tokens = re.findall(pattern, expr)
+
+    tokens = []
+    for token in raw_tokens:
+        if token.startswith('"') and token.endswith('"'):
+            tokens.append(token)
+        elif token.upper() in {"AND", "OR", "NOT"}:
+            tokens.append(token.upper())
+        else:
+            tokens.append(token.upper())
+    return tokens
+
+
+def _to_postfix(tokens: list[str]) -> list[str]:
+    """
+    Converts infix tokens to postfix (Reverse Polish Notation) using the Shunting Yard algorithm.
+
+    :param tokens: List of tokens in infix order.
+    :return: List of tokens in postfix order.
+
+    :raises ValidationError: If parentheses are unbalanced.
+
+    >>> _to_postfix(['FOO', 'AND', '(', 'BAR', 'OR', 'BAZ', ')'])
+    ['FOO', 'BAR', 'BAZ', 'OR', 'AND']
+
+    >>> _to_postfix(['(', 'FOO', 'AND', 'BAR'])
+    Traceback (most recent call last):
+    ...
+    eodag.utils.exceptions.ValidationError: Mismatched parentheses in expression
+    """
+    precedence = {"NOT": 3, "AND": 2, "OR": 1}
+    output: list[str] = []
+    stack: list[str] = []
+
+    for token in tokens:
+        if token in precedence:
+            while (
+                stack
+                and stack[-1] != "("
+                and precedence.get(stack[-1], 0) >= precedence[token]
+            ):
+                output.append(stack.pop())
+            stack.append(token)
+        elif token == "(":
+            stack.append(token)
+        elif token == ")":
+            while stack and stack[-1] != "(":
+                output.append(stack.pop())
+            if not stack:
+                raise ValidationError("Mismatched parentheses in expression")
+            # Remove '('
+            stack.pop()
+        else:
+            output.append(token)
+
+    while stack:
+        if stack[-1] == "(":
+            raise ValidationError("Mismatched parentheses in expression")
+        output.append(stack.pop())
+
+    return output
+
+
+def _make_evaluator(postfix_expr: list[str]) -> Callable[[dict[str, str]], bool]:
+    """
+    Returns a function that evaluates a postfix expression on a dictionary of string fields.
+
+    Quoted phrases are matched exactly (case-insensitive).
+    Unquoted tokens are matched as case-insensitive full words (unless they contain wildcards).
+
+    :param postfix_expr: List of tokens in postfix order.
+    :return: A function that returns True if the dict matches.
+
+    >>> evaluator = _make_evaluator(['FOO', 'BAR', 'OR'])
+    >>> evaluator({'title': 'some foo text'})
+    True
+    >>> evaluator({'title': 'some bar text'})
+    True
+    >>> evaluator({'title': 'nothing'})
+    False
+    >>> evaluator2 = _make_evaluator(['"foo text"', 'NOT'])
+    >>> evaluator2({'title': 'some foo text'})
+    False
+    >>> evaluator2({'title': 'some bar'})
+    True
+    """
+
+    def evaluate(entry: dict[str, str]) -> bool:
+        stack: list[bool] = []
+        text = " ".join(str(v) for v in entry.values()).lower()
+
+        for token in postfix_expr:
+            if token == "AND":
+                b, a = stack.pop(), stack.pop()
+                stack.append(a and b)
+            elif token == "OR":
+                b, a = stack.pop(), stack.pop()
+                stack.append(a or b)
+            elif token == "NOT":
+                a = stack.pop()
+                stack.append(not a)
+            else:
+                if token.startswith('"') and token.endswith('"'):
+                    phrase = token[1:-1].lower()
+                    stack.append(phrase in text)
+                else:
+                    # Wildcard tokens → regex with .* and .
+                    if "*" in token or "?" in token:
+                        wildcard_pattern = (
+                            re.escape(token.lower())
+                            .replace(r"\*", ".*")
+                            .replace(r"\?", ".")
+                        )
+                        regex = re.compile(wildcard_pattern, flags=re.IGNORECASE)
+                    else:
+                        # Plain token → must match as a whole word
+                        regex = re.compile(
+                            rf"\b{re.escape(token.lower())}\b", flags=re.IGNORECASE
+                        )
+
+                    stack.append(bool(regex.search(text)))
+
+        return stack[0]
+
+    return evaluate
+
+
+def compile_free_text_query(query: str) -> Callable[[dict[str, str]], bool]:
+    """
+    Compiles a free-text logical search query into a dictionary evaluator function.
+
+    The evaluator checks whether the concatenated string values of a dictionary
+    (case-insensitive) satisfy the given logical expression.
+
+    Processing steps:
+    1. Tokenize the query into words, quoted phrases, wildcards, and operators.
+    2. Convert infix tokens into postfix notation using the Shunting Yard algorithm.
+    3. Build an evaluator function that applies the expression to dictionary fields.
+
+    Supported features:
+    - Logical operators: ``AND``, ``OR``, ``NOT``
+    - Grouping with parentheses: ``(``, ``)``
+    - Exact phrases in quotes: ``"foo bar"`` (case-insensitive substring match)
+    - Wildcards inside tokens:
+      - ``*`` → matches zero or more characters
+      - ``?`` → matches exactly one character
+    - Plain tokens without wildcards → matched as whole words (word boundary aware)
+    - Case-insensitive matching across all tokens and phrases
+
+    :param query: A logical search expression
+        (e.g., ``'("foo bar" OR baz*) AND NOT qux'``).
+    :return: A function that takes a ``dict[str, str]`` and returns ``True`` if it matches.
+
+    :Example:
+
+    >>> evaluator = compile_free_text_query('("FooAndBar" OR BAR) AND "FOOBAR collection"')
+    >>> evaluator({
+    ...     "title": "titleFOOBAR - Lorem FOOBAR collection",
+    ...     "abstract": "abstract FOOBAR - This is FOOBAR. FooAndBar"
+    ... })
+    True
+    >>> evaluator({
+    ...     "title": "collection FOOBAR",
+    ...     "abstract": "abstract FOOBAR - This is FOOBAR. FooAndBar"
+    ... })
+    False
+    >>> evaluator({
+    ...     "title": "titleFOOBAR - Lorem FOOBAR ",
+    ...     "abstract": "abstract FOOBAR - This is FOOBAR."
+    ... })
+    False
+    >>> evaluator({"title": "Only Bar here"})
+    False
+
+    Wildcard example:
+
+    >>> evaluator = compile_free_text_query('foo*')
+    >>> evaluator({"title": "this is foobar"})
+    True
+    >>> evaluator({"title": "something with fooo"})
+    True
+    >>> evaluator({"title": "bar only"})
+    False
+    """
+
+    tokens = _tokenize(query)
+    postfix = _to_postfix(tokens)
+    return _make_evaluator(postfix)
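As a usage note, compile_free_text_query returns a predicate over dict[str, str] entries, which makes it straightforward to filter product type metadata with a free-text expression. A minimal sketch with made-up entries:

    from eodag.utils.free_text_search import compile_free_text_query

    matches = compile_free_text_query('("Sentinel-2" OR S2*) AND NOT radar')

    # Illustrative product type entries, not taken from eodag's catalog.
    product_types = [
        {"ID": "S2_MSI_L1C", "abstract": "Sentinel-2 MSI Level-1C products"},
        {"ID": "S1_SAR_GRD", "abstract": "Sentinel-1 radar products"},
    ]

    selected = [pt for pt in product_types if matches(pt)]
    # Only the Sentinel-2 entry satisfies the expression.
    assert [pt["ID"] for pt in selected] == ["S2_MSI_L1C"]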