eodag 3.7.0__py3-none-any.whl → 3.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eodag/types/whoosh.py DELETED
@@ -1,203 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2024, CS Systemes d'Information, https://www.csgroup.eu/
3
- #
4
- # This file is part of EODAG project
5
- # https://www.github.com/CS-SI/EODAG
6
- #
7
- # Licensed under the Apache License, Version 2.0 (the "License");
8
- # you may not use this file except in compliance with the License.
9
- # You may obtain a copy of the License at
10
- #
11
- # http://www.apache.org/licenses/LICENSE-2.0
12
- #
13
- # Unless required by applicable law or agreed to in writing, software
14
- # distributed under the License is distributed on an "AS IS" BASIS,
15
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
- # See the License for the specific language governing permissions and
17
- # limitations under the License.
18
- from whoosh.fields import Schema
19
- from whoosh.index import _DEF_INDEX_NAME, FileIndex
20
- from whoosh.matching import NullMatcher
21
- from whoosh.qparser import OrGroup, QueryParser, plugins
22
- from whoosh.query.positional import Phrase
23
- from whoosh.query.qcore import QueryError
24
- from whoosh.util.text import utf8encode
25
- from whoosh.writing import SegmentWriter
26
-
27
-
28
class RobustPhrase(Phrase):
    """
    Phrase query whose matcher degrades to "no match" instead of failing.
    """

    def matcher(self, searcher, context=None):
        """
        Build the matcher for this phrase, turning a query error into no result.

        Delegates to the parent ``matcher``; when that raises ``QueryError``,
        a ``NullMatcher`` is returned instead of propagating the error.
        Needed to handle phrase search in whoosh.fields.IDLIST

        :param searcher: forwarded unchanged to the parent ``matcher``
        :param context: forwarded unchanged to the parent ``matcher``
        :returns: the parent's matcher, or a ``NullMatcher`` on ``QueryError``
        """
        try:
            found = super().matcher(searcher, context)
        except QueryError:
            found = NullMatcher()
        return found
42
-
43
-
44
class EODAGQueryParser(QueryParser):
    """
    Query parser preconfigured with the plug-ins and defaults used by EODAG.

    Terms are grouped with ``OrGroup`` and phrases are built with
    ``RobustPhrase``.
    """

    def __init__(
        self,
        filters: list[str],
        schema: Schema,
    ):
        """
        Set up the parser with EODAG's fixed plug-in list.

        :param filters: list of fieldnames to filter on
        :param schema: Whoosh Schema
        """
        # Plug-in order is kept exactly as originally declared.
        enabled_plugins = [
            plugins.SingleQuotePlugin(),
            plugins.FieldsPlugin(),
            plugins.WildcardPlugin(),
            plugins.PhrasePlugin(),
            plugins.GroupPlugin(),
            plugins.OperatorsPlugin(),
            plugins.BoostPlugin(),
            plugins.EveryPlugin(),
            plugins.RangePlugin(),
            plugins.PlusMinusPlugin(),
            plugins.MultifieldPlugin(filters, fieldboosts=None),
        ]
        # No single default field name is given (first argument is None);
        # the MultifieldPlugin above receives the ``filters`` field names.
        super().__init__(
            None,
            schema=schema,
            plugins=enabled_plugins,
            phraseclass=RobustPhrase,
            group=OrGroup,
        )
81
-
82
-
83
class CleanSegmentWriter(SegmentWriter):
    """Override to clean up writer for failed document add when exceptions were absorbed

    cf: https://github.com/whoosh-community/whoosh/pull/543
    """

    def add_document(self, **fields):
        """Add a document, resetting the per-document writer if a field fails.

        :param fields: field values keyed by field name. Keys starting with
            ``_`` are not treated as field names; ``_stored_<fieldname>``
            supplies a custom stored/column value for ``<fieldname>``.
        :raises Exception: whatever was raised while processing a field is
            re-raised unchanged once the per-document writer state has been
            reset.
        """
        self._check_state()
        perdocwriter = self.perdocwriter
        schema = self.schema
        docnum = self.docnum
        add_post = self.pool.add

        docboost = self._doc_boost(fields)
        fieldnames = sorted(name for name in fields if not name.startswith("_"))
        self._check_fields(schema, fieldnames)

        perdocwriter.start_doc(docnum)

        try:
            for fieldname in fieldnames:
                value = fields.get(fieldname)
                if value is None:
                    continue
                field = schema[fieldname]

                length = 0
                if field.indexed:
                    # TODO: Method for adding progressive field values, ie
                    # setting start_pos/start_char?
                    fieldboost = self._field_boost(fields, fieldname, docboost)
                    # Ask the field to return an iterable of
                    # (tbytes, freq, weight, vbytes) tuples
                    items = field.index(value)
                    # Only store the length if the field is marked scorable
                    scorable = field.scorable
                    # Add the terms to the pool
                    for tbytes, freq, weight, vbytes in items:
                        weight *= fieldboost
                        if scorable:
                            length += freq
                        add_post((fieldname, tbytes, docnum, weight, vbytes))

                    if field.separate_spelling():
                        spellfield = field.spelling_fieldname(fieldname)
                        for word in field.spellable_words(value):
                            word = utf8encode(word)[0]
                            # NOTE(review): ``vbytes`` is whatever the last
                            # iteration of the terms loop above left behind
                            # (unbound if ``items`` was empty) - kept as-is,
                            # confirm this is intended.
                            add_post((spellfield, word, 0, 1, vbytes))

                vformat = field.vector
                if vformat:
                    analyzer = field.analyzer
                    # Call the format's word_values method to get posting values
                    vitems = vformat.word_values(value, analyzer, mode="index")
                    # Remove unused frequency field from the tuple
                    vitems = sorted(
                        (text, weight, vbytes) for text, _, weight, vbytes in vitems
                    )
                    perdocwriter.add_vector_items(fieldname, field, vitems)

                # Allow a custom value for stored field/column
                customval = fields.get(f"_stored_{fieldname}", value)

                # Add the stored value and length for this field to the per-
                # document writer
                sv = customval if field.stored else None
                perdocwriter.add_field(fieldname, field, sv, length)

                column = field.column_type
                if column and customval is not None:
                    cv = field.to_column_value(customval)
                    perdocwriter.add_column_value(fieldname, column, cv)
        except Exception:
            # cancel doc: undo the bookkeeping done for the started document
            perdocwriter._doccount -= 1
            perdocwriter._indoc = False
            # bare ``raise`` re-raises the active exception with its
            # traceback untouched
            raise

        perdocwriter.finish_doc()
        self._added = True
        self.docnum += 1
166
-
167
-
168
class CleanFileIndex(FileIndex):
    """File index whose single-process writer is a ``CleanSegmentWriter``."""

    def writer(self, procs=1, **kwargs):
        """Return a writer for this index.

        :param procs: when greater than 1, a ``whoosh.multiproc.MpWriter`` is
            returned; otherwise a ``CleanSegmentWriter``
        :param kwargs: forwarded to the selected writer's constructor
        :returns: the writer instance
        """
        if procs > 1:
            # Imported only when a multi-process writer is requested.
            from whoosh.multiproc import MpWriter

            return MpWriter(self, procs=procs, **kwargs)

        return CleanSegmentWriter(self, **kwargs)
179
-
180
-
181
def create_in(dirname, schema, indexname=None):
    """
    Create an index in a directory, backed by :class:`CleanFileIndex`.

    Convenience wrapper that builds the ``FileStorage`` object for the
    directory and creates the index in it.

    :param dirname: the path string of the directory in which to create the
        index.
    :param schema: a :class:`whoosh.fields.Schema` object describing the
        index's fields.
    :param indexname: the name of the index to create; you only need to specify
        this if you are creating multiple indexes within the same storage
        object. A falsy value selects whoosh's default index name.
    :returns: :class:`Index`
    """

    from whoosh.filedb.filestore import FileStorage

    index_storage = FileStorage(dirname)
    # Fall back to whoosh's default name when none (or an empty one) is given.
    effective_name = indexname or _DEF_INDEX_NAME
    return CleanFileIndex.create(index_storage, schema, effective_name)
File without changes